partial implementation

This commit is contained in:
Michele Artini 2024-11-21 15:35:15 +01:00
parent da7c66d29d
commit 034a0d65d6
7 changed files with 396 additions and 212 deletions

View File

@ -17,7 +17,7 @@ import org.springframework.stereotype.Component;
import org.springframework.web.client.RestTemplate;
@Component
public class CommunityClient {
public class CommunityClient implements HasCache {
private static final String ZENODO_COMMUNITY = "zenodo.org/communities/";
@ -80,6 +80,7 @@ public class CommunityClient {
return res;
}
@Override
@CacheEvict(value = { "contexts" }, allEntries = true)
public void clearCache() {}

View File

@ -17,7 +17,7 @@ import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
import eu.dnetlib.app.directindex.input.DatasourceEntry;
@Component
public class DatasourceManagerClient {
public class DatasourceManagerClient implements HasCache {
@Value("${dnet.directindex.dsm.url}")
private String dsmApiUrl;
@ -45,6 +45,7 @@ public class DatasourceManagerClient {
}
}
@Override
@CacheEvict(value = "datasources", allEntries = true)
public void clearCache() {}

View File

@ -0,0 +1,6 @@
package eu.dnetlib.app.directindex.clients;
/**
 * Contract for components that keep cached data which can be discarded on demand.
 */
public interface HasCache {

	/**
	 * Discards the data cached by the implementing component.
	 */
	void clearCache();

}

View File

@ -0,0 +1,310 @@
package eu.dnetlib.app.directindex.clients;
import java.io.Serializable;
import java.util.Locale;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.cache.annotation.CacheEvict;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Component;
/**
 * Turns a grant-agreement link of the form
 * {@code info:eu-repo/grantAgreement/<funder>/<funding>/<code>/<jurisdiction>/<title>/<acronym>}
 * into a {@link ProjectInfo}. Everything is computed locally from the link segments;
 * results are cached in the {@code "projects"} cache.
 */
@Component
public class ProjectClient implements HasCache {

	private static final Log log = LogFactory.getLog(ProjectClient.class);

	/** Separator between the namespace prefix and the local part of an identifier. */
	private static final String ID_SEPARATOR = "::";

	/** Namespace prefixes are right-padded with {@code '_'} up to this length. */
	private static final int PREFIX_LENGTH = 12;

	/**
	 * Parses a project link and derives project, funder and funding information from it.
	 *
	 * @param s
	 *            the link, e.g. {@code info:eu-repo/grantAgreement/EC/FP7/244909/EU/Making Capabilities Work/WorkAble}
	 * @return the resolved information, or {@code null} when the link is {@code null} or has
	 *         fewer than five {@code '/'}-separated segments (i.e. no project code)
	 */
	@Cacheable("projects")
	public ProjectInfo resolveProjectLink(final String s) {
		if (s == null) { return null; }

		// info:eu-repo/grantAgreement/EC/FP7/244909/EU/Making Capabilities Work/WorkAble
		final String[] arr = s.split("/");

		if (arr.length <= 4) { return null; }

		// Optional trailing segments: jurisdiction [5], title [6], acronym [7]
		final String acronym = arr.length > 7 ? arr[7] : "";
		final String title;
		if (arr.length > 6) {
			// a blank title falls back to the acronym
			title = StringUtils.isNotBlank(arr[6]) ? arr[6] : acronym;
		} else {
			title = "";
		}
		final String jurisdiction = arr.length > 5 ? arr[5] : "";

		final String code = unescape(arr[4]);
		final String funderShortName = fixFunderShortName(arr[2]);
		final String funderId = calculateFunderId(funderShortName, arr[3]);
		final String funderName = calculateFunderName(funderShortName);
		// NOTE(review): the funding id is built from the raw funding segment, while the project id
		// below uses the normalized funding name - confirm this asymmetry is intended.
		final String fundingId = calculateFundingId(funderId, arr[3]);
		final String fundingName = calculateFundingName(funderShortName, arr[3]);

		final ProjectInfo info = new ProjectInfo();
		info.setId(calculateProjectId(funderShortName, fundingName, code));
		info.setAcronym(acronym);
		info.setTitle(title);
		info.setCode(code);
		info.setJurisdiction(jurisdiction);
		info.setFunderId(funderId);
		info.setFunderShortName(funderShortName);
		info.setFunderName(funderName);
		info.setFundingId(fundingId);
		info.setFundingName(fundingName);
		return info;
	}

	/**
	 * Intentionally empty: the {@code @CacheEvict} annotation declares the eviction of all
	 * entries of the {@code "projects"} cache.
	 */
	@Override
	@CacheEvict(value = "projects", allEntries = true)
	public void clearCache() {}

	/**
	 * Builds the project identifier as {@code <funder prefix>::<md5 hex of the project code>}.
	 */
	private String calculateProjectId(final String funderShortName, final String funding, final String code) {
		return calculateFunderPrefix(funderShortName, funding) + ID_SEPARATOR + DigestUtils.md5Hex(code);
	}

	/**
	 * Returns the namespace prefix (without the {@code "::"} separator) for a funder.
	 * Funders without a dedicated entry get their lower-cased short name right-padded
	 * with {@code '_'} to 12 characters.
	 */
	private String calculateFunderPrefix(final String funderShortName, final String funding) {
		// Locale.ROOT keeps the switch lookups independent of the JVM default locale
		final String funderKey = funderShortName.toLowerCase(Locale.ROOT);

		switch (funderKey) {
		case "chist-era":
			return "chistera____";
		case "conicyt":
			return "conicytf____";
		case "dfg":
			return "dfgf________";
		case "ec":
			switch (funding.toLowerCase(Locale.ROOT)) {
			case "fp7":
				return "corda_______";
			case "h2020":
				return "corda__h2020";
			default:
				return "corda_____he";
			}
		case "eea":
			return "euenvagency_";
		case "hrzz":
		case "mzos":
			return "irb_hr______";
		case "tara":
			return "taraexp_____";
		case "tubitak":
			return "tubitakf____";
		case "rcuk":
			return "ukri________";
		default:
			// ensure we have (at least) 12 chars; longer names are left untouched
			return StringUtils.rightPad(funderKey, PREFIX_LENGTH, '_');
		}
	}

	/**
	 * Builds {@code <funderId>::<funding>}, or returns {@code null} when either part is blank.
	 */
	private String calculateFundingId(final String funderId, final String funding) {
		if (!StringUtils.isNoneBlank(funderId, funding)) { return null; }
		return funderId + ID_SEPARATOR + funding;
	}

	/**
	 * Normalizes the funding name: for the funder {@code "EC"} names starting with
	 * "horizon 2020" / "horizon europe" (case-insensitive) become {@code "H2020"} / {@code "HE"};
	 * every other value is returned unchanged.
	 */
	private String calculateFundingName(final String funderShortName, final String fundingName) {
		if ("EC".equals(funderShortName)) {
			final String lowered = fundingName.toLowerCase(Locale.ROOT);
			if (lowered.startsWith("horizon 2020")) { return "H2020"; }
			if (lowered.startsWith("horizon europe")) { return "HE"; }
		}
		return fundingName;
	}

	/** Restores the {@code '/'} characters that were escaped as {@code %2F} in the project code. */
	private String unescape(final String code) {
		return code.replace("%2F", "/");
	}

	/**
	 * Builds the funder identifier.
	 *
	 * @param funderShortName
	 *            the funder short name (matched case-insensitively)
	 * @param funding
	 *            the funding segment of the link, used to select the prefix
	 * @return {@code "ec__________::EC"} for the EC, otherwise
	 *         {@code <funder prefix>::<UPPER-CASED SHORT NAME>}
	 */
	protected String calculateFunderId(final String funderShortName, final String funding) {
		if ("ec".equals(funderShortName.toLowerCase(Locale.ROOT))) { return "ec__________::EC"; }

		final String fixedFunderShortName = fixFunderShortName(funderShortName);
		final String prefix = calculateFunderPrefix(fixedFunderShortName, funding);
		// the prefix carries no separator, so it must be added here
		return prefix + ID_SEPARATOR + fixedFunderShortName.toUpperCase(Locale.ROOT);
	}

	/**
	 * Maps a funder short name (case-insensitive) to its display name.
	 *
	 * @return the funder name, or an empty string (after logging an error) for unknown funders
	 */
	protected String calculateFunderName(final String funderShortName) {
		switch (funderShortName.toLowerCase(Locale.ROOT)) {
		case "aff":
		case "aka":
			return "Academy of Finland";
		case "anr":
			return "French National Research Agency (ANR)";
		case "arc":
			return "Australian Research Council (ARC)";
		case "asap":
			return "Aligning Science Across Parkinson's";
		case "chist-era":
			return "CHIST-ERA";
		case "cihr":
			return "Canadian Institutes of Health Research";
		case "conicyt":
			return "Comisión Nacional de Investigación Científica y Tecnológica";
		case "dfg":
			return "Deutsche Forschungsgemeinschaft";
		case "ec":
			return "European Commission";
		case "eea":
			return "European Environment Agency";
		case "fct":
			return "Fundação para a Ciência e a Tecnologia, I.P.";
		case "fwf":
			return "Austrian Science Fund (FWF)";
		case "gsrt":
			return "General Secretariat of Research and Technology (GSRT)";
		case "hrzz":
			return "Croatian Science Foundation (CSF)";
		case "innoviris":
			return "INNOVIRIS";
		case "mestd":
			return "Ministry of Education, Science and Technological Development of Republic of Serbia";
		case "miur":
			return "Ministero dell'Istruzione dell'Università e della Ricerca";
		case "mzos":
			return "Ministry of Science, Education and Sports of the Republic of Croatia (MSES)";
		case "nhmrc":
			return "National Health and Medical Research Council (NHMRC)";
		case "nih":
			return "National Institutes of Health";
		case "nsf":
			return "National Science Foundation";
		case "nserc":
			return "Natural Sciences and Engineering Research Council of Canada";
		case "nwo":
			return "Netherlands Organisation for Scientific Research (NWO)";
		case "rcuk":
		case "ukri":
			return "UK Research and Innovation";
		case "rif":
		case "rpf":
			return "Research and Innovation Foundation";
		case "rsf":
			return "Russian Science Foundation";
		case "sfi":
			return "Science Foundation Ireland";
		case "sgov":
			return "Gobierno de España";
		case "snsf":
			return "Swiss National Science Foundation";
		case "sshrc":
			return "Social Sciences and Humanities Research Council";
		case "tara":
			return "Tara Expeditions Foundation";
		case "tubitak":
			return "Türkiye Bilimsel ve Teknolojik Araştırma Kurumu";
		case "wt":
			return "Wellcome Trust";
		default:
			log.error("Funder short name '" + funderShortName + "' not managed");
			return "";
		}
	}

	// TODO: remove me when Zenodo ingests the good UKRI projects
	/** Rewrites the short name {@code "RCUK"} (exact match) to {@code "UKRI"}. */
	private String fixFunderShortName(final String funderShortName) {
		return "RCUK".equals(funderShortName) ? "UKRI" : funderShortName;
	}

	/**
	 * Plain data holder for the information extracted from a project link.
	 * Declared {@code static} so that instances do not keep a hidden reference to the
	 * enclosing (non-serializable) {@link ProjectClient}.
	 */
	public static class ProjectInfo implements Serializable {

		private static final long serialVersionUID = 4433787349231982285L;

		private String id;
		private String acronym;
		private String title;
		private String code;
		private String jurisdiction;
		private String funderId;
		private String funderShortName;
		private String funderName;
		private String fundingId;
		private String fundingName;

		public String getId() {
			return id;
		}

		public void setId(final String id) {
			this.id = id;
		}

		public String getAcronym() {
			return acronym;
		}

		public void setAcronym(final String acronym) {
			this.acronym = acronym;
		}

		public String getTitle() {
			return title;
		}

		public void setTitle(final String title) {
			this.title = title;
		}

		public String getCode() {
			return code;
		}

		public void setCode(final String code) {
			this.code = code;
		}

		public String getJurisdiction() {
			return jurisdiction;
		}

		public void setJurisdiction(final String jurisdiction) {
			this.jurisdiction = jurisdiction;
		}

		public String getFunderId() {
			return funderId;
		}

		public void setFunderId(final String funderId) {
			this.funderId = funderId;
		}

		public String getFunderShortName() {
			return funderShortName;
		}

		public void setFunderShortName(final String funderShortName) {
			this.funderShortName = funderShortName;
		}

		public String getFunderName() {
			return funderName;
		}

		public void setFunderName(final String funderName) {
			this.funderName = funderName;
		}

		public String getFundingId() {
			return fundingId;
		}

		public void setFundingId(final String fundingId) {
			this.fundingId = fundingId;
		}

		public String getFundingName() {
			return fundingName;
		}

		public void setFundingName(final String fundingName) {
			this.fundingName = fundingName;
		}
	}
}

View File

@ -18,7 +18,7 @@ import org.springframework.web.client.RestTemplate;
import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
@Component
public class VocabularyClient {
public class VocabularyClient implements HasCache {
private static final Log log = LogFactory.getLog(VocabularyClient.class);
@ -53,6 +53,7 @@ public class VocabularyClient {
}
}
@Override
@CacheEvict(value = "vocabularies", allEntries = true)
public void clearCache() {}

View File

@ -1,5 +1,7 @@
package eu.dnetlib.app.directindex.controllers;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.web.bind.annotation.GetMapping;
@ -8,33 +10,23 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseStatus;
import org.springframework.web.bind.annotation.RestController;
import eu.dnetlib.app.directindex.clients.CommunityClient;
import eu.dnetlib.app.directindex.clients.DatasourceManagerClient;
import eu.dnetlib.app.directindex.clients.VocabularyClient;
import eu.dnetlib.app.directindex.clients.HasCache;
import eu.dnetlib.app.directindex.tasks.ScheduledActions;
@RestController
@RequestMapping("/api/admin")
public class AdminController {
@Autowired
private VocabularyClient vocabularyClient;
@Autowired
private DatasourceManagerClient dsmClient;
@Autowired
private CommunityClient communityClient;
@Autowired
private ScheduledActions scheduledActions;
@Autowired
private List<HasCache> clients;
@GetMapping("/evictCache")
@ResponseStatus(HttpStatus.OK)
public void evictCache() {
vocabularyClient.clearCache();
dsmClient.clearCache();
communityClient.clearCache();
clients.forEach(HasCache::clearCache);
}
@GetMapping("/scheduling/{enabled}")

View File

@ -2,7 +2,6 @@ package eu.dnetlib.app.directindex.mapping;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@ -18,6 +17,8 @@ import org.springframework.stereotype.Component;
import eu.dnetlib.app.directindex.clients.CommunityClient;
import eu.dnetlib.app.directindex.clients.CommunityClient.ContextInfo;
import eu.dnetlib.app.directindex.clients.DatasourceManagerClient;
import eu.dnetlib.app.directindex.clients.ProjectClient;
import eu.dnetlib.app.directindex.clients.ProjectClient.ProjectInfo;
import eu.dnetlib.app.directindex.clients.VocabularyClient;
import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
import eu.dnetlib.app.directindex.input.DatasourceEntry;
@ -58,6 +59,9 @@ public class SolrRecordMapper {
@Autowired
private CommunityClient communityClient;
@Autowired
private ProjectClient projectClient;
private static final Log log = LogFactory.getLog(SolrRecordMapper.class);
public SolrRecord toSolrRecord(final ResultEntry result) {
@ -169,11 +173,40 @@ public class SolrRecordMapper {
}
}
// TODO
re.setContexts(null);
final List<String> contexts = new ArrayList<String>();
if (r.getContext() != null) {
for (final Context ctx : r.getContext()) {
if (ctx.getCategory() != null && ctx.getCategory().size() > 0) {
for (final Category cat : ctx.getCategory()) {
if (cat.getConcept() != null && cat.getConcept().size() > 0) {
for (final Concept cpt : cat.getConcept()) {
contexts.add(cpt.getId());
}
}
}
} else {
contexts.add(ctx.getId());
}
}
}
re.setContexts(contexts);
// TODO
re.setLinksToProjects(null);
// @formatter:off
final List<String> projects = r.getLinks()
.stream()
.filter(l -> l.getHeader().getRelatedRecordType() == RecordType.project)
.map(p -> String.format("info:eu-repo/grantAgreement/%s/%s/%s/%s/%s/%s",
StringUtils.defaultIfBlank(p.getFunding().getFunder().getShortname(), ""),
StringUtils.defaultIfBlank(p.getFunding().getLevel0().getName(), ""),
StringUtils.defaultIfBlank(p.getCode(), ""),
StringUtils.defaultIfBlank(p.getFunding().getFunder().getJurisdiction().getCode(), ""),
StringUtils.defaultIfBlank(p.getProjectTitle(), ""),
StringUtils.defaultIfBlank(p.getAcronym(), "")))
.distinct()
.collect(Collectors.toList());
// @formatter:on
re.setLinksToProjects(projects);
return re;
}
@ -270,208 +303,48 @@ public class SolrRecordMapper {
private RelatedRecord prepareDnetProjectLink(final String link) {
final Map<String, String> info = new HashMap<>();
final String[] arr = link.split("/");
// info:eu-repo/grantAgreement/EC/FP7/244909/EU/Making Capabilities Work/WorkAble
if (arr.length > 4) {
final String acronym = arr.length > 7 ? arr[7] : "";
final String title = arr.length > 6 ? StringUtils.isNotBlank(arr[6]) ? arr[6] : acronym : "";
final String code = unescape(arr[4]);
final ProjectInfo info = projectClient.resolveProjectLink(link);
final String jurisdiction = arr.length > 5 ? arr[5] : "";
final String funderId = calculateFunderId(arr[2], arr[3]);
if (info == null) { return null; }
final String funderShortName = fixFunderShortName(arr[2]);
final String funderName = calculateFunderName(funderShortName);
final RelatedRecordHeader head = new RelatedRecordHeader();
head.setRelatedIdentifier(info.getId());
head.setRelationType(ModelConstants.RESULT_PROJECT);
head.setRelatedRecordType(RecordType.project);
head.setRelationClass(ModelConstants.IS_PRODUCED_BY);
head.setRelationProvenance(ModelConstants.USER_CLAIM);
head.setTrust("0.9");
final String fundingName = calculateFundingName(funderShortName, arr[3]);
final RelatedRecord rel = new RelatedRecord();
rel.setHeader(head);
rel.setProjectTitle(info.getTitle());
rel.setAcronym(info.getAcronym());
rel.setCode(info.getCode());
if (StringUtils.isNotBlank(info.getFunderId())) {
final Funding funding = new Funding();
funding.setFunder(Funder.newInstance(funderId, funderShortName, funderName, Country
.newInstance(jurisdiction, vocClient.findTermLabel("dnet:countries", jurisdiction)), null));
funding.setLevel0(FundingLevel.newInstance(funderId, funderShortName, fundingName));
final Funder funder = new Funder();
funder.setId(info.getFunderId());
funder.setName(info.getFunderName());
funder.setShortname(info.getFunderShortName());
funder.setJurisdiction(Country.newInstance(info.getJurisdiction(), vocClient.findTermLabel("dnet:countries", info.getJurisdiction())));
final RelatedRecordHeader head = new RelatedRecordHeader();
head.setRelatedIdentifier(calculateProjectId(arr[2], arr[3], arr[4]));
head.setRelationType(ModelConstants.RESULT_PROJECT);
head.setRelatedRecordType(RecordType.project);
head.setRelationClass(ModelConstants.IS_PRODUCED_BY);
head.setRelationProvenance(ModelConstants.USER_CLAIM);
head.setTrust("0.9");
funding.setFunder(funder);
if (StringUtils.isNotBlank(info.getFundingId())) {
final FundingLevel level = new FundingLevel();
level.setId(info.getFundingId());
level.setName(info.getFundingName());
level.setDescription(info.getFundingName());
funding.setLevel0(level);
}
final RelatedRecord rel = new RelatedRecord();
rel.setHeader(head);
rel.setProjectTitle(title);
rel.setAcronym(acronym);
rel.setCode(code);
rel.setFunding(funding);
if (StringUtils.isNotBlank(arr[3])) {
info.put("fundingId", funderId + "::" + fundingName);
}
return rel;
}
return null;
}
private String calculateProjectId(final String funderShortName, final String funding, final String code) {
final String suffix = DigestUtils.md5Hex(code);
final String funderPrefix = calculateFunderPrefix(funderShortName, funding);
return funderPrefix + suffix;
}
private String calculateFunderPrefix(final String funderShortName, final String funding) {
switch (funderShortName.toLowerCase()) {
case "chist-era":
return "chistera____::";
case "conicyt":
return "conicytf____::";
case "dfg":
return "dfgf________::";
case "ec":
switch (funding.toLowerCase()) {
case "fp7":
return "corda_______::";
case "h2020":
return "corda__h2020::";
default:
return "corda_____he::";
}
case "eea":
return "euenvagency_::";
case "hrzz":
case "mzos":
return "irb_hr______::";
case "tara":
return "taraexp_____::";
case "tubitak":
return "tubitakf____::";
case "rcuk":
return "ukri________::";
default:
String prefix = funderShortName.toLowerCase();
// ensure we have 12 chars
while (prefix.length() < 12) {
prefix += "_";
}
return prefix + "::";
}
}
private String calculateFundingName(final String funderShortName, final String fundingName) {
switch (funderShortName) {
case "EC":
if (fundingName.toLowerCase().startsWith("horizon 2020")) { return "H2020"; }
if (fundingName.toLowerCase().startsWith("horizon europe")) { return "HE"; }
default:
return fundingName;
}
}
private String unescape(final String code) {
return code.replace("%2F", "/");
}
protected String calculateFunderId(final String funderShortName, final String funding) {
switch (funderShortName.toLowerCase()) {
case "ec":
return "ec__________::EC";
default:
final String fixedFunderShortName = fixFunderShortName(funderShortName);
final String prefix = calculateFunderPrefix(fixedFunderShortName, funding);
return prefix + fixedFunderShortName.toUpperCase();
}
}
protected String calculateFunderName(final String funderShortName) {
switch (funderShortName.toLowerCase()) {
case "aff":
case "aka":
return "Academy of Finland";
case "anr":
return "French National Research Agency (ANR)";
case "arc":
return "Australian Research Council (ARC)";
case "asap":
return "Aligning Science Across Parkinson's";
case "chist-era":
return "CHIST-ERA";
case "cihr":
return "Canadian Institutes of Health Research";
case "conicyt":
return "Comisión Nacional de Investigación Científica y Tecnológica";
case "dfg":
return "Deutsche Forschungsgemeinschaft";
case "ec":
return "European Commission";
case "eea":
return "European Environment Agency";
case "fct":
return "Fundação para a Ciência e a Tecnologia, I.P.";
case "fwf":
return "Austrian Science Fund (FWF)";
case "gsrt":
return "General Secretariat of Research and Technology (GSRT)";
case "hrzz":
return "Croatian Science Foundation (CSF)";
case "innoviris":
return "INNOVIRIS";
case "mestd":
return "Ministry of Education, Science and Technological Development of Republic of Serbia";
case "miur":
return "Ministero dell'Istruzione dell'Università e della Ricerca";
case "mzos":
return "Ministry of Science, Education and Sports of the Republic of Croatia (MSES)";
case "nhmrc":
return "National Health and Medical Research Council (NHMRC)";
case "nih":
return "National Institutes of Health";
case "nsf":
return "National Science Foundation";
case "nserc":
return "Natural Sciences and Engineering Research Council of Canada";
case "nwo":
return "Netherlands Organisation for Scientific Research (NWO)";
case "rcuk":
case "ukri":
return "UK Research and Innovation";
case "rif":
case "rpf":
return "Research and Innovation Foundation";
case "rsf":
return "Russian Science Foundation";
case "sfi":
return "Science Foundation Ireland";
case "sgov":
return "Gobierno de España";
case "snsf":
return "Swiss National Science Foundation";
case "sshrc":
return "Social Sciences and Humanities Research Council";
case "tara":
return "Tara Expeditions Foundation";
case "tubitak":
return "Türkiye Bilimsel ve Teknolojik Araştırma Kurumu";
case "wt":
return "Wellcome Trust";
default:
log.error("Funder short name '" + funderShortName + "' not managed");
return "";
}
}
// TODO: remove me when Zenodo ingests the good UKRI projects
private String fixFunderShortName(final String funderShortName) {
switch (funderShortName) {
case "RCUK":
return "UKRI";
default:
return funderShortName;
}
return rel;
}
private Pid prepareDnetPid(final PidEntry pe) {