diff --git a/src/main/java/eu/dnetlib/app/directindex/clients/CommunityClient.java b/src/main/java/eu/dnetlib/app/directindex/clients/CommunityClient.java index 20f0131..0ceda8a 100644 --- a/src/main/java/eu/dnetlib/app/directindex/clients/CommunityClient.java +++ b/src/main/java/eu/dnetlib/app/directindex/clients/CommunityClient.java @@ -25,14 +25,22 @@ public class CommunityClient { private static final Log log = LogFactory.getLog(CommunityClient.class); + @Value("${dnet.directindex.context.url}") + private String contextApiUrl; + @Value("${dnet.directindex.community.url}") private String communityApiUrl; @Cacheable("contexts") - public Collection findContexts(final String community) throws DirectIndexApiException { - if (!community.contains(ZENODO_COMMUNITY)) { return Arrays.asList(createContextInfo(community)); } + public Collection findContexts(final String id) throws DirectIndexApiException { - final String context = community.substring(community.lastIndexOf("/") + 1); + // TODO follow the example in + // https://code-repo.d4science.org/D-Net/dnet-hadoop/src/commit/4d3aef3a095dfbc194983b024ae4ed688eda2b73/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java#L587 + // https://code-repo.d4science.org/D-Net/dnet-hadoop/src/branch/main/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java + + if (!id.contains(ZENODO_COMMUNITY)) { return Arrays.asList(createContextInfo(id)); } + + final String context = id.substring(id.lastIndexOf("/") + 1); final String url = communityApiUrl + "/" + context + "/openairecommunities"; try { @@ -48,14 +56,27 @@ public class CommunityClient { } @Cacheable("context-labels") - public String findLabel(final String id) { + public String findContextLabel(final String id) { + // TODO + + return null; + } + + @Cacheable("category-labels") + public String findCategoryLabel(final String id) { + // TODO + + return null; + } + + 
@Cacheable("concept-labels") + public String findConceptLabel(final String id) { // TODO return null; } private ContextInfo createContextInfo(final String community) { - // TODO labelMap ??? return createContextInfo(community.split("::"), 0); } @@ -67,12 +88,24 @@ public class CommunityClient { id.write("::"); id.write(arr[i + 1]); } - final String label = findLabel(id.toString()); - final String elem = pos == 0 ? "context" : pos == 1 ? "category" : "concept"; - final ContextInfo info = new ContextInfo(elem, id.toString(), label); + + final ContextInfo info = new ContextInfo(); + info.setId(id.toString()); + if (pos == 0) { + info.setElem("context"); + info.setLabel(findContextLabel(id.toString())); + } else if (pos == 1) { + info.setElem("category"); + info.setLabel(findCategoryLabel(id.toString())); + } else { + info.setElem("concept"); + info.setLabel(findConceptLabel(id.toString())); + } + if (pos + 1 < arr.length) { info.getChildren().add(createContextInfo(arr, pos + 1)); } + return info; } @@ -116,14 +149,6 @@ public class CommunityClient { private String label; private List children = new ArrayList<>(); - public ContextInfo() {} - - public ContextInfo(final String elem, final String id, final String label) { - this.elem = elem; - this.id = id; - this.label = label; - } - public String getElem() { return elem; } diff --git a/src/main/java/eu/dnetlib/app/directindex/mapping/SolrRecordMapper.java b/src/main/java/eu/dnetlib/app/directindex/mapping/SolrRecordMapper.java index be403f8..ebbc2c6 100644 --- a/src/main/java/eu/dnetlib/app/directindex/mapping/SolrRecordMapper.java +++ b/src/main/java/eu/dnetlib/app/directindex/mapping/SolrRecordMapper.java @@ -3,7 +3,9 @@ package eu.dnetlib.app.directindex.mapping; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import org.apache.commons.codec.digest.DigestUtils; @@ -24,12 +26,17 @@ 
import eu.dnetlib.dhp.schema.solr.AccessRight; import eu.dnetlib.dhp.schema.solr.Author; import eu.dnetlib.dhp.schema.solr.BestAccessRight; import eu.dnetlib.dhp.schema.solr.Context; +import eu.dnetlib.dhp.schema.solr.Country; +import eu.dnetlib.dhp.schema.solr.Funder; +import eu.dnetlib.dhp.schema.solr.Funding; +import eu.dnetlib.dhp.schema.solr.FundingLevel; import eu.dnetlib.dhp.schema.solr.Instance; import eu.dnetlib.dhp.schema.solr.Language; import eu.dnetlib.dhp.schema.solr.Pid; import eu.dnetlib.dhp.schema.solr.Provenance; import eu.dnetlib.dhp.schema.solr.RecordType; import eu.dnetlib.dhp.schema.solr.RelatedRecord; +import eu.dnetlib.dhp.schema.solr.RelatedRecordHeader; import eu.dnetlib.dhp.schema.solr.Result; import eu.dnetlib.dhp.schema.solr.SolrRecord; import eu.dnetlib.dhp.schema.solr.SolrRecordHeader; @@ -160,6 +167,7 @@ public class SolrRecordMapper { // TODO re.setContexts(null); + // TODO re.setLinksToProjects(null); @@ -258,6 +266,48 @@ public class SolrRecordMapper { private RelatedRecord prepareDnetProjectLink(final String link) { + final Map info = new HashMap<>(); + final String[] arr = link.split("/"); + // info:eu-repo/grantAgreement/EC/FP7/244909/EU/Making Capabilities Work/WorkAble + + if (arr.length > 4) { + final String acronym = arr.length > 7 ? arr[7] : ""; + final String title = arr.length > 6 ? StringUtils.isNotBlank(arr[6]) ? arr[6] : acronym : ""; + final String code = unescape(arr[4]); + + final String jurisdiction = arr.length > 5 ? 
arr[5] : ""; + final String funderId = calculateFunderId(arr[2], arr[3]); + + final String funderShortName = fixFunderShortName(arr[2]); + final String funderName = calculateFunderName(funderShortName); + + final String fundingName = calculateFundingName(funderShortName, arr[3]); + + final Funding funding = new Funding(); + + funding.setFunder(Funder.newInstance(funderId, funderShortName, funderName, Country + .newInstance(jurisdiction, vocClient.findTermLabel("dnet:countries", jurisdiction)), null)); + funding.setLevel0(FundingLevel.newInstance(funderId, funderShortName, fundingName)); + + final RelatedRecordHeader head = new RelatedRecordHeader(); + head.setRelatedIdentifier(calculateProjectId(arr[2], arr[3], arr[4])); + head.setRelationType(null); // TODO + head.setRelatedRecordType(RecordType.project); + head.setRelationClass("isProducedBy"); + head.setRelationProvenance("user:claim"); + head.setTrust("0.9"); + + final RelatedRecord rel = new RelatedRecord(); + rel.setHeader(head); + rel.setProjectTitle(title); + rel.setAcronym(acronym); // TODO + rel.setCode(code); // TODO + rel.setFunding(funding); + + if (StringUtils.isNotBlank(arr[3])) { + info.put("fundingId", funderId + "::" + fundingName); + } + // @formatter:off /* $!esc.evaluate($!info.id) @@ -279,10 +329,166 @@ public class SolrRecordMapper { */ // @formatter:on - final RelatedRecord rel = new RelatedRecord(); + return rel; + } + return null; + } - // TODO Auto-generated method stub - return rel; + private String calculateProjectId(final String funderShortName, final String funding, final String code) { + final String suffix = DigestUtils.md5Hex(code); + final String funderPrefix = calculateFunderPrefix(funderShortName, funding); + return funderPrefix + suffix; + } + + private String calculateFunderPrefix(final String funderShortName, final String funding) { + switch (funderShortName.toLowerCase()) { + case "chist-era": + return "chistera____::"; + case "conicyt": + return "conicytf____::"; + case 
"dfg": + return "dfgf________::"; + case "ec": + switch (funding.toLowerCase()) { + case "fp7": + return "corda_______::"; + case "h2020": + return "corda__h2020::"; + default: + return "corda_____he::"; + } + case "eea": + return "euenvagency_::"; + case "hrzz": + case "mzos": + return "irb_hr______::"; + case "tara": + return "taraexp_____::"; + case "tubitak": + return "tubitakf____::"; + case "rcuk": + return "ukri________::"; + default: + String prefix = funderShortName.toLowerCase(); + // ensure we have 12 chars + while (prefix.length() < 12) { + prefix += "_"; + } + return prefix + "::"; + } + } + + private String calculateFundingName(final String funderShortName, final String fundingName) { + switch (funderShortName) { + case "EC": + if (fundingName.toLowerCase().startsWith("horizon 2020")) { return "H2020"; } + if (fundingName.toLowerCase().startsWith("horizon europe")) { return "HE"; } + default: + return fundingName; + } + } + + private String unescape(final String code) { + return code.replace("%2F", "/"); + } + + protected String calculateFunderId(final String funderShortName, final String funding) { + switch (funderShortName.toLowerCase()) { + case "ec": + return "ec__________::EC"; + default: + final String fixedFunderShortName = fixFunderShortName(funderShortName); + final String prefix = calculateFunderPrefix(fixedFunderShortName, funding); + return prefix + fixedFunderShortName.toUpperCase(); + } + } + + protected String calculateFunderName(final String funderShortName) { + + switch (funderShortName.toLowerCase()) { + case "aff": + case "aka": + return "Academy of Finland"; + case "anr": + return "French National Research Agency (ANR)"; + case "arc": + return "Australian Research Council (ARC)"; + case "asap": + return "Aligning Science Across Parkinson's"; + case "chist-era": + return "CHIST-ERA"; + case "cihr": + return "Canadian Institutes of Health Research"; + case "conicyt": + return "Comisión Nacional de Investigación Científica y 
Tecnológica"; + case "dfg": + return "Deutsche Forschungsgemeinschaft"; + case "ec": + return "European Commission"; + case "eea": + return "European Environment Agency"; + case "fct": + return "Fundação para a Ciência e a Tecnologia, I.P."; + case "fwf": + return "Austrian Science Fund (FWF)"; + case "gsrt": + return "General Secretariat of Research and Technology (GSRT)"; + case "hrzz": + return "Croatian Science Foundation (CSF)"; + case "innoviris": + return "INNOVIRIS"; + case "mestd": + return "Ministry of Education, Science and Technological Development of Republic of Serbia"; + case "miur": + return "Ministero dell'Istruzione dell'Università e della Ricerca"; + case "mzos": + return "Ministry of Science, Education and Sports of the Republic of Croatia (MSES)"; + case "nhmrc": + return "National Health and Medical Research Council (NHMRC)"; + case "nih": + return "National Institutes of Health"; + case "nsf": + return "National Science Foundation"; + case "nserc": + return "Natural Sciences and Engineering Research Council of Canada"; + case "nwo": + return "Netherlands Organisation for Scientific Research (NWO)"; + case "rcuk": + case "ukri": + return "UK Research and Innovation"; + case "rif": + case "rpf": + return "Research and Innovation Foundation"; + case "rsf": + return "Russian Science Foundation"; + case "sfi": + return "Science Foundation Ireland"; + case "sgov": + return "Gobierno de España"; + case "snsf": + return "Swiss National Science Foundation"; + case "sshrc": + return "Social Sciences and Humanities Research Council"; + case "tara": + return "Tara Expeditions Foundation"; + case "tubitak": + return "Türkiye Bilimsel ve Teknolojik Araştırma Kurumu"; + case "wt": + return "Wellcome Trust"; + default: + log.error("Funder short name '" + funderShortName + "' not managed"); + return ""; + } + } + + // TODO: remove me when Zenodo ingests the good UKRI projects + private String fixFunderShortName(final String funderShortName) { + switch 
(funderShortName) { + case "RCUK": + return "UKRI"; + default: + return funderShortName; + } } private Pid prepareDnetPid(final PidEntry pe) { @@ -307,8 +513,6 @@ public class SolrRecordMapper { .map(ctx -> { final Context context = new Context(); - // TODO - return context; }) .filter(Objects::nonNull) diff --git a/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClient.java b/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClient.java index 394ea4d..578826b 100644 --- a/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClient.java +++ b/src/main/java/eu/dnetlib/app/directindex/solr/SolrIndexClient.java @@ -100,13 +100,10 @@ public class SolrIndexClient { private SolrParams queryParamsForId(final String id) { - // TODO: verificare se esiste un campo __date - final Map params = new HashMap(); params.put("q", "objidentifier:" + ClientUtils.escapeQueryChars(id)); - params.put("fl", "objidentifier,__date,__json"); - params.put("sort", "__date desc"); + params.put("fl", "objidentifier,__json"); return new MapSolrParams(params); }