partial implementation

This commit is contained in:
Michele Artini 2024-11-20 13:20:13 +01:00
parent 76436688aa
commit e4faf7c227
3 changed files with 251 additions and 25 deletions

View File

@ -25,14 +25,22 @@ public class CommunityClient {
private static final Log log = LogFactory.getLog(CommunityClient.class);
@Value("${dnet.directindex.context.url}")
private String contextApiUrl;
@Value("${dnet.directindex.community.url}")
private String communityApiUrl;
@Cacheable("contexts")
public Collection<ContextInfo> findContexts(final String community) throws DirectIndexApiException {
if (!community.contains(ZENODO_COMMUNITY)) { return Arrays.asList(createContextInfo(community)); }
public Collection<ContextInfo> findContexts(final String id) throws DirectIndexApiException {
final String context = community.substring(community.lastIndexOf("/") + 1);
// TODO: follow the example of
// https://code-repo.d4science.org/D-Net/dnet-hadoop/src/commit/4d3aef3a095dfbc194983b024ae4ed688eda2b73/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java#L587
// https://code-repo.d4science.org/D-Net/dnet-hadoop/src/branch/main/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java
if (!id.contains(ZENODO_COMMUNITY)) { return Arrays.asList(createContextInfo(id)); }
final String context = id.substring(id.lastIndexOf("/") + 1);
final String url = communityApiUrl + "/" + context + "/openairecommunities";
try {
@ -48,14 +56,27 @@ public class CommunityClient {
}
@Cacheable("context-labels")
public String findLabel(final String id) {
public String findContextLabel(final String id) {
// TODO
return null;
}
/**
 * Placeholder for the lookup of a category label (purpose inferred from the method name).
 * <p>
 * Not implemented yet: the body ignores {@code id} and always returns {@code null}.
 * The method is annotated with {@code @Cacheable("category-labels")}.
 *
 * @param id
 *            identifier of the category
 * @return currently always {@code null}
 */
@Cacheable("category-labels")
public String findCategoryLabel(final String id) {
// TODO: implement the label lookup
return null;
}
/**
 * Placeholder for the lookup of a concept label (purpose inferred from the method name).
 * <p>
 * Not implemented yet: the body ignores {@code id} and always returns {@code null}.
 * <p>
 * NOTE(review): the cache name is "category-labels", the same one used by
 * {@code findCategoryLabel}; this looks like a copy/paste leftover. Confirm whether a
 * dedicated cache (e.g. "concept-labels") was intended before implementing the lookup.
 *
 * @param id
 *            identifier of the concept
 * @return currently always {@code null}
 */
@Cacheable("category-labels")
public String findConceptLabel(final String id) {
// TODO: implement the label lookup
return null;
}
/**
 * Builds the {@link ContextInfo} chain for a community identifier.
 * <p>
 * The identifier is split on "::" and the resulting segments are handed to the
 * array-based overload, starting from the first segment.
 *
 * @param community
 *            the community identifier, with segments separated by "::"
 * @return the {@link ContextInfo} produced by the array-based overload for position 0
 */
private ContextInfo createContextInfo(final String community) {
    // TODO labelMap ???
    final String[] segments = community.split("::");
    return createContextInfo(segments, 0);
}
@ -67,12 +88,24 @@ public class CommunityClient {
id.write("::");
id.write(arr[i + 1]);
}
final String label = findLabel(id.toString());
final String elem = pos == 0 ? "context" : pos == 1 ? "category" : "concept";
final ContextInfo info = new ContextInfo(elem, id.toString(), label);
final ContextInfo info = new ContextInfo();
info.setId(id.toString());
if (pos == 0) {
info.setElem("context");
info.setLabel(findContextLabel(id.toString()));
} else if (pos == 1) {
info.setElem("category");
info.setLabel(findCategoryLabel(id.toString()));
} else {
info.setElem("concept");
info.setLabel(findConceptLabel(id.toString()));
}
if (pos + 1 < arr.length) {
info.getChildren().add(createContextInfo(arr, pos + 1));
}
return info;
}
@ -116,14 +149,6 @@ public class CommunityClient {
private String label;
private List<ContextInfo> children = new ArrayList<>();
public ContextInfo() {}
public ContextInfo(final String elem, final String id, final String label) {
this.elem = elem;
this.id = id;
this.label = label;
}
/**
 * @return the value of the {@code elem} field
 */
public String getElem() {
    return this.elem;
}

View File

@ -3,7 +3,9 @@ package eu.dnetlib.app.directindex.mapping;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.codec.digest.DigestUtils;
@ -24,12 +26,17 @@ import eu.dnetlib.dhp.schema.solr.AccessRight;
import eu.dnetlib.dhp.schema.solr.Author;
import eu.dnetlib.dhp.schema.solr.BestAccessRight;
import eu.dnetlib.dhp.schema.solr.Context;
import eu.dnetlib.dhp.schema.solr.Country;
import eu.dnetlib.dhp.schema.solr.Funder;
import eu.dnetlib.dhp.schema.solr.Funding;
import eu.dnetlib.dhp.schema.solr.FundingLevel;
import eu.dnetlib.dhp.schema.solr.Instance;
import eu.dnetlib.dhp.schema.solr.Language;
import eu.dnetlib.dhp.schema.solr.Pid;
import eu.dnetlib.dhp.schema.solr.Provenance;
import eu.dnetlib.dhp.schema.solr.RecordType;
import eu.dnetlib.dhp.schema.solr.RelatedRecord;
import eu.dnetlib.dhp.schema.solr.RelatedRecordHeader;
import eu.dnetlib.dhp.schema.solr.Result;
import eu.dnetlib.dhp.schema.solr.SolrRecord;
import eu.dnetlib.dhp.schema.solr.SolrRecordHeader;
@ -160,6 +167,7 @@ public class SolrRecordMapper {
// TODO
re.setContexts(null);
// TODO
re.setLinksToProjects(null);
@ -258,6 +266,48 @@ public class SolrRecordMapper {
private RelatedRecord prepareDnetProjectLink(final String link) {
final Map<String, String> info = new HashMap<>();
final String[] arr = link.split("/");
// info:eu-repo/grantAgreement/EC/FP7/244909/EU/Making Capabilities Work/WorkAble
if (arr.length > 4) {
final String acronym = arr.length > 7 ? arr[7] : "";
final String title = arr.length > 6 ? StringUtils.isNotBlank(arr[6]) ? arr[6] : acronym : "";
final String code = unescape(arr[4]);
final String jurisdiction = arr.length > 5 ? arr[5] : "";
final String funderId = calculateFunderId(arr[2], arr[3]);
final String funderShortName = fixFunderShortName(arr[2]);
final String funderName = calculateFunderName(funderShortName);
final String fundingName = calculateFundingName(funderShortName, arr[3]);
final Funding funding = new Funding();
funding.setFunder(Funder.newInstance(funderId, funderShortName, funderName, Country
.newInstance(jurisdiction, vocClient.findTermLabel("dnet:countries", jurisdiction)), null));
funding.setLevel0(FundingLevel.newInstance(funderId, funderShortName, fundingName));
final RelatedRecordHeader head = new RelatedRecordHeader();
head.setRelatedIdentifier(calculateProjectId(arr[2], arr[3], arr[4]));
head.setRelationType(null); // TODO
head.setRelatedRecordType(RecordType.project);
head.setRelationClass("isProducedBy");
head.setRelationProvenance("user:claim");
head.setTrust("0.9");
final RelatedRecord rel = new RelatedRecord();
rel.setHeader(head);
rel.setProjectTitle(title);
rel.setAcronym(acronym); // TODO
rel.setCode(code); // TODO
rel.setFunding(funding);
if (StringUtils.isNotBlank(arr[3])) {
info.put("fundingId", funderId + "::" + fundingName);
}
// @formatter:off
/*<rel inferred="false" trust="0.9" inferenceprovenance="" provenanceaction="user:claim">
<to class="isProducedBy" scheme="dnet:result_project_relations" type="project">$!esc.evaluate($!info.id)</to>
@ -279,11 +329,167 @@ public class SolrRecordMapper {
</rel>*/
// @formatter:on
final RelatedRecord rel = new RelatedRecord();
// TODO Auto-generated method stub
return rel;
}
return null;
}
/**
 * Builds a project identifier as {@code <funder prefix> + md5(code)}.
 *
 * @param funderShortName
 *            short name of the funder, forwarded to {@code calculateFunderPrefix}
 * @param funding
 *            funding stream, forwarded to {@code calculateFunderPrefix}
 * @param code
 *            project code; its MD5 hex digest becomes the identifier suffix
 * @return the funder prefix followed by the MD5 hex digest of {@code code}
 */
private String calculateProjectId(final String funderShortName, final String funding, final String code) {
    // The digest is computed first, exactly as before, so failures surface in the same order.
    final String codeHash = DigestUtils.md5Hex(code);
    return calculateFunderPrefix(funderShortName, funding) + codeHash;
}
/**
 * Returns the namespace prefix (12 characters followed by "::") used for a funder.
 * <p>
 * A fixed set of funders has a hard-coded prefix; for the European Commission the prefix
 * also depends on the funding stream. Any other funder gets its lower-cased short name,
 * right-padded with '_' up to 12 characters (longer names are kept as they are).
 * <p>
 * Lower-casing uses {@link java.util.Locale#ROOT}: with the JVM default locale the result
 * would change under e.g. a Turkish locale ("TUBITAK" would become "tubıtak"), so known
 * funders would miss their case and generated identifiers would differ between hosts.
 *
 * @param funderShortName
 *            short name of the funder, matched case-insensitively; must not be {@code null}
 * @param funding
 *            funding stream, only inspected when the funder is "ec" (then it must not be {@code null})
 * @return the prefix, always ending with "::"
 */
private String calculateFunderPrefix(final String funderShortName, final String funding) {
    final String funder = funderShortName.toLowerCase(java.util.Locale.ROOT);
    switch (funder) {
    case "chist-era":
        return "chistera____::";
    case "conicyt":
        return "conicytf____::";
    case "dfg":
        return "dfgf________::";
    case "ec":
        switch (funding.toLowerCase(java.util.Locale.ROOT)) {
        case "fp7":
            return "corda_______::";
        case "h2020":
            return "corda__h2020::";
        default:
            // every other EC funding stream is mapped to the Horizon Europe namespace
            return "corda_____he::";
        }
    case "eea":
        return "euenvagency_::";
    case "hrzz":
    case "mzos":
        return "irb_hr______::";
    case "tara":
        return "taraexp_____::";
    case "tubitak":
        return "tubitakf____::";
    case "rcuk":
        return "ukri________::";
    default:
        // ensure we have (at least) 12 chars before the "::" separator
        final StringBuilder prefix = new StringBuilder(funder);
        while (prefix.length() < 12) {
            prefix.append('_');
        }
        return prefix.append("::").toString();
    }
}
/**
 * Normalizes the name of a funding stream.
 * <p>
 * For the funder "EC", names starting with "horizon 2020" or "horizon europe"
 * (case-insensitive) are shortened to "H2020" and "HE" respectively. Every other
 * combination returns {@code fundingName} unchanged.
 * <p>
 * The comparison lower-cases with {@link java.util.Locale#ROOT} so that it does not depend
 * on the JVM default locale (under a Turkish locale "HORIZON 2020" would not match). The
 * former implicit fall-through from the "EC" case into the default branch is now an
 * explicit final return. A {@code null} funder short name is treated as "not EC".
 *
 * @param funderShortName
 *            short name of the funder; only the exact value "EC" triggers the normalization
 * @param fundingName
 *            name of the funding stream; must not be {@code null} when the funder is "EC"
 * @return "H2020", "HE" or the unchanged {@code fundingName}
 */
private String calculateFundingName(final String funderShortName, final String fundingName) {
    if ("EC".equals(funderShortName)) {
        final String normalized = fundingName.toLowerCase(java.util.Locale.ROOT);
        if (normalized.startsWith("horizon 2020")) { return "H2020"; }
        if (normalized.startsWith("horizon europe")) { return "HE"; }
    }
    return fundingName;
}
/**
 * Restores the slashes of a project code that were percent-encoded to fit into a
 * slash-separated link.
 * <p>
 * Both "%2F" and "%2f" are decoded: the hexadecimal digits of a percent-encoded octet
 * are case-insensitive, so the two spellings denote the same character.
 *
 * @param code
 *            the (possibly escaped) project code; must not be {@code null}
 * @return {@code code} with every percent-encoded slash replaced by "/"
 */
private String unescape(final String code) {
    return code.replace("%2F", "/").replace("%2f", "/");
}
/**
 * Builds the identifier of a funder.
 * <p>
 * The European Commission ("ec", case-insensitive) has the fixed identifier
 * "ec__________::EC". For every other funder the identifier is the funder prefix
 * followed by the upper-cased short name, both computed on the short name returned by
 * {@code fixFunderShortName}.
 * <p>
 * Case conversions use {@link java.util.Locale#ROOT}: with the JVM default locale the
 * identifier would differ under e.g. a Turkish locale ("nih" would become "NİH").
 *
 * @param funderShortName
 *            short name of the funder; must not be {@code null}
 * @param funding
 *            funding stream, forwarded to {@code calculateFunderPrefix}
 * @return the funder identifier
 */
protected String calculateFunderId(final String funderShortName, final String funding) {
    if ("ec".equals(funderShortName.toLowerCase(java.util.Locale.ROOT))) { return "ec__________::EC"; }

    final String fixedFunderShortName = fixFunderShortName(funderShortName);
    final String prefix = calculateFunderPrefix(fixedFunderShortName, funding);
    return prefix + fixedFunderShortName.toUpperCase(java.util.Locale.ROOT);
}
/**
 * Returns the full name of a funder given its short name.
 * <p>
 * The short name is matched case-insensitively. Lower-casing uses
 * {@link java.util.Locale#ROOT}, because with the JVM default locale short names
 * containing an 'I' ("NIH", "MIUR", "CIHR", "TUBITAK", ...) would not match their case
 * under e.g. a Turkish locale.
 *
 * @param funderShortName
 *            short name of the funder; must not be {@code null}
 * @return the full funder name, or an empty string (after logging an error) when the
 *         short name is not managed
 */
protected String calculateFunderName(final String funderShortName) {
    switch (funderShortName.toLowerCase(java.util.Locale.ROOT)) {
    case "aff":
    case "aka":
        return "Academy of Finland";
    case "anr":
        return "French National Research Agency (ANR)";
    case "arc":
        return "Australian Research Council (ARC)";
    case "asap":
        return "Aligning Science Across Parkinson's";
    case "chist-era":
        return "CHIST-ERA";
    case "cihr":
        return "Canadian Institutes of Health Research";
    case "conicyt":
        return "Comisión Nacional de Investigación Científica y Tecnológica";
    case "dfg":
        return "Deutsche Forschungsgemeinschaft";
    case "ec":
        return "European Commission";
    case "eea":
        return "European Environment Agency";
    case "fct":
        return "Fundação para a Ciência e a Tecnologia, I.P.";
    case "fwf":
        return "Austrian Science Fund (FWF)";
    case "gsrt":
        return "General Secretariat of Research and Technology (GSRT)";
    case "hrzz":
        return "Croatian Science Foundation (CSF)";
    case "innoviris":
        return "INNOVIRIS";
    case "mestd":
        return "Ministry of Education, Science and Technological Development of Republic of Serbia";
    case "miur":
        return "Ministero dell'Istruzione dell'Università e della Ricerca";
    case "mzos":
        return "Ministry of Science, Education and Sports of the Republic of Croatia (MSES)";
    case "nhmrc":
        return "National Health and Medical Research Council (NHMRC)";
    case "nih":
        return "National Institutes of Health";
    case "nsf":
        return "National Science Foundation";
    case "nserc":
        return "Natural Sciences and Engineering Research Council of Canada";
    case "nwo":
        return "Netherlands Organisation for Scientific Research (NWO)";
    case "rcuk":
    case "ukri":
        return "UK Research and Innovation";
    case "rif":
    case "rpf":
        return "Research and Innovation Foundation";
    case "rsf":
        return "Russian Science Foundation";
    case "sfi":
        return "Science Foundation Ireland";
    case "sgov":
        return "Gobierno de España";
    case "snsf":
        return "Swiss National Science Foundation";
    case "sshrc":
        return "Social Sciences and Humanities Research Council";
    case "tara":
        return "Tara Expeditions Foundation";
    case "tubitak":
        return "Türkiye Bilimsel ve Teknolojik Araştırma Kurumu";
    case "wt":
        return "Wellcome Trust";
    default:
        // unknown funders are reported but do not interrupt the mapping
        log.error("Funder short name '" + funderShortName + "' not managed");
        return "";
    }
}
// TODO: remove me when Zenodo ingests the good UKRI projects
/**
 * Maps the legacy funder short name "RCUK" (exact, case-sensitive match) to "UKRI".
 *
 * @param funderShortName
 *            short name of the funder; must not be {@code null}
 * @return "UKRI" when the input is "RCUK", otherwise the input itself
 */
private String fixFunderShortName(final String funderShortName) {
    if (funderShortName.equals("RCUK")) { return "UKRI"; }
    return funderShortName;
}
private Pid prepareDnetPid(final PidEntry pe) {
final Pid p = new Pid();
@ -307,8 +513,6 @@ public class SolrRecordMapper {
.map(ctx -> {
final Context context = new Context();
// TODO
return context;
})
.filter(Objects::nonNull)

View File

@ -100,13 +100,10 @@ public class SolrIndexClient {
/**
 * Builds the Solr parameters of a query that selects records by {@code objidentifier}.
 * <p>
 * The identifier is escaped with {@code ClientUtils.escapeQueryChars} before being
 * appended to the query string.
 * <p>
 * NOTE(review): the "fl" key is put twice, so the second value ("objidentifier,__json")
 * replaces the first one, while the "sort" parameter still refers to {@code __date}.
 * Confirm which of these lines are meant to survive.
 *
 * @param id
 *            the identifier to search for
 * @return the query parameters wrapped in a {@code MapSolrParams}
 */
private SolrParams queryParamsForId(final String id) {
// TODO: check whether a __date field exists
final Map<String, String> params = new HashMap<String, String>();
params.put("q", "objidentifier:" + ClientUtils.escapeQueryChars(id));
params.put("fl", "objidentifier,__date,__json");
params.put("sort", "__date desc");
params.put("fl", "objidentifier,__json");
return new MapSolrParams(params);
}