partial implementation

This commit is contained in:
Michele Artini 2024-11-18 16:04:56 +01:00
parent 740a4f63a2
commit 1e6880ff95
6 changed files with 224 additions and 33 deletions

View File

@ -53,12 +53,6 @@ public class DirectIndexApplication {
@Value("${dnet.directindex.description}")
private String publicDesc;
@Value("${openaire.service.islookup.wsdl}")
private String isLookupUrl;
private static final int isLookupRequestTimeout = 60000 * 10;
private static final int isLookupConnectTimeout = 60000 * 10;
public static void main(final String[] args) {
SpringApplication.run(DirectIndexApplication.class, args);
}

View File

@ -1,9 +1,10 @@
package eu.dnetlib.app.directindex.clients;
import java.io.Serializable;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.logging.Log;
@ -28,22 +29,55 @@ public class CommunityClient {
private String communityApiUrl;
@Cacheable("communities")
public Collection<String> translateZenodoCommunity(final String community) throws DirectIndexApiException {
if (!community.contains(ZENODO_COMMUNITY)) { return Arrays.asList(community); }
public Collection<ContextInfo> findCommunities(final String community) throws DirectIndexApiException {
if (!community.contains(ZENODO_COMMUNITY)) { return Arrays.asList(createContextInfo(community)); }
final String context = community.substring(community.lastIndexOf("/") + 1);
final RestTemplate rt = new RestTemplate();
final String url = communityApiUrl + "/" + context + "/openairecommunities";
try {
return new HashSet<>(rt.getForObject(url, ZenodoContextList.class).getOpenAirecommunitylist());
return rt.getForObject(url, ZenodoContextList.class)
.getOpenAirecommunitylist()
.stream()
.map(this::createContextInfo)
.toList();
} catch (final RestClientException e) {
log.error("Unable to get object for url: " + url, e);
throw new DirectIndexApiException("Unable to get object for url: " + url, e);
}
}
@CacheEvict(value = "communities", allEntries = true)
/**
 * Placeholder for the lookup of the label associated with an identifier
 * (the value is used as the label of the {@code ContextInfo} nodes built by
 * {@code createContextInfo}).
 * <p>
 * Not implemented yet: the method currently always returns {@code null}.
 * The method is annotated to be cached in the "community-labels" cache.
 *
 * @param id
 *            the identifier whose label is requested
 * @return currently always {@code null}
 */
@Cacheable("community-labels")
public String findLabel(final String id) {
// TODO
return null;
}
/**
 * Builds the {@code ContextInfo} chain for a community identifier whose
 * levels are separated by "::".
 *
 * @param community
 *            the full identifier, e.g. {@code egi::classification::natsc::math}
 * @return the node for the first level, with the deeper levels nested as children
 */
private ContextInfo createContextInfo(final String community) {
    final String[] levels = community.split("::");
    // TODO labelMap ???
    return createContextInfo(levels, 0);
}
/**
 * Recursively builds the node at depth {@code pos} of an identifier that has
 * already been split into its levels.
 * <p>
 * The node id is the "::"-joined prefix {@code arr[0..pos]}; the element name
 * is "context" at depth 0, "category" at depth 1 and "concept" for any deeper
 * level. When further levels exist, the next one is appended as the only child.
 *
 * @param arr
 *            the identifier levels
 * @param pos
 *            the zero-based depth of the node to create
 * @return the node for depth {@code pos}, including its nested descendants
 */
private ContextInfo createContextInfo(final String[] arr, final int pos) {
    // Join the levels from the root down to the current depth.
    final StringBuilder path = new StringBuilder().append(arr[0]);
    for (int level = 1; level <= pos; level++) {
        path.append("::").append(arr[level]);
    }
    final String nodeId = path.toString();

    final String label = findLabel(nodeId);

    final String elem;
    if (pos == 0) {
        elem = "context";
    } else if (pos == 1) {
        elem = "category";
    } else {
        elem = "concept";
    }

    final ContextInfo node = new ContextInfo(elem, nodeId, label);

    final int next = pos + 1;
    if (next < arr.length) {
        node.getChildren().add(createContextInfo(arr, next));
    }

    return node;
}
/**
 * Clears the "communities" and "community-labels" caches.
 * <p>
 * The body is intentionally empty: the eviction of all the entries is
 * requested through the {@code @CacheEvict} annotation.
 */
@CacheEvict(value = { "communities", "community-labels" }, allEntries = true)
public void clearCache() {}
public class ZenodoContextList implements Serializable {
@ -73,4 +107,60 @@ public class CommunityClient {
}
}
/**
 * Node of a context hierarchy: an element name, an identifier, a label and
 * the list of nested nodes.
 * <p>
 * The element name is one of "context", "category" or "concept" when the node
 * is produced by {@code createContextInfo}.
 * <p>
 * NOTE(review): this class is declared as a non-static inner class, so every
 * instance keeps a reference to its enclosing instance; Java serialization of
 * an instance would also require the enclosing class to be serializable.
 * Consider declaring it {@code static} - confirm with the cache configuration.
 */
public class ContextInfo implements Serializable {

    private static final long serialVersionUID = 96456546778111904L;

    private String elem;

    private String id;

    private String label;

    /** Nested nodes; never {@code null}. */
    private List<ContextInfo> children = new ArrayList<>();

    /** Creates an empty node (no element name, id or label, no children). */
    public ContextInfo() {}

    /**
     * Creates a node without children.
     *
     * @param elem
     *            the element name
     * @param id
     *            the node identifier
     * @param label
     *            the node label, may be {@code null}
     */
    public ContextInfo(final String elem, final String id, final String label) {
        this.elem = elem;
        this.id = id;
        this.label = label;
    }

    public String getElem() {
        return elem;
    }

    public void setElem(final String elem) {
        this.elem = elem;
    }

    public String getId() {
        return id;
    }

    public void setId(final String id) {
        this.id = id;
    }

    /**
     * @return the mutable list of nested nodes, never {@code null}
     */
    public List<ContextInfo> getChildren() {
        return children;
    }

    /**
     * Replaces the nested nodes.
     *
     * @param children
     *            the new list; {@code null} is treated as "no children" so that
     *            {@link #getChildren()} can always be used safely
     */
    public void setChildren(final List<ContextInfo> children) {
        this.children = children != null ? children : new ArrayList<>();
    }

    /**
     * @return {@code true} when the element name is "context"
     */
    public boolean isRoot() {
        return "context".equals(elem);
    }

    public String getLabel() {
        return label;
    }

    public void setLabel(final String label) {
        this.label = label;
    }
}
}

View File

@ -49,6 +49,7 @@ public class ResultEntry implements Serializable {
@Schema(requiredMode = RequiredMode.REQUIRED, description = "Use opendoar___::2659 for Zenodo Publications; re3data_____::r3d100010468 for Zenodo datasets; infrastruct::openaire for OpenAIRE portal.")
private String collectedFromId;
private String hostedById;
// String according to the EGI context profile, example: egi::classification::natsc::math

View File

@ -1,14 +1,33 @@
package eu.dnetlib.app.directindex.mapping;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import eu.dnetlib.app.directindex.clients.CommunityClient;
import eu.dnetlib.app.directindex.clients.DatasourceManagerClient;
import eu.dnetlib.app.directindex.clients.VocabularyClient;
import eu.dnetlib.app.directindex.errors.DirectIndexApiException;
import eu.dnetlib.app.directindex.input.DatasourceEntry;
import eu.dnetlib.app.directindex.input.PidEntry;
import eu.dnetlib.app.directindex.input.ResultEntry;
import eu.dnetlib.dhp.schema.solr.Context;
import eu.dnetlib.dhp.schema.solr.Pid;
import eu.dnetlib.dhp.schema.solr.Provenance;
import eu.dnetlib.dhp.schema.solr.RecordType;
import eu.dnetlib.dhp.schema.solr.RelatedRecord;
import eu.dnetlib.dhp.schema.solr.Result;
import eu.dnetlib.dhp.schema.solr.SolrRecord;
import eu.dnetlib.dhp.schema.solr.SolrRecordHeader;
import eu.dnetlib.dhp.schema.solr.SolrRecordHeader.Status;
@Component
public class SolrRecordMapper {
@ -16,24 +35,46 @@ public class SolrRecordMapper {
@Autowired
private DatasourceManagerClient dsmClient;
public SolrRecord toSolrRecord(final ResultEntry result) {
// TODO
@Autowired
private VocabularyClient vocClient;
@Autowired
private CommunityClient communityClient;
private static final Log log = LogFactory.getLog(SolrRecordMapper.class);
public SolrRecord toSolrRecord(final ResultEntry result) {
final SolrRecord sr = new SolrRecord();
sr.setHeader(null);
sr.setResult(null);
sr.setHeader(prepareDnetHeader(result));
sr.setResult(prepareDnetResult(result));
sr.setDatasource(null); // NULL for Result Entry
sr.setProject(null); // NULL for Result Entry
sr.setOrganization(null); // NULL for Result Entry
sr.setPerson(null); // NULL for Result Entry
// sr.setDatasource(null);
// sr.setProject(null);
// sr.setOrganization(null);
// sr.setPerson(null);
sr.setLinks(null);
sr.setCollectedfrom(null);
sr.setPid(null);
sr.setContext(null);
sr.setMeasures(null);
sr.setCollectedfrom(Arrays.asList(prepareDnetCollectedFrom(result)));
if (result.getLinksToProjects() != null) {
sr.setLinks(result.getLinksToProjects()
.stream()
.map(this::prepareDnetProjectLink)
.toList());
}
if (result.getPids() != null) {
sr.setPid(result.getPids()
.stream()
.map(this::prepareDnetPid)
.toList());
}
if (result.getContexts() != null) {
sr.setContext(prepareDnetContext(result.getContexts()));
}
// sr.setMeasures(null);
return sr;
}
@ -43,6 +84,80 @@ public class SolrRecordMapper {
return null;
}
/**
 * Builds the record header for a result entry.
 * <p>
 * The header carries the OpenAIRE id, the original id (wrapped in a list) and
 * the record type obtained with {@code RecordType.valueOf} from the entry
 * type; the status is fixed to {@code UNDER_CURATION} and the record is
 * flagged as not deleted by inference.
 *
 * @param re
 *            the result entry to describe
 * @return the populated header
 */
private SolrRecordHeader prepareDnetHeader(final ResultEntry re) {
    final SolrRecordHeader hdr = new SolrRecordHeader();

    // Fixed values
    hdr.setStatus(Status.UNDER_CURATION); // TODO verify whether this status is the correct one
    hdr.setDeletedbyinference(false);

    // Values copied from the entry
    hdr.setId(re.getOpenaireId());
    hdr.setOriginalId(Arrays.asList(re.getOriginalId()));
    hdr.setRecordType(RecordType.valueOf(re.getType()));

    return hdr;
}
/**
 * Placeholder for the conversion of a result entry to the {@code Result}
 * section of the record.
 * <p>
 * Not implemented yet: the method currently always returns {@code null}.
 *
 * @param re
 *            the result entry to convert
 * @return currently always {@code null}
 */
private Result prepareDnetResult(final ResultEntry re) {
// TODO Auto-generated method stub
return null;
}
/**
 * Builds the provenance of a result entry from its "collected from" datasource.
 * <p>
 * The datasource name is obtained from the datasource manager client; when the
 * lookup fails with a {@code DirectIndexApiException} a warning is logged and
 * a fixed placeholder name is used instead.
 *
 * @param re
 *            the result entry
 * @return the provenance with the datasource id and name
 */
private Provenance prepareDnetCollectedFrom(final ResultEntry re) {
    final String dsId = re.getCollectedFromId();

    String dsName;
    try {
        dsName = dsmClient.findDatasource(dsId).getName();
    } catch (final DirectIndexApiException e) {
        log.warn("Invalid datasource id: " + dsId, e);
        dsName = "UNRECOGNIZED DATASOURCE";
    }

    final Provenance provenance = new Provenance();
    provenance.setDsId(dsId);
    provenance.setDsName(dsName);
    return provenance;
}
/**
 * Placeholder for the conversion of a project link to a {@code RelatedRecord}.
 * <p>
 * Not implemented yet: the method currently always returns {@code null}.
 *
 * @param link
 *            the project link to convert
 * @return currently always {@code null}
 */
private RelatedRecord prepareDnetProjectLink(final String link) {
// TODO Auto-generated method stub
return null;
}
/**
 * Converts a pid entry to a {@code Pid}.
 * <p>
 * The type label is looked up in the "dnet:pid_types" vocabulary; when the
 * vocabulary has no non-blank label for the type, or when the vocabulary
 * cannot be accessed (a warning is logged), the type code is used as label.
 *
 * @param pe
 *            the pid entry
 * @return the pid with value, type code and type label
 */
private Pid prepareDnetPid(final PidEntry pe) {
    final String typeCode = pe.getType();

    String typeLabel;
    try {
        final String vocLabel = vocClient.findVocabulary("dnet:pid_types").get(typeCode);
        typeLabel = StringUtils.firstNonBlank(vocLabel, typeCode);
    } catch (final DirectIndexApiException e) {
        log.warn("Problem accessing dnet:pid_types vocabulary", e);
        typeLabel = typeCode;
    }

    final Pid pid = new Pid();
    pid.setValue(pe.getValue());
    pid.setTypeCode(typeCode);
    pid.setTypeLabel(typeLabel);
    return pid;
}
/**
 * Converts the context identifiers of a result entry to {@code Context} objects.
 * <p>
 * Every identifier is resolved through the community client and one
 * {@code Context} is produced for each community found. The conversion of the
 * community information is not implemented yet: the produced contexts are empty.
 *
 * @param list
 *            the context identifiers
 * @return the list of contexts
 */
private List<Context> prepareDnetContext(final List<String> list) {
    return list.stream()
            .map(this::findCommunitiesOrEmpty)
            .flatMap(Collection::stream)
            .map(info -> {
                // TODO
                return new Context();
            })
            .filter(Objects::nonNull)
            .toList();
}

/**
 * Resolves the communities of an identifier; a failure is logged as a warning
 * and produces an empty collection.
 */
private Collection<CommunityClient.ContextInfo> findCommunitiesOrEmpty(final String community) {
    try {
        return communityClient.findCommunities(community);
    } catch (final DirectIndexApiException e) {
        log.warn("Problem finding community: " + community, e);
        return Arrays.asList();
    }
}
public String calculateOpenaireId(final String originalId, final String collectedFromId) throws DirectIndexApiException {
return calculateOpenaireId(originalId, dsmClient.findDatasource(collectedFromId));
}

View File

@ -47,9 +47,6 @@ public class DirectIndexService {
final PendingAction info = new PendingAction();
if (StringUtils.isNotBlank(r.getOpenaireId())) {
// TODO THE UPDATE SHOULD BE PERFORMED IN THE PREVIOUS METHOD
if (!r.getOpenaireId().matches("^\\w{12}::\\w{32}$")) {
throw new DirectIndexApiException("Invalid openaireId: " + r.getOpenaireId() + " - regex ^\\w{12}::\\w{32}$ not matched");
}
@ -82,11 +79,8 @@ public class DirectIndexService {
private void fixOpenaireId(final ResultEntry r) throws DirectIndexApiException {
final DatasourceEntry ds = StringUtils.isNotBlank(r.getCollectedFromId()) ? dsmClient.findDatasource(r.getCollectedFromId()) : UNKNOWN_REPO;
final String openaireId = ds.getPrefix() + "::" + DigestUtils.md5Hex(r.getOpenaireId());
r.setOpenaireId(openaireId);
}
}

View File

@ -42,6 +42,3 @@ spring.jpa.open-in-view=true
spring.jpa.properties.hibernate.show_sql=false
spring.jpa.properties.hibernate.use_sql_comments=false
spring.jpa.properties.hibernate.format_sql=false
openaire.service.islookup.wsdl = http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl