introduced mapping for ExternalReferences, made urls defined within an instance unique

This commit is contained in:
Claudio Atzori 2020-05-08 09:43:26 +02:00
parent 8c67073a07
commit 62ea19f1d3
4 changed files with 58 additions and 27 deletions

View File

@ -49,6 +49,9 @@ public class ModelConstants {
public static final String HAS_PARTICIPANT = "hasParticipant";
public static final String IS_PARTICIPANT = "isParticipant";
public static final String UNKNOWN = "UNKNOWN";
public static final String NOT_AVAILABLE = "not available";
public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier(
PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES);

View File

@ -1,12 +1,10 @@
package eu.dnetlib.dhp.actionmanager.migration;
import static eu.dnetlib.data.proto.KindProtos.Kind.entity;
import static eu.dnetlib.data.proto.KindProtos.Kind.relation;
import static eu.dnetlib.data.proto.TypeProtos.*;
import static eu.dnetlib.data.proto.TypeProtos.Type.*;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
@ -21,10 +19,6 @@ import eu.dnetlib.dhp.schema.oaf.*;
public class ProtoConverter implements Serializable {
public static final String UNKNOWN = "UNKNOWN";
public static final String NOT_AVAILABLE = "not available";
public static final String DNET_ACCESS_MODES = "dnet:access_modes";
public static Oaf convert(OafProtos.Oaf oaf) {
try {
switch (oaf.getKind()) {
@ -64,6 +58,7 @@ public class ProtoConverter implements Serializable {
case result:
final Result r = convertResult(oaf);
r.setInstance(convertInstances(oaf));
r.setExternalReference(convertExternalRefs(oaf));
return r;
case project:
return convertProject(oaf);
@ -94,13 +89,44 @@ public class ProtoConverter implements Serializable {
i.setHostedby(mapKV(ri.getHostedby()));
i.setInstancetype(mapQualifier(ri.getInstancetype()));
i.setLicense(mapStringField(ri.getLicense()));
i.setUrl(ri.getUrlList());
i
.setUrl(
ri.getUrlList() != null ? ri
.getUrlList()
.stream()
.distinct()
.collect(Collectors.toCollection(ArrayList::new)) : null);
i.setRefereed(mapStringField(ri.getRefereed()));
i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
return i;
}
/**
 * Converts the external references attached to the result entity of the given protobuf
 * record into {@link ExternalReference} objects.
 *
 * @param oaf the protobuf record; the result of its entity is read
 * @return one converted element per external reference, or a new empty list when the
 *         result reports no external references
 */
private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
    final ResultProtos.Result result = oaf.getEntity().getResult();
    // Guard clause: nothing to convert, hand back a fresh empty list.
    if (result.getExternalReferenceCount() <= 0) {
        return Lists.newArrayList();
    }
    return result
        .getExternalReferenceList()
        .stream()
        .map(ProtoConverter::convertExtRef)
        .collect(Collectors.toList());
}
/**
 * Builds an {@link ExternalReference} from a single protobuf external reference.
 *
 * <p>The url, sitename, refidentifier, query, label and description are copied as-is;
 * the qualifier goes through {@code mapQualifier}.
 *
 * @param e the protobuf external reference to read from
 * @return a newly created {@link ExternalReference} populated from {@code e}
 */
private static ExternalReference convertExtRef(ResultProtos.Result.ExternalReference e) {
    final ExternalReference ref = new ExternalReference();
    ref.setUrl(e.getUrl());
    ref.setSitename(e.getSitename());
    ref.setRefidentifier(e.getRefidentifier());
    ref.setQuery(e.getQuery());
    ref.setQualifier(mapQualifier(e.getQualifier()));
    ref.setLabel(e.getLabel());
    ref.setDescription(e.getDescription());
    // NOTE(review): this re-assigns the target's own dataInfo to itself, so nothing is
    // taken from the source message `e`. Presumably the intent was to map the dataInfo
    // of `e` -- confirm against the proto definition and the available mapping helpers.
    ref.setDataInfo(ref.getDataInfo());
    return ref;
}
private static Organization convertOrganization(OafProtos.Oaf oaf) {
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
final Organization org = setOaf(new Organization(), oaf);

View File

@ -5,10 +5,8 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
@ -115,12 +113,17 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
.setProcessingchargecurrency(
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
for (final Object o : doc.selectNodes("//dc:identifier")) {
final String url = ((Node) o).getText().trim();
if (url.startsWith("http")) {
instance.setUrl(Arrays.asList(url));
}
}
List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
instance
.setUrl(
nodes
.stream()
.filter(n -> StringUtils.isNotBlank(n.getText()))
.map(n -> n.getText().trim())
.filter(u -> u.startsWith("http"))
.distinct()
.collect(Collectors.toCollection(ArrayList::new)));
return Lists.newArrayList(instance);
}

View File

@ -6,10 +6,7 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
@ -80,6 +77,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final KeyValue hostedby) {
final Instance instance = new Instance();
final Set<String> url = new HashSet<>();
instance.setUrl(new ArrayList<>());
instance
.setInstancetype(
@ -100,17 +98,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
instance.getUrl().add(((Node) o).getText().trim());
url.add(((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
instance.getUrl().add(((Node) o).getText().trim());
url.add(((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
}
instance.getUrl().addAll(url);
return Arrays.asList(instance);
}