From 62ea19f1d394965b210ad1f7a68853c981e174aa Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Fri, 8 May 2020 09:43:26 +0200
Subject: [PATCH] introduced mapping for ExternalReferences, made urls defined
 within an instance unique

---
Review notes (free-form area after the separator, not part of the commit):
 * Line breaks and tab indentation were reconstructed from a
   whitespace-collapsed copy of this patch. Hunk line counts were checked
   against the @@ headers, but context whitespace is a best guess; use
   `git apply --ignore-whitespace` if a hunk is rejected.
 * Generic type arguments that were missing from the collapsed copy are
   restored (List<ExternalReference>, List<Node>, Set<String>); the stream
   lambdas on `nodes` do not compile against a raw List.
 * ProtoConverter.convertExtRef: `ex.setDataInfo(ex.getDataInfo())` assigned
   the freshly created object's own DataInfo back to itself and never read
   the proto. It now maps the DataInfo of the source proto `e`.
   NOTE(review): assumes ProtoConverter already provides
   mapDataInfo(FieldTypeProtos.DataInfo) -- TODO confirm.
 * OdfToOafMapper: urls are collected in a LinkedHashSet instead of a
   HashSet, so they stay unique and keep the order in which the identifiers
   are read from the document.
 * Every change stays inside an existing added line, so hunk headers and the
   diffstat below are still accurate; the post-image blob ids on the `index`
   lines are stale.

 .../dhp/schema/common/ModelConstants.java     |  3 ++
 .../migration/ProtoConverter.java             | 44 +++++++++++++++----
 .../dhp/oa/graph/raw/OafToOafMapper.java      | 23 +++++-----
 .../dhp/oa/graph/raw/OdfToOafMapper.java      | 15 +++----
 4 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
index 926b021103..accc06d122 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
@@ -49,6 +49,9 @@ public class ModelConstants {
 	public static final String HAS_PARTICIPANT = "hasParticipant";
 	public static final String IS_PARTICIPANT = "isParticipant";
 
+	public static final String UNKNOWN = "UNKNOWN";
+	public static final String NOT_AVAILABLE = "not available";
+
 	public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier(
 		PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID,
 		DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES);
diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
index 456113c438..90d573ac07 100644
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
@@ -1,12 +1,10 @@
 
 package eu.dnetlib.dhp.actionmanager.migration;
 
-import static eu.dnetlib.data.proto.KindProtos.Kind.entity;
-import static eu.dnetlib.data.proto.KindProtos.Kind.relation;
-import static eu.dnetlib.data.proto.TypeProtos.*;
-import static eu.dnetlib.data.proto.TypeProtos.Type.*;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
 
 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
@@ -21,10 +19,6 @@ import eu.dnetlib.dhp.schema.oaf.*;
 
 public class ProtoConverter implements Serializable {
 
-	public static final String UNKNOWN = "UNKNOWN";
-	public static final String NOT_AVAILABLE = "not available";
-	public static final String DNET_ACCESS_MODES = "dnet:access_modes";
-
 	public static Oaf convert(OafProtos.Oaf oaf) {
 		try {
 			switch (oaf.getKind()) {
@@ -64,6 +58,7 @@ public class ProtoConverter implements Serializable {
 			case result:
 				final Result r = convertResult(oaf);
 				r.setInstance(convertInstances(oaf));
+				r.setExternalReference(convertExternalRefs(oaf));
 				return r;
 			case project:
 				return convertProject(oaf);
@@ -94,13 +89,44 @@ public class ProtoConverter implements Serializable {
 		i.setHostedby(mapKV(ri.getHostedby()));
 		i.setInstancetype(mapQualifier(ri.getInstancetype()));
 		i.setLicense(mapStringField(ri.getLicense()));
-		i.setUrl(ri.getUrlList());
+		i
+			.setUrl(
+				ri.getUrlList() != null ? ri
+					.getUrlList()
+					.stream()
+					.distinct()
+					.collect(Collectors.toCollection(ArrayList::new)) : null);
 		i.setRefereed(mapStringField(ri.getRefereed()));
 		i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
 		i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
 		return i;
 	}
 
+	private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
+		ResultProtos.Result r = oaf.getEntity().getResult();
+		if (r.getExternalReferenceCount() > 0) {
+			return r
+				.getExternalReferenceList()
+				.stream()
+				.map(ProtoConverter::convertExtRef)
+				.collect(Collectors.toList());
+		}
+		return Lists.newArrayList();
+	}
+
+	private static ExternalReference convertExtRef(ResultProtos.Result.ExternalReference e) {
+		ExternalReference ex = new ExternalReference();
+		ex.setUrl(e.getUrl());
+		ex.setSitename(e.getSitename());
+		ex.setRefidentifier(e.getRefidentifier());
+		ex.setQuery(e.getQuery());
+		ex.setQualifier(mapQualifier(e.getQualifier()));
+		ex.setLabel(e.getLabel());
+		ex.setDescription(e.getDescription());
+		ex.setDataInfo(mapDataInfo(e.getDataInfo()));
+		return ex;
+	}
+
 	private static Organization convertOrganization(OafProtos.Oaf oaf) {
 		final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
 		final Organization org = setOaf(new Organization(), oaf);
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
index 08a89cb228..891fee57e8 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
@@ -5,10 +5,8 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId
 import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
+import java.util.stream.Collectors;
 
 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
@@ -115,12 +113,17 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
 			.setProcessingchargecurrency(
 				field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
 
-		for (final Object o : doc.selectNodes("//dc:identifier")) {
-			final String url = ((Node) o).getText().trim();
-			if (url.startsWith("http")) {
-				instance.setUrl(Arrays.asList(url));
-			}
-		}
+		List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
+		instance
+			.setUrl(
+				nodes
+					.stream()
+					.filter(n -> StringUtils.isNotBlank(n.getText()))
+					.map(n -> n.getText().trim())
+					.filter(u -> u.startsWith("http"))
+					.distinct()
+					.collect(Collectors.toCollection(ArrayList::new)));
+
 		return Lists.newArrayList(instance);
 	}
 
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
index 92a37c0675..04984d0086 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
@@ -6,10 +6,7 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
 import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 
 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
@@ -80,6 +77,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 		final KeyValue hostedby) {
 
 		final Instance instance = new Instance();
+		final Set<String> url = new LinkedHashSet<>();
 		instance.setUrl(new ArrayList<>());
 		instance
 			.setInstancetype(
@@ -100,17 +98,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 				field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
 
 		for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
-			instance.getUrl().add(((Node) o).getText().trim());
+			url.add(((Node) o).getText().trim());
 		}
 		for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
-			instance.getUrl().add(((Node) o).getText().trim());
+			url.add(((Node) o).getText().trim());
 		}
 		for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
-			instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
+			url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
 		}
 		for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
-			instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
+			url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
 		}
+		instance.getUrl().addAll(url);
 		return Arrays.asList(instance);
 	}
 