diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java index 7a7a9a89cd..79138d641e 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java @@ -1,14 +1,25 @@ package eu.dnetlib.dhp.schema.oaf; import java.io.Serializable; +import java.util.List; import java.util.Objects; public abstract class Oaf implements Serializable { + protected List collectedfrom; + private DataInfo dataInfo; private Long lastupdatetimestamp; + public List getCollectedfrom() { + return collectedfrom; + } + + public void setCollectedfrom(List collectedfrom) { + this.collectedfrom = collectedfrom; + } + public DataInfo getDataInfo() { return dataInfo; } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java index 86f4ff616f..6db3b7f745 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java @@ -10,8 +10,6 @@ public abstract class OafEntity extends Oaf implements Serializable { private List originalId; - private List collectedfrom; - private List pid; private String dateofcollection; @@ -38,14 +36,6 @@ public abstract class OafEntity extends Oaf implements Serializable { this.originalId = originalId; } - public List getCollectedfrom() { - return collectedfrom; - } - - public void setCollectedfrom(List collectedfrom) { - this.collectedfrom = collectedfrom; - } - public List getPid() { return pid; } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java index 9d211d0586..aaeecb8716 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java @@ -18,8 +18,6 @@ public class Relation extends Oaf { private String target; - private List collectedFrom = new ArrayList<>(); - public String getRelType() { return relType; } @@ -60,14 +58,6 @@ public class Relation extends Oaf { this.target = target; } - public List getCollectedFrom() { - return collectedFrom; - } - - public void setCollectedFrom(final List collectedFrom) { - this.collectedFrom = collectedFrom; - } - public void mergeFrom(final Relation r) { checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal"); @@ -77,12 +67,12 @@ public class Relation extends Oaf { Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal"); checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal"); - setCollectedFrom( + setCollectedfrom( Stream.concat( - Optional.ofNullable(getCollectedFrom()) + Optional.ofNullable(getCollectedfrom()) .map(Collection::stream) .orElse(Stream.empty()), - Optional.ofNullable(r.getCollectedFrom()) + Optional.ofNullable(r.getCollectedfrom()) .map(Collection::stream) .orElse(Stream.empty())) .distinct() // relies on KeyValue.equals @@ -103,6 +93,6 @@ public class Relation extends Oaf { @Override public int hashCode() { - return Objects.hash(relType, subRelType, relClass, source, target, collectedFrom); + return Objects.hash(relType, subRelType, relClass, source, target, collectedfrom); } } diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java index c56ded1458..adeffa9fcd 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java @@ -46,7 +46,7 @@ public class ProtoConverter implements Serializable { rel.setRelType(r.getRelType().toString()); rel.setSubRelType(r.getSubRelType().toString()); rel.setRelClass(r.getRelClass()); - rel.setCollectedFrom( + rel.setCollectedfrom( r.getCollectedfromCount() > 0 ? r.getCollectedfromList().stream() .map(kv -> mapKV(kv)) diff --git a/dhp-workflows/dhp-actionmanager/src/test/resources/eu/dnetlib/dhp/actionmanager/promote/output/graph/merge_from_and_get/relation/relation_action_payload/result.json b/dhp-workflows/dhp-actionmanager/src/test/resources/eu/dnetlib/dhp/actionmanager/promote/output/graph/merge_from_and_get/relation/relation_action_payload/result.json index bc65230a84..36d3fdd272 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/resources/eu/dnetlib/dhp/actionmanager/promote/output/graph/merge_from_and_get/relation/relation_action_payload/result.json +++ b/dhp-workflows/dhp-actionmanager/src/test/resources/eu/dnetlib/dhp/actionmanager/promote/output/graph/merge_from_and_get/relation/relation_action_payload/result.json @@ -1,10 +1,10 @@ -{"collectedFrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::d0bbea1f5bed5864d1904eb602e608a6"} -{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|OpenstarTs__::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::fc7459b8fed8c0d47947fe04275251c0"} -{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|NARCIS__cris::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::c978e29d3b2ddf4f0c2b6e60d6613426"} -{"collectedFrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::b58bdbe8ae5acead04fc76777d2f8017"} -{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::8de0f5a712997aafe0d794a53e51b75a"} -{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|UnityFVG____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::89bab7c5a227fc27b2b9cadf475a6b71"} -{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::007a4870b31056f89b768cf508e1538e"} -{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|VTTRsInSsCrs::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::735915884eb439d42953372eaf934782"} -{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::9ea9c0996c87e1dc7fc69f94b5ed0010"} -{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","subRelType":"provision","target":"20|openaire____::c24a458004a31f9687089ea3d249de51"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::d0bbea1f5bed5864d1904eb602e608a6"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|OpenstarTs__::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::fc7459b8fed8c0d47947fe04275251c0"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|NARCIS__cris::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::c978e29d3b2ddf4f0c2b6e60d6613426"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::b58bdbe8ae5acead04fc76777d2f8017"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::8de0f5a712997aafe0d794a53e51b75a"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|UnityFVG____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::89bab7c5a227fc27b2b9cadf475a6b71"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::007a4870b31056f89b768cf508e1538e"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|VTTRsInSsCrs::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::735915884eb439d42953372eaf934782"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::9ea9c0996c87e1dc7fc69f94b5ed0010"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","subRelType":"provision","target":"20|openaire____::c24a458004a31f9687089ea3d249de51"} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index f1058db385..a7aa600cff 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -1,9 +1,38 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.dataInfo; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.journal; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.keyValue; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import eu.dnetlib.dhp.schema.oaf.*; -import java.util.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.GeoLocation; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.OAIProvenance; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentFactory; @@ -29,6 +58,12 @@ public abstract class AbstractMdRecordToOafMapper { qualifier("software", "software", "dnet:result_typologies", "dnet:result_typologies"); protected static final Qualifier OTHER_RESULTTYPE_QUALIFIER = qualifier("other", "other", "dnet:result_typologies", "dnet:result_typologies"); + protected static final Qualifier REPOSITORY_QUALIFIER = + qualifier( + "sysimport:crosswalk:repository", + "sysimport:crosswalk:repository", + "dnet:provenanceActions", + "dnet:provenanceActions"); protected AbstractMdRecordToOafMapper(final Map code2name) { this.code2name = code2name; @@ -55,13 +90,13 @@ public abstract class AbstractMdRecordToOafMapper { final String type = doc.valueOf("//dr:CobjCategory/@type"); final KeyValue collectedFrom = keyValue( - doc.valueOf("//oaf:collectedFrom/@id"), + createOpenaireId(10, doc.valueOf("//oaf:collectedFrom/@id"), true), doc.valueOf("//oaf:collectedFrom/@name")); final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom : keyValue( - doc.valueOf("//oaf:hostedBy/@id"), + createOpenaireId(10, doc.valueOf("//oaf:hostedBy/@id"), true), doc.valueOf("//oaf:hostedBy/@name")); final DataInfo info = prepareDataInfo(doc); @@ -154,7 +189,7 @@ public abstract class AbstractMdRecordToOafMapper { r1.setRelClass("isProducedBy"); r1.setSource(docId); r1.setTarget(projectId); - r1.setCollectedFrom(Arrays.asList(collectedFrom)); + r1.setCollectedfrom(Arrays.asList(collectedFrom)); r1.setDataInfo(info); r1.setLastupdatetimestamp(lastUpdateTimestamp); res.add(r1); @@ -165,7 +200,7 @@ public abstract class AbstractMdRecordToOafMapper { r2.setRelClass("produces"); r2.setSource(projectId); r2.setTarget(docId); - r2.setCollectedFrom(Arrays.asList(collectedFrom)); + r2.setCollectedfrom(Arrays.asList(collectedFrom)); r2.setDataInfo(info); r2.setLastupdatetimestamp(lastUpdateTimestamp); res.add(r2); @@ -398,7 +433,7 @@ public abstract class AbstractMdRecordToOafMapper { final Node n = doc.selectSingleNode("//oaf:datainfo"); if (n == null) { - return null; + return dataInfo(false, null, false, false, REPOSITORY_QUALIFIER, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 6de7303c35..476e617d78 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -1,11 +1,35 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.asString; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.dataInfo; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.journal; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; import eu.dnetlib.dhp.oa.graph.raw.common.DbClient; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Context; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import java.io.Closeable; import java.io.IOException; import java.sql.Array; @@ -119,7 +143,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication ds.setOriginalId(Arrays.asList(rs.getString("datasourceid"))); ds.setCollectedfrom( listKeyValues( - rs.getString("collectedfromid"), rs.getString("collectedfromname"))); + createOpenaireId(10, rs.getString("collectedfromid"), true), + rs.getString("collectedfromname"))); ds.setPid(new ArrayList<>()); ds.setDateofcollection(asString(rs.getDate("dateofcollection"))); ds.setDateoftransformation(null); // Value not returned by the SQL query @@ -185,7 +210,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication p.setOriginalId(Arrays.asList(rs.getString("projectid"))); p.setCollectedfrom( listKeyValues( - rs.getString("collectedfromid"), rs.getString("collectedfromname"))); + createOpenaireId(10, rs.getString("collectedfromid"), true), + rs.getString("collectedfromname"))); p.setPid(new ArrayList<>()); p.setDateofcollection(asString(rs.getDate("dateofcollection"))); p.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); @@ -240,7 +266,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication o.setOriginalId(Arrays.asList(rs.getString("organizationid"))); o.setCollectedfrom( listKeyValues( - rs.getString("collectedfromid"), rs.getString("collectedfromname"))); + createOpenaireId(10, rs.getString("collectedfromid"), true), + rs.getString("collectedfromname"))); o.setPid(new ArrayList<>()); o.setDateofcollection(asString(rs.getDate("dateofcollection"))); o.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); @@ -285,7 +312,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication final String dsId = createOpenaireId(10, rs.getString("datasource"), true); final List collectedFrom = listKeyValues( - rs.getString("collectedfromid"), rs.getString("collectedfromname")); + createOpenaireId(10, rs.getString("collectedfromid"), true), + rs.getString("collectedfromname")); final Relation r1 = new Relation(); r1.setRelType("datasourceOrganization"); @@ -293,7 +321,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication r1.setRelClass("isProvidedBy"); r1.setSource(dsId); r1.setTarget(orgId); - r1.setCollectedFrom(collectedFrom); + r1.setCollectedfrom(collectedFrom); r1.setDataInfo(info); r1.setLastupdatetimestamp(lastUpdateTimestamp); @@ -303,7 +331,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication r2.setRelClass("provides"); r2.setSource(orgId); r2.setTarget(dsId); - r2.setCollectedFrom(collectedFrom); + r2.setCollectedfrom(collectedFrom); r2.setDataInfo(info); r2.setLastupdatetimestamp(lastUpdateTimestamp); @@ -320,7 +348,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication final String projectId = createOpenaireId(40, rs.getString("project"), true); final List collectedFrom = listKeyValues( - rs.getString("collectedfromid"), rs.getString("collectedfromname")); + createOpenaireId(10, rs.getString("collectedfromid"), true), + rs.getString("collectedfromname")); final Relation r1 = new Relation(); r1.setRelType("projectOrganization"); @@ -328,7 +357,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication r1.setRelClass("isParticipant"); r1.setSource(projectId); r1.setTarget(orgId); - r1.setCollectedFrom(collectedFrom); + r1.setCollectedfrom(collectedFrom); r1.setDataInfo(info); r1.setLastupdatetimestamp(lastUpdateTimestamp); @@ -338,7 +367,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication r2.setRelClass("hasParticipant"); r2.setSource(orgId); r2.setTarget(projectId); - r2.setCollectedFrom(collectedFrom); + r2.setCollectedfrom(collectedFrom); r2.setDataInfo(info); r2.setLastupdatetimestamp(lastUpdateTimestamp); @@ -363,6 +392,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication "dnet:provenanceActions"), "0.9"); + final List collectedFrom = + listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); + try { if (rs.getString("source_type").equals("context")) { @@ -381,6 +413,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication r.setLastupdatetimestamp(lastUpdateTimestamp); r.setContext(prepareContext(rs.getString("source_id"), info)); r.setDataInfo(info); + r.setCollectedfrom(collectedFrom); return Arrays.asList(r); } else { @@ -395,18 +428,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication final Relation r2 = new Relation(); if (rs.getString("source_type").equals("project")) { + r1.setCollectedfrom(collectedFrom); r1.setRelType("resultProject"); r1.setSubRelType("outcome"); r1.setRelClass("produces"); + r2.setCollectedfrom(collectedFrom); r2.setRelType("resultProject"); r2.setSubRelType("outcome"); r2.setRelClass("isProducedBy"); } else { + r1.setCollectedfrom(collectedFrom); r1.setRelType("resultResult"); r1.setSubRelType("relationship"); r1.setRelClass("isRelatedTo"); + r2.setCollectedfrom(collectedFrom); r2.setRelType("resultResult"); r2.setSubRelType("relationship"); r2.setRelClass("isRelatedTo"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index e77b1f87fe..6b304c9b0b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -232,7 +232,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { r1.setRelClass("isRelatedTo"); r1.setSource(docId); r1.setTarget(otherId); - r1.setCollectedFrom(Arrays.asList(collectedFrom)); + r1.setCollectedfrom(Arrays.asList(collectedFrom)); r1.setDataInfo(info); r1.setLastupdatetimestamp(lastUpdateTimestamp); res.add(r1); @@ -243,7 +243,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { r2.setRelClass("isRelatedTo"); r2.setSource(otherId); r2.setTarget(docId); - r2.setCollectedFrom(Arrays.asList(collectedFrom)); + r2.setCollectedfrom(Arrays.asList(collectedFrom)); r2.setDataInfo(info); r2.setLastupdatetimestamp(lastUpdateTimestamp); res.add(r2); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 5e64624170..f6fab499bf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -334,7 +334,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { r.setRelClass(relClass); r.setSource(source); r.setTarget(target); - r.setCollectedFrom(Arrays.asList(collectedFrom)); + r.setCollectedfrom(Arrays.asList(collectedFrom)); r.setDataInfo(info); r.setLastupdatetimestamp(lastUpdateTimestamp); return r; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java index bfd5d5c7f4..4d3cb02d68 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java @@ -186,7 +186,7 @@ public abstract class AbstractScholexplorerParser { r.setTarget(targetId); r.setRelType(relationSemantic); r.setRelClass("datacite"); - r.setCollectedFrom(parsedObject.getCollectedfrom()); + r.setCollectedfrom(parsedObject.getCollectedfrom()); r.setDataInfo(di); rels.add(r); r = new DLIRelation(); @@ -195,7 +195,7 @@ public abstract class AbstractScholexplorerParser { r.setTarget(parsedObject.getId()); r.setRelType(inverseRelation); r.setRelClass("datacite"); - r.setCollectedFrom(parsedObject.getCollectedfrom()); + r.setCollectedfrom(parsedObject.getCollectedfrom()); r.setDateOfCollection(dateOfCollection); rels.add(r); if ("unknown".equalsIgnoreCase(relatedType)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 6af314d76e..b09c5d6385 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,11 +1,16 @@ package eu.dnetlib.dhp.oa.graph.raw; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Software; import java.io.IOException; import java.util.List; import java.util.Map; @@ -43,6 +48,7 @@ public class MappersTest { final Relation r2 = (Relation) list.get(2); assertValidId(p.getId()); + assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(p.getAuthor().size() > 0); assertTrue(p.getSubject().size() > 0); @@ -50,13 +56,24 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(p.getJournal().getName())); assertValidId(r1.getSource()); + assertValidId(r1.getTarget()); assertValidId(r2.getSource()); + assertValidId(r2.getTarget()); + assertValidId(r1.getCollectedfrom().get(0).getKey()); + assertValidId(r2.getCollectedfrom().get(0).getKey()); + assertNotNull(r1.getDataInfo()); + assertNotNull(r2.getDataInfo()); + assertNotNull(r1.getDataInfo().getTrust()); + assertNotNull(r2.getDataInfo().getTrust()); assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r2.getSource(), r1.getTarget()); assertTrue(StringUtils.isNotBlank(r1.getRelClass())); assertTrue(StringUtils.isNotBlank(r2.getRelClass())); assertTrue(StringUtils.isNotBlank(r1.getRelType())); assertTrue(StringUtils.isNotBlank(r2.getRelType())); + + // System.out.println(new ObjectMapper().writeValueAsString(r1)); + // System.out.println(new ObjectMapper().writeValueAsString(r2)); } @Test @@ -65,15 +82,35 @@ public class MappersTest { final List list = new OdfToOafMapper(code2name).processMdRecord(xml); - assertEquals(1, list.size()); + assertEquals(3, list.size()); assertTrue(list.get(0) instanceof Dataset); + assertTrue(list.get(1) instanceof Relation); + assertTrue(list.get(2) instanceof Relation); final Dataset d = (Dataset) list.get(0); + final Relation r1 = (Relation) list.get(1); + final Relation r2 = (Relation) list.get(2); assertValidId(d.getId()); + assertValidId(d.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); assertTrue(d.getSubject().size() > 0); + + assertValidId(r1.getSource()); + assertValidId(r1.getTarget()); + assertValidId(r2.getSource()); + assertValidId(r2.getTarget()); + assertNotNull(r1.getDataInfo()); + assertNotNull(r2.getDataInfo()); + assertNotNull(r1.getDataInfo().getTrust()); + assertNotNull(r2.getDataInfo().getTrust()); + assertEquals(r1.getSource(), r2.getTarget()); + assertEquals(r2.getSource(), r1.getTarget()); + assertTrue(StringUtils.isNotBlank(r1.getRelClass())); + assertTrue(StringUtils.isNotBlank(r2.getRelClass())); + assertTrue(StringUtils.isNotBlank(r1.getRelType())); + assertTrue(StringUtils.isNotBlank(r2.getRelType())); } @Test @@ -88,6 +125,7 @@ public class MappersTest { final Software s = (Software) list.get(0); assertValidId(s.getId()); + assertValidId(s.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(s.getTitle().get(0).getValue())); assertTrue(s.getAuthor().size() > 0); assertTrue(s.getSubject().size() > 0); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 3c7bc684d3..b88731ba24 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -1,10 +1,17 @@ package eu.dnetlib.dhp.oa.graph.raw; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import java.io.IOException; import java.sql.Array; import java.sql.Date; @@ -13,6 +20,7 @@ import java.sql.SQLException; import java.util.List; import java.util.Objects; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -42,14 +50,13 @@ public class MigrateDbEntitiesApplicationTest { final Datasource ds = (Datasource) list.get(0); assertValidId(ds.getId()); + assertValidId(ds.getCollectedfrom().get(0).getKey()); assertEquals(ds.getOfficialname().getValue(), getValueAsString("officialname", fields)); assertEquals(ds.getEnglishname().getValue(), getValueAsString("englishname", fields)); assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields)); assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals( ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields)); - assertEquals( - ds.getCollectedfrom().get(0).getKey(), getValueAsString("collectedfromid", fields)); assertEquals( ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); @@ -65,10 +72,9 @@ public class MigrateDbEntitiesApplicationTest { final Project p = (Project) list.get(0); assertValidId(p.getId()); + assertValidId(p.getCollectedfrom().get(0).getKey()); assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields)); assertEquals(p.getTitle().getValue(), getValueAsString("title", fields)); - assertEquals( - p.getCollectedfrom().get(0).getKey(), getValueAsString("collectedfromid", fields)); assertEquals( p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); @@ -86,6 +92,7 @@ public class MigrateDbEntitiesApplicationTest { final Organization o = (Organization) list.get(0); assertValidId(o.getId()); + assertValidId(o.getCollectedfrom().get(0).getKey()); assertEquals(o.getLegalshortname().getValue(), getValueAsString("legalshortname", fields)); assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields)); assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); @@ -98,8 +105,6 @@ public class MigrateDbEntitiesApplicationTest { assertEquals( o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[3]); - assertEquals( - o.getCollectedfrom().get(0).getKey(), getValueAsString("collectedfromid", fields)); assertEquals( o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); @@ -137,6 +142,8 @@ public class MigrateDbEntitiesApplicationTest { assertValidId(r2.getSource()); assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r2.getSource(), r1.getTarget()); + assertValidId(r1.getCollectedfrom().get(0).getKey()); + assertValidId(r2.getCollectedfrom().get(0).getKey()); } @Test @@ -146,7 +153,12 @@ public class MigrateDbEntitiesApplicationTest { final List list = app.processClaims(rs); assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Result); + final Result r = (Result) list.get(0); + verifyMocks(fields); + + assertValidId(r.getCollectedfrom().get(0).getKey()); } @Test @@ -157,6 +169,33 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(2, list.size()); verifyMocks(fields); + + assertTrue(list.get(0) instanceof Relation); + assertTrue(list.get(1) instanceof Relation); + + final Relation r1 = (Relation) list.get(0); + final Relation r2 = (Relation) list.get(1); + + assertValidId(r1.getSource()); + assertValidId(r1.getTarget()); + assertValidId(r2.getSource()); + assertValidId(r2.getTarget()); + assertNotNull(r1.getDataInfo()); + assertNotNull(r2.getDataInfo()); + assertNotNull(r1.getDataInfo().getTrust()); + assertNotNull(r2.getDataInfo().getTrust()); + assertEquals(r1.getSource(), r2.getTarget()); + assertEquals(r2.getSource(), r1.getTarget()); + assertTrue(StringUtils.isNotBlank(r1.getRelClass())); + assertTrue(StringUtils.isNotBlank(r2.getRelClass())); + assertTrue(StringUtils.isNotBlank(r1.getRelType())); + assertTrue(StringUtils.isNotBlank(r2.getRelType())); + + assertValidId(r1.getCollectedfrom().get(0).getKey()); + assertValidId(r2.getCollectedfrom().get(0).getKey()); + + // System.out.println(new ObjectMapper().writeValueAsString(r1)); + // System.out.println(new ObjectMapper().writeValueAsString(r2)); } private List prepareMocks(final String jsonFile) throws IOException, SQLException { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml index 0c36e8686f..94dc802fac 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml @@ -87,6 +87,7 @@ und + corda_______::226852 0) s.setPublicationDate(scholixSummary.getDate().get(0)); s.setLinkprovider( - rel.getCollectedFrom().stream() + rel.getCollectedfrom().stream() .map( cf -> new ScholixEntityId( @@ -73,7 +73,7 @@ public class Scholix implements Serializable { if (scholixSummary.getDate() != null && scholixSummary.getDate().size() > 0) s.setPublicationDate(scholixSummary.getDate().get(0)); s.setLinkprovider( - rel.getCollectedFrom().stream() + rel.getCollectedfrom().stream() .map( cf -> new ScholixEntityId( diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/provision/relation.json b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/provision/relation.json index e029ddf625..3cca6e370f 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/provision/relation.json +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/provision/relation.json @@ -1 +1 @@ -{"dataInfo":{"invisible":false,"inferred":null,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":null},"lastupdatetimestamp":null,"relType":"IsReferencedBy","subRelType":null,"relClass":"datacite","source":"50|dedup_______::4f00e4f0e82bb4cbb35261478e55568e","target":"60|97519e00ee2cddfa1f5bcb5220429b8f","collectedFrom":[{"key":"dli_________::europe_pmc__","value":"Europe PMC","dataInfo":null}]} \ No newline at end of file +{"dataInfo":{"invisible":false,"inferred":null,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":null},"lastupdatetimestamp":null,"relType":"IsReferencedBy","subRelType":null,"relClass":"datacite","source":"50|dedup_______::4f00e4f0e82bb4cbb35261478e55568e","target":"60|97519e00ee2cddfa1f5bcb5220429b8f","collectedfrom":[{"key":"dli_________::europe_pmc__","value":"Europe PMC","dataInfo":null}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index a6c261ec72..b663a8082e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -95,7 +95,18 @@ public class PrepareRelationsJob { readPathRelation(spark, inputRelationsPath) .filter( (FilterFunction) - value -> value.getDataInfo().getDeletedbyinference() == false) + r -> { + try { + return r != null + && r.getDataInfo() != null + && !r.getDataInfo().getDeletedbyinference(); + } catch (NullPointerException e) { + log.info( + "invalid NPE '{}'", + OBJECT_MAPPER.writeValueAsString(r)); + throw e; + } + }) .groupByKey( (MapFunction) value -> value.getSource(), Encoders.STRING()) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 3455cb5f3b..874c34c090 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; @@ -41,6 +42,7 @@ import org.dom4j.io.XMLWriter; public class XmlRecordFactory implements Serializable { + public static final String REL_SUBTYPE_DEDUP = "dedup"; private Map accumulators; private Set specialDatasourceTypes; @@ -91,7 +93,14 @@ public class XmlRecordFactory implements Serializable { // rels has to be processed before the contexts because they enrich the contextMap with // the // funding info. - final List relations = listRelations(je, templateFactory, contexts); + final List relations = + je.getLinks().stream() + .filter( + t -> + !REL_SUBTYPE_DEDUP.equalsIgnoreCase( + t.getRelation().getSubRelType())) + .map(link -> mapRelation(link, templateFactory, contexts)) + .collect(Collectors.toCollection(ArrayList::new)); final String mainType = ModelSupport.getMainType(type); metadata.addAll(buildContexts(mainType, contexts)); @@ -102,7 +111,7 @@ public class XmlRecordFactory implements Serializable { mainType, metadata, relations, - listChildren(entity, je.getEntity().getType(), templateFactory), + listChildren(entity, je, templateFactory), listExtraInfo(entity)); return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent); @@ -919,171 +928,149 @@ public class XmlRecordFactory implements Serializable { metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType)); } - private Qualifier getBestAccessright(final Result r) { - Qualifier bestAccessRight = new Qualifier(); - bestAccessRight.setClassid("UNKNOWN"); - bestAccessRight.setClassname("not available"); - bestAccessRight.setSchemeid("dnet:access_modes"); - bestAccessRight.setSchemename("dnet:access_modes"); + private String mapRelation(Tuple2 link, TemplateFactory templateFactory, Set contexts) { + final Relation rel = link.getRelation(); + final RelatedEntity re = link.getRelatedEntity(); + final String targetType = link.getRelatedEntity().getType(); - final LicenseComparator lc = new LicenseComparator(); - for (final Instance instance : r.getInstance()) { - if (lc.compare(bestAccessRight, instance.getAccessright()) > 0) { - bestAccessRight = instance.getAccessright(); - } + final List metadata = Lists.newArrayList(); + switch (EntityType.valueOf(targetType)) { + case publication: + case dataset: + case otherresearchproduct: + case software: + if (re.getTitle() != null && isNotBlank(re.getTitle().getValue())) { + metadata.add( + XmlSerializationUtils.mapStructuredProperty("title", re.getTitle())); + } + if (isNotBlank(re.getDateofacceptance())) { + metadata.add( + XmlSerializationUtils.asXmlElement( + "dateofacceptance", re.getDateofacceptance())); + } + if (isNotBlank(re.getPublisher())) { + metadata.add( + XmlSerializationUtils.asXmlElement("publisher", re.getPublisher())); + } + if (isNotBlank(re.getCodeRepositoryUrl())) { + metadata.add( + XmlSerializationUtils.asXmlElement( + "coderepositoryurl", re.getCodeRepositoryUrl())); + } + if (re.getResulttype() != null & re.getResulttype().isBlank()) { + metadata.add( + XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype())); + } + if (re.getCollectedfrom() != null) { + metadata.addAll( + re.getCollectedfrom().stream() + .map( + kv -> + XmlSerializationUtils.mapKeyValue( + "collectedfrom", kv)) + .collect(Collectors.toList())); + } + if (re.getPid() != null) { + metadata.addAll( + re.getPid().stream() + .map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p)) + .collect(Collectors.toList())); + } + break; + case datasource: + if (isNotBlank(re.getOfficialname())) { + metadata.add( + XmlSerializationUtils.asXmlElement( + "officialname", re.getOfficialname())); + } + if (re.getDatasourcetype() != null & !re.getDatasourcetype().isBlank()) { + mapDatasourceType(metadata, re.getDatasourcetype()); + } + if (re.getOpenairecompatibility() != null + & !re.getOpenairecompatibility().isBlank()) { + metadata.add( + XmlSerializationUtils.mapQualifier( + "openairecompatibility", re.getOpenairecompatibility())); + } + break; + case organization: + if (isNotBlank(re.getLegalname())) { + metadata.add( + XmlSerializationUtils.asXmlElement("legalname", re.getLegalname())); + } + if (isNotBlank(re.getLegalshortname())) { + metadata.add( + XmlSerializationUtils.asXmlElement( + "legalshortname", re.getLegalshortname())); + } + if (re.getCountry() != null & !re.getCountry().isBlank()) { + metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); + } + break; + case project: + if (isNotBlank(re.getProjectTitle())) { + metadata.add(XmlSerializationUtils.asXmlElement("title", re.getProjectTitle())); + } + if (isNotBlank(re.getCode())) { + metadata.add(XmlSerializationUtils.asXmlElement("code", re.getCode())); + } + if (isNotBlank(re.getAcronym())) { + metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym())); + } + if (re.getContracttype() != null & !re.getContracttype().isBlank()) { + metadata.add( + XmlSerializationUtils.mapQualifier( + "contracttype", re.getContracttype())); + } + if (re.getFundingtree() != null & contexts != null) { + metadata.addAll( + re.getFundingtree().stream() + .peek(ft -> fillContextMap(ft, contexts)) + .map(ft -> getRelFundingTree(ft)) + .collect(Collectors.toList())); + } + break; + default: + throw new IllegalArgumentException("invalid target type: " + targetType); } - return bestAccessRight; - } + final DataInfo info = rel.getDataInfo(); + final String scheme = ModelSupport.getScheme(re.getType(), targetType); - private List listRelations( - final JoinedEntity je, TemplateFactory templateFactory, final Set contexts) { - final List rels = Lists.newArrayList(); - - for (final Tuple2 link : je.getLinks()) { - - final Relation rel = link.getRelation(); - final RelatedEntity re = link.getRelatedEntity(); - final String targetType = link.getRelatedEntity().getType(); - - final List metadata = Lists.newArrayList(); - switch (EntityType.valueOf(targetType)) { - case publication: - case dataset: - case otherresearchproduct: - case software: - if (re.getTitle() != null && isNotBlank(re.getTitle().getValue())) { - metadata.add( - XmlSerializationUtils.mapStructuredProperty( - "title", re.getTitle())); - } - if (isNotBlank(re.getDateofacceptance())) { - metadata.add( - XmlSerializationUtils.asXmlElement( - "dateofacceptance", re.getDateofacceptance())); - } - if (isNotBlank(re.getPublisher())) { - metadata.add( - XmlSerializationUtils.asXmlElement("publisher", re.getPublisher())); - } - if (isNotBlank(re.getCodeRepositoryUrl())) { - metadata.add( - XmlSerializationUtils.asXmlElement( - "coderepositoryurl", re.getCodeRepositoryUrl())); - } - if (re.getResulttype() != null & re.getResulttype().isBlank()) { - metadata.add( - XmlSerializationUtils.mapQualifier( - "resulttype", re.getResulttype())); - } - if (re.getCollectedfrom() != null) { - metadata.addAll( - re.getCollectedfrom().stream() - .map( - kv -> - XmlSerializationUtils.mapKeyValue( - "collectedfrom", kv)) - .collect(Collectors.toList())); - } - if (re.getPid() != null) { - metadata.addAll( - re.getPid().stream() - .map( - p -> - XmlSerializationUtils.mapStructuredProperty( - "pid", p)) - .collect(Collectors.toList())); - } - break; - case datasource: - if (isNotBlank(re.getOfficialname())) { - metadata.add( - XmlSerializationUtils.asXmlElement( - "officialname", re.getOfficialname())); - } - if (re.getDatasourcetype() != null & !re.getDatasourcetype().isBlank()) { - mapDatasourceType(metadata, re.getDatasourcetype()); - } - if (re.getOpenairecompatibility() != null - & !re.getOpenairecompatibility().isBlank()) { - metadata.add( - XmlSerializationUtils.mapQualifier( - "openairecompatibility", re.getOpenairecompatibility())); - } - break; - case organization: - if (isNotBlank(re.getLegalname())) { - metadata.add( - XmlSerializationUtils.asXmlElement("legalname", re.getLegalname())); - } - if (isNotBlank(re.getLegalshortname())) { - metadata.add( - XmlSerializationUtils.asXmlElement( - "legalshortname", re.getLegalshortname())); - } - if (re.getCountry() != null & !re.getCountry().isBlank()) { - metadata.add( - XmlSerializationUtils.mapQualifier("country", re.getCountry())); - } - break; - case project: - if (isNotBlank(re.getProjectTitle())) { - metadata.add( - XmlSerializationUtils.asXmlElement("title", re.getProjectTitle())); - } - if (isNotBlank(re.getCode())) { - metadata.add(XmlSerializationUtils.asXmlElement("code", re.getCode())); - } - if (isNotBlank(re.getAcronym())) { - metadata.add( - XmlSerializationUtils.asXmlElement("acronym", re.getAcronym())); - } - if (re.getContracttype() != null & !re.getContracttype().isBlank()) { - metadata.add( - XmlSerializationUtils.mapQualifier( - "contracttype", re.getContracttype())); - } - if (re.getFundingtree() != null) { - metadata.addAll( - re.getFundingtree().stream() - .peek(ft -> fillContextMap(ft, contexts)) - .map(ft -> getRelFundingTree(ft)) - .collect(Collectors.toList())); - } - break; - default: - throw new IllegalArgumentException("invalid target type: " + targetType); - } - final DataInfo info = rel.getDataInfo(); - final String scheme = ModelSupport.getScheme(re.getType(), targetType); - - if (StringUtils.isBlank(scheme)) { - throw new IllegalArgumentException( - String.format("missing scheme for: <%s - %s>", re.getType(), targetType)); - } - - final String accumulatorName = - getRelDescriptor(rel.getRelType(), rel.getSubRelType(), rel.getRelClass()); - if (accumulators.containsKey(accumulatorName)) { - accumulators.get(accumulatorName).add(1); - } - - rels.add( - templateFactory.getRel( - targetType, - rel.getTarget(), - Sets.newHashSet(metadata), - rel.getRelClass(), - scheme, - info)); + if (StringUtils.isBlank(scheme)) { + throw new IllegalArgumentException( + String.format("missing scheme for: <%s - %s>", re.getType(), targetType)); } - return rels; + + final String accumulatorName = + getRelDescriptor(rel.getRelType(), rel.getSubRelType(), rel.getRelClass()); + if (accumulators.containsKey(accumulatorName)) { + accumulators.get(accumulatorName).add(1); + } + + return templateFactory.getRel( + targetType, + rel.getTarget(), + Sets.newHashSet(metadata), + rel.getRelClass(), + scheme, + info); } private List listChildren( - final OafEntity entity, String type, TemplateFactory templateFactory) { + final OafEntity entity, JoinedEntity je, TemplateFactory templateFactory) { final List children = Lists.newArrayList(); - EntityType entityType = EntityType.valueOf(type); + EntityType entityType = EntityType.valueOf(je.getEntity().getType()); + + children.addAll( + je.getLinks().stream() + .filter( + link -> + REL_SUBTYPE_DEDUP.equalsIgnoreCase( + link.getRelation().getSubRelType())) + .map(link -> mapRelation(link, templateFactory, null)) + .collect(Collectors.toCollection(ArrayList::new))); + if (MainEntityType.result.toString().equals(ModelSupport.getMainType(entityType))) { final List instances = ((Result) entity).getInstance(); if (instances != null) {