From 34f1d0904b6f2142e32fe4b3dafddcd390424779 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 16 Oct 2020 16:00:19 +0200 Subject: [PATCH] common IdentifierFactory in use on the mapping from the aggregator data --- .../raw/AbstractMdRecordToOafMapper.java | 71 +++++++++---------- .../raw/MigrateDbEntitiesApplication.java | 30 ++++---- .../dhp/oa/graph/raw/OafToOafMapper.java | 24 ++----- .../dhp/oa/graph/raw/OdfToOafMapper.java | 37 ++++------ .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 40 ++++------- 5 files changed, 82 insertions(+), 120 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 5b6ae72f18..50fc89f042 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -6,6 +6,8 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.util.*; +import com.google.common.collect.Lists; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentFactory; @@ -133,20 +135,33 @@ public abstract class AbstractMdRecordToOafMapper { final DataInfo info, final long lastUpdateTimestamp) { - final List oafs = new ArrayList<>(); + final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); + final String id = IdentifierFactory.createIdentifier(entity); + if (!id.equals(entity.getId())) { + entity.getOriginalId().add(entity.getId()); + entity.setId(id); + } + final List oafs = Lists.newArrayList(entity); + + if (!oafs.isEmpty()) { + oafs.addAll(addProjectRels(doc, entity)); + oafs.addAll(addOtherResultRels(doc, entity)); + } + + return oafs; + } + + private OafEntity createEntity(Document doc, String type, List instances, KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) { switch (type.toLowerCase()) { case "publication": final Publication p = new Publication(); populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); p.setJournal(prepareJournal(doc, info)); - oafs.add(p); - break; + return p; case "dataset": final Dataset d = new Dataset(); populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp); - d.setResulttype(DATASET_DEFAULT_RESULTTYPE); d.setStoragedate(prepareDatasetStorageDate(doc, info)); d.setDevice(prepareDatasetDevice(doc, info)); d.setSize(prepareDatasetSize(doc, info)); @@ -154,48 +169,34 @@ public abstract class AbstractMdRecordToOafMapper { d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); d.setGeolocation(prepareDatasetGeoLocations(doc, info)); - oafs.add(d); - break; + return d; case "software": final Software s = new Software(); populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); s.setLicense(prepareSoftwareLicenses(doc, info)); s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); - oafs.add(s); - break; + return s; case "": case "otherresearchproducts": default: final OtherResearchProduct o = new OtherResearchProduct(); populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp); - o.setResulttype(ORP_DEFAULT_RESULTTYPE); o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); o.setTool(prepareOtherResearchProductTools(doc, info)); - oafs.add(o); - break; + return o; } - - if (!oafs.isEmpty()) { - oafs.addAll(addProjectRels(doc, collectedFrom, info, lastUpdateTimestamp)); - oafs.addAll(addOtherResultRels(doc, collectedFrom, info, lastUpdateTimestamp)); - } - - return oafs; } private List addProjectRels( final Document doc, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp) { + final OafEntity entity) { final List res = new ArrayList<>(); - final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false); + final String docId = entity.getId(); for (final Object o : doc.selectNodes("//oaf:projectid")) { @@ -207,13 +208,11 @@ public abstract class AbstractMdRecordToOafMapper { res .add( getRelation( - docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, - lastUpdateTimestamp)); + docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity)); res .add( getRelation( - projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, - lastUpdateTimestamp)); + projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity)); } } @@ -225,26 +224,22 @@ public abstract class AbstractMdRecordToOafMapper { final String relType, final String subRelType, final String relClass, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp) { + final OafEntity entity) { final Relation rel = new Relation(); rel.setRelType(relType); rel.setSubRelType(subRelType); rel.setRelClass(relClass); rel.setSource(source); rel.setTarget(target); - rel.setCollectedfrom(Arrays.asList(collectedFrom)); - rel.setDataInfo(info); - rel.setLastupdatetimestamp(lastUpdateTimestamp); + rel.setCollectedfrom(entity.getCollectedfrom()); + rel.setDataInfo(entity.getDataInfo()); + rel.setLastupdatetimestamp(entity.getLastupdatetimestamp()); return rel; } protected abstract List addOtherResultRels( final Document doc, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp); + final OafEntity entity); private void populateResultFields( final Result r, @@ -257,7 +252,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setLastupdatetimestamp(lastUpdateTimestamp); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); - r.setOriginalId(Arrays.asList(findOriginalId(doc))); + r.setOriginalId(Lists.newArrayList(findOriginalId(doc))); r.setCollectedfrom(Arrays.asList(collectedFrom)); r.setPid(prepareResultPids(doc, info)); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index adf7b92be2..30fdd17e96 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -445,22 +445,26 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); try { + final String targetType = rs.getString(TARGET_TYPE); if (rs.getString(SOURCE_TYPE).equals("context")) { final Result r; - if (rs.getString(TARGET_TYPE).equals("dataset")) { - r = new Dataset(); - r.setResulttype(DATASET_DEFAULT_RESULTTYPE); - } else if (rs.getString(TARGET_TYPE).equals("software")) { - r = new Software(); - r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); - } else if (rs.getString(TARGET_TYPE).equals("other")) { - r = new OtherResearchProduct(); - r.setResulttype(ORP_DEFAULT_RESULTTYPE); - } else { - r = new Publication(); - r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + switch (targetType) { + case "dataset": + r = new Dataset(); + break; + case "software": + r = new Software(); + break; + case "other": + r = new OtherResearchProduct(); + break; + case "publication": + default: + r = new Publication(); + break; } + r.setId(createOpenaireId(50, rs.getString("target_id"), false)); r.setLastupdatetimestamp(lastUpdateTimestamp); r.setContext(prepareContext(rs.getString("source_id"), info)); @@ -470,7 +474,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return Arrays.asList(r); } else { final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false); - final String targetId = createOpenaireId(rs.getString(TARGET_TYPE), rs.getString("target_id"), false); + final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false); final Relation r1 = new Relation(); final Relation r2 = new Relation(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index dea80fabda..135fcc595b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -10,6 +10,7 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Element; @@ -19,15 +20,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.GeoLocation; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OafToOafMapper extends AbstractMdRecordToOafMapper { @@ -256,12 +248,10 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List addOtherResultRels( - final Document doc, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp) { - final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false); + final Document doc, + final OafEntity entity) { + final String docId = entity.getId(); final List res = new ArrayList<>(); for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) { @@ -275,13 +265,11 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { res .add( getRelation( - docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, collectedFrom, info, - lastUpdateTimestamp)); + docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity)); res .add( getRelation( - otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, collectedFrom, info, - lastUpdateTimestamp)); + otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity)); } } return res; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 6fe7bb9713..522b3f247d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -12,6 +12,8 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; @@ -20,15 +22,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.GeoLocation; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @@ -313,12 +306,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List addOtherResultRels( - final Document doc, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp) { + final Document doc, + final OafEntity entity) { - final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false); + final String docId = entity.getId(); final List res = new ArrayList<>(); @@ -330,30 +321,26 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final String otherId = createOpenaireId(50, originalId, false); final String type = ((Node) o).valueOf("@relationType"); - if (type.equalsIgnoreCase("IsSupplementTo")) { + if (type.equalsIgnoreCase(IS_SUPPLEMENT_TO)) { res .add( getRelation( - docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, - lastUpdateTimestamp)); + docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, entity)); res .add( getRelation( - otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, - lastUpdateTimestamp)); - } else if (type.equals("IsPartOf")) { - + otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, entity)); + } else if (type.equalsIgnoreCase(IS_PART_OF)) { res .add( getRelation( - docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, - lastUpdateTimestamp)); + docId, otherId, RESULT_RESULT, PART, IS_PART_OF, entity)); res .add( getRelation( - otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, - lastUpdateTimestamp)); + otherId, docId, RESULT_RESULT, PART, HAS_PARTS, entity)); } else { + // TODO catch more semantics } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 2c10f8f580..2f5466d495 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -11,6 +11,8 @@ import java.io.IOException; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -24,14 +26,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) @@ -71,7 +65,7 @@ public class MappersTest { assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); + assertTrue(p.getOriginalId().size() == 2); assertEquals("10.3897/oneeco.2.e13718", p.getOriginalId().get(0)); assertValidId(p.getCollectedfrom().get(0).getKey()); @@ -123,22 +117,7 @@ public class MappersTest { assertNotNull(p.getBestaccessright()); assertEquals("OPEN", p.getBestaccessright().getClassid()); - assertValidId(r1.getSource()); - assertValidId(r1.getTarget()); - assertValidId(r2.getSource()); - assertValidId(r2.getTarget()); - assertValidId(r1.getCollectedfrom().get(0).getKey()); - assertValidId(r2.getCollectedfrom().get(0).getKey()); - assertNotNull(r1.getDataInfo()); - assertNotNull(r2.getDataInfo()); - assertNotNull(r1.getDataInfo().getTrust()); - assertNotNull(r2.getDataInfo().getTrust()); - assertEquals(r1.getSource(), r2.getTarget()); - assertEquals(r2.getSource(), r1.getTarget()); - assertTrue(StringUtils.isNotBlank(r1.getRelClass())); - assertTrue(StringUtils.isNotBlank(r2.getRelClass())); - assertTrue(StringUtils.isNotBlank(r1.getRelType())); - assertTrue(StringUtils.isNotBlank(r2.getRelType())); + verifyRelations(p, r1, r2); // System.out.println(new ObjectMapper().writeValueAsString(p)); // System.out.println(new ObjectMapper().writeValueAsString(r1)); @@ -177,7 +156,7 @@ public class MappersTest { final Relation r2 = (Relation) list.get(2); assertValidId(d.getId()); - assertTrue(d.getOriginalId().size() == 1); + assertTrue(d.getOriginalId().size() == 2); assertEquals("oai:zenodo.org:3234526", d.getOriginalId().get(0)); assertValidId(d.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); @@ -230,10 +209,19 @@ public class MappersTest { }); assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid()); + verifyRelations(d, r1, r2); + } + + private void verifyRelations(OafEntity e, Relation r1, Relation r2) { + assertEquals(e.getId(), r1.getSource()); + assertEquals(e.getId(), r2.getTarget()); + assertValidId(r1.getSource()); assertValidId(r1.getTarget()); assertValidId(r2.getSource()); assertValidId(r2.getTarget()); + assertValidId(r1.getCollectedfrom().get(0).getKey()); + assertValidId(r2.getCollectedfrom().get(0).getKey()); assertNotNull(r1.getDataInfo()); assertNotNull(r2.getDataInfo()); assertNotNull(r1.getDataInfo().getTrust());