From 4993666d73786a2ca420cbd5658732c0768203de Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 12 Jan 2022 16:53:47 +0100 Subject: [PATCH 1/3] [BipFinderInstanceLevel] changed creation of the instance to allow to enrich existing instances with same pid --- .../main/java/eu/dnetlib/dhp/actionmanager/Constants.java | 1 + .../createunresolvedentities/PrepareBipFinder.java | 8 ++++++++ pom.xml | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 3a46228d8..b790d90cb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -18,6 +18,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class Constants { public static final String DOI = "doi"; + public static final String DOI_CLASSNAME = "Digital Object Identifier"; public static final String DEFAULT_DELIMITER = ","; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index e9c9f0350..30cfce903 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -95,11 +96,18 @@ public class PrepareBipFinder implements Serializable { }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) .map((MapFunction) v -> { Result r = new Result(); + final String cleanedPid = CleaningFunctions.normalizePidValue(DOI, v.getId()); r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), DOI)); Instance inst = new Instance(); inst.setMeasures(getMeasure(v)); + + inst.setPid(Arrays.asList(OafMapperUtils.structuredProperty(cleanedPid, + OafMapperUtils.qualifier(DOI, DOI_CLASSNAME, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES), null))); r.setInstance(Arrays.asList(inst)); + r.setDataInfo(OafMapperUtils.dataInfo(false,null,null,false, OafMapperUtils.qualifier(ModelConstants.PROVENANCE_ENRICH, null,ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS))); return r; }, Encoders.bean(Result.class)) .write() diff --git a/pom.xml b/pom.xml index b68671aec..34733cee4 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.9.24] + [2.9.25-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From e7d5a39c035d9d273f562dc5151305f0d759c540 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 12 Jan 2022 17:25:04 +0100 Subject: [PATCH 2/3] [BipFinderInstanceLevel] added tests in test class --- .../PrepareBipFinder.java | 33 +++++++++++++--- .../createunresolvedentities/ProduceTest.java | 39 +++++++++++++++++++ 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 30cfce903..80573c71a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,7 +11,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -34,6 +33,7 @@ import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Measure; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; @@ -102,12 +102,33 @@ public class PrepareBipFinder implements Serializable { Instance inst = new Instance(); inst.setMeasures(getMeasure(v)); - inst.setPid(Arrays.asList(OafMapperUtils.structuredProperty(cleanedPid, - OafMapperUtils.qualifier(DOI, DOI_CLASSNAME, - ModelConstants.DNET_PID_TYPES, - ModelConstants.DNET_PID_TYPES), null))); + inst + .setPid( + Arrays + .asList( + OafMapperUtils + .structuredProperty( + cleanedPid, + OafMapperUtils + .qualifier( + DOI, DOI_CLASSNAME, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES), + null))); r.setInstance(Arrays.asList(inst)); - r.setDataInfo(OafMapperUtils.dataInfo(false,null,null,false, OafMapperUtils.qualifier(ModelConstants.PROVENANCE_ENRICH, null,ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS))); + r + .setDataInfo( + OafMapperUtils + .dataInfo( + false, null, true, + false, + OafMapperUtils + .qualifier( + ModelConstants.PROVENANCE_ENRICH, + null, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + null)); return r; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 02c6582f1..8c20475ff 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -245,6 +245,45 @@ public class ProduceTest { .get(0) .getValue()); + Assertions.assertEquals("10.3390/s18072310", + tmp.filter(row -> row.getId().equals(doi)).collect() + .get(0) + .getInstance().get(0) + .getPid().get(0) + .getValue().toLowerCase()); + + Assertions.assertEquals("doi", + tmp.filter(row -> row.getId().equals(doi)).collect() + .get(0) + .getInstance().get(0) + .getPid().get(0) + .getQualifier().getClassid()); + + Assertions.assertEquals("Digital Object Identifier", + tmp.filter(row -> row.getId().equals(doi)).collect() + .get(0) + .getInstance().get(0) + .getPid().get(0) + .getQualifier().getClassname()); + + } + + @Test + void produceTestMeasures() throws Exception { + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDD(); + + List mes = tmp + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(i -> i.getPid().iterator()) + .collect(); + + Assertions.assertEquals(86, mes.size()); + + tmp.filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .foreach(e -> Assertions.assertEquals("sysimport:enrich", e.getDataInfo().getProvenanceaction().getClassid())); + } @Test From a75fb8c47ae0cd44398823a20ea2a24e1985d2ab Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 12 Jan 2022 18:06:26 +0100 Subject: [PATCH 3/3] [BipFinderInstanceLevel] change pom to align to the dhp-schema release 2.10.24 and refactoring --- .../createunresolvedentities/ProduceTest.java | 71 ++++++++++++------- pom.xml | 2 +- 2 files changed, 48 insertions(+), 25 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 8c20475ff..ce44f0036 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -245,26 +245,47 @@ public class ProduceTest { .get(0) .getValue()); - Assertions.assertEquals("10.3390/s18072310", - tmp.filter(row -> row.getId().equals(doi)).collect() - .get(0) - .getInstance().get(0) - .getPid().get(0) - .getValue().toLowerCase()); + Assertions + .assertEquals( + "10.3390/s18072310", + tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getPid() + .get(0) + .getValue() + .toLowerCase()); - Assertions.assertEquals("doi", - tmp.filter(row -> row.getId().equals(doi)).collect() - .get(0) - .getInstance().get(0) - .getPid().get(0) - .getQualifier().getClassid()); + Assertions + .assertEquals( + "doi", + tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getPid() + .get(0) + .getQualifier() + .getClassid()); - Assertions.assertEquals("Digital Object Identifier", - tmp.filter(row -> row.getId().equals(doi)).collect() - .get(0) - .getInstance().get(0) - .getPid().get(0) - .getQualifier().getClassname()); + Assertions + .assertEquals( + "Digital Object Identifier", + tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getPid() + .get(0) + .getQualifier() + .getClassname()); } @@ -274,15 +295,17 @@ public class ProduceTest { JavaRDD tmp = getResultJavaRDD(); List mes = tmp - .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) - .flatMap(row -> row.getInstance().iterator()) - .flatMap(i -> i.getPid().iterator()) - .collect(); + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(i -> i.getPid().iterator()) + .collect(); Assertions.assertEquals(86, mes.size()); - tmp.filter(row -> row.getInstance() != null && row.getInstance().size() > 0) - .foreach(e -> Assertions.assertEquals("sysimport:enrich", e.getDataInfo().getProvenanceaction().getClassid())); + tmp + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .foreach( + e -> Assertions.assertEquals("sysimport:enrich", e.getDataInfo().getProvenanceaction().getClassid())); } diff --git a/pom.xml b/pom.xml index a27f89f9f..7a026e668 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.9.25-SNAPSHOT] + [2.10.24] [4.0.3] [6.0.5] [3.1.6]