From f8468c9c2296905c0f5102df78a797f4ab0ccf7c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 1 Dec 2020 20:09:35 +0100 Subject: [PATCH 1/3] added extention for new author pid (orcid_pending) --- .../src/main/java/eu/dnetlib/dhp/PropagationConstant.java | 5 ++++- .../PrepareResultOrcidAssociationStep1.java | 3 ++- .../SparkOrcidToResultFromSemRelJob.java | 5 +++-- .../dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java | 5 ++++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 1cc41c395..d38d79fec 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -44,7 +44,10 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; - public static final String PROPAGATION_AUTHOR_PID = "ORCID"; + public static final String PROPAGATION_AUTHOR_PID_CLASSID = "orcid_pending"; + public static final String ORCID = "orcid"; + public static final String PROPAGATION_AUTHOR_PID_CLASSNAME = "Open Researcher and Contributor ID"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 869831ba2..1e445828c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -102,7 +102,8 @@ public class PrepareResultOrcidAssociationStep1 { + " FROM result " + " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " - + " WHERE lower(MyP.qualifier.classid) = 'orcid') tmp " + + " WHERE lower(MyP.qualifier.classid) = '" + ORCID + "' or " + +" lower(MyP.qalifier.classid) = '" + PROPAGATION_AUTHOR_PID_CLASSID + "') tmp " + " GROUP BY id) r_t " + " JOIN (" + " SELECT source, target " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 3fc127064..3e5c4d641 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -176,7 +176,7 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); + p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID_CLASSID, PROPAGATION_AUTHOR_PID_CLASSNAME)); p .setDataInfo( getDataInfo( @@ -201,7 +201,8 @@ public class SparkOrcidToResultFromSemRelJob { return false; } for (StructuredProperty pid : pids.get()) { - if (PROPAGATION_AUTHOR_PID.equals(pid.getQualifier().getClassid())) { + if (PROPAGATION_AUTHOR_PID_CLASSID.equals(pid.getQualifier().getClassid().toLowerCase()) || + ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { return true; } } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index edd2e7ba7..aeaa8a3c1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -5,6 +5,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver; +import eu.dnetlib.dhp.PropagationConstant; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -166,7 +168,8 @@ public class OrcidPropagationJobTest { propagatedAuthors .filter( "id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' " - + "and name = 'Vajinder' and surname = 'Kumar' and pidType = 'ORCID'") + + "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" + + PropagationConstant.PROPAGATION_AUTHOR_PID_CLASSID +"'") .count()); Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count()); From 51c582c08ccceaf428aa5ff4998afa4a2cead26a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Dec 2020 11:12:54 +0100 Subject: [PATCH 2/3] added orcid class name among the constants set --- .../main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index 0b4d29c8e..1efa86586 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -9,6 +9,7 @@ public class ModelConstants { public static final String ORCID = "orcid"; public static final String ORCID_PENDING = "orcid_pending"; + public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID"; public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies"; public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; From cd285e98bc255e153dac982665e08698af39f10f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Dec 2020 11:13:23 +0100 Subject: [PATCH 3/3] usoing the constants defined in the ModelConstants class --- .../src/main/java/eu/dnetlib/dhp/PropagationConstant.java | 4 +--- .../PrepareResultOrcidAssociationStep1.java | 5 +++-- .../SparkOrcidToResultFromSemRelJob.java | 7 ++++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index d38d79fec..360cf5ffa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -44,9 +44,7 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; - public static final String PROPAGATION_AUTHOR_PID_CLASSID = "orcid_pending"; - public static final String ORCID = "orcid"; - public static final String PROPAGATION_AUTHOR_PID_CLASSNAME = "Open Researcher and Contributor ID"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 1e445828c..99ae1ee2d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -102,8 +103,8 @@ public class PrepareResultOrcidAssociationStep1 { + " FROM result " + " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " - + " WHERE lower(MyP.qualifier.classid) = '" + ORCID + "' or " - +" lower(MyP.qalifier.classid) = '" + PROPAGATION_AUTHOR_PID_CLASSID + "') tmp " + + " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or " + +" lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp " + " GROUP BY id) r_t " + " JOIN (" + " SELECT source, target " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 3e5c4d641..55f18007d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -176,7 +177,7 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID_CLASSID, PROPAGATION_AUTHOR_PID_CLASSNAME)); + p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME)); p .setDataInfo( getDataInfo( @@ -201,8 +202,8 @@ public class SparkOrcidToResultFromSemRelJob { return false; } for (StructuredProperty pid : pids.get()) { - if (PROPAGATION_AUTHOR_PID_CLASSID.equals(pid.getQualifier().getClassid().toLowerCase()) || - ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { + if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) || + ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { return true; } }