added extension for new author pid (orcid_pending) #63

Merged
claudio.atzori merged 4 commits from miriam.baglioni/dnet-hadoop:master into master 2020-12-02 11:15:01 +01:00
4 changed files with 13 additions and 5 deletions
Showing only changes of commit f8468c9c22 - Show all commits

View File

@ -44,7 +44,10 @@ public class PropagationConstant {
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";
public static final String PROPAGATION_AUTHOR_PID = "ORCID"; public static final String PROPAGATION_AUTHOR_PID_CLASSID = "orcid_pending";
public static final String ORCID = "orcid";
Review
Please remove this constant and reuse the one already defined in ModelConstants instead: https://code-repo.d4science.org/D-Net/dnet-hadoop/src/branch/master/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java#L10
public static final String PROPAGATION_AUTHOR_PID_CLASSNAME = "Open Researcher and Contributor ID";
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

View File

@ -102,7 +102,8 @@ public class PrepareResultOrcidAssociationStep1 {
+ " FROM result " + " FROM result "
+ " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (author) a AS MyT "
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
+ " WHERE lower(MyP.qualifier.classid) = 'orcid') tmp " + " WHERE lower(MyP.qualifier.classid) = '" + ORCID + "' or "
+" lower(MyP.qualifier.classid) = '" + PROPAGATION_AUTHOR_PID_CLASSID + "') tmp "
+ " GROUP BY id) r_t " + " GROUP BY id) r_t "
+ " JOIN (" + " JOIN ("
+ " SELECT source, target " + " SELECT source, target "

View File

@ -176,7 +176,7 @@ public class SparkOrcidToResultFromSemRelJob {
if (toaddpid) { if (toaddpid) {
StructuredProperty p = new StructuredProperty(); StructuredProperty p = new StructuredProperty();
p.setValue(autoritative_author.getOrcid()); p.setValue(autoritative_author.getOrcid());
p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID_CLASSID, PROPAGATION_AUTHOR_PID_CLASSNAME));
p p
.setDataInfo( .setDataInfo(
getDataInfo( getDataInfo(
@ -201,7 +201,8 @@ public class SparkOrcidToResultFromSemRelJob {
return false; return false;
} }
for (StructuredProperty pid : pids.get()) { for (StructuredProperty pid : pids.get()) {
if (PROPAGATION_AUTHOR_PID.equals(pid.getQualifier().getClassid())) { if (PROPAGATION_AUTHOR_PID_CLASSID.equals(pid.getQualifier().getClassid().toLowerCase()) ||
ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) {
return true; return true;
} }
} }

View File

@ -5,6 +5,8 @@ import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver;
import eu.dnetlib.dhp.PropagationConstant;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
@ -166,7 +168,8 @@ public class OrcidPropagationJobTest {
propagatedAuthors propagatedAuthors
.filter( .filter(
"id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' " "id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' "
+ "and name = 'Vajinder' and surname = 'Kumar' and pidType = 'ORCID'") + "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" +
PropagationConstant.PROPAGATION_AUTHOR_PID_CLASSID +"'")
.count()); .count());
Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count()); Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count());