forked from D-Net/dnet-hadoop
Merge pull request 'added extension for new author pid (orcid_pending)' (#63) from miriam.baglioni/dnet-hadoop:master into master
LGTM
This commit is contained in:
commit
873c358d1d
|
@ -9,6 +9,7 @@ public class ModelConstants {
|
||||||
|
|
||||||
public static final String ORCID = "orcid";
|
public static final String ORCID = "orcid";
|
||||||
public static final String ORCID_PENDING = "orcid_pending";
|
public static final String ORCID_PENDING = "orcid_pending";
|
||||||
|
public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
|
||||||
|
|
||||||
public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
|
public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
|
||||||
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
|
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
|
||||||
|
|
|
@ -44,7 +44,8 @@ public class PropagationConstant {
|
||||||
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
|
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
|
||||||
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";
|
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";
|
||||||
|
|
||||||
public static final String PROPAGATION_AUTHOR_PID = "ORCID";
|
|
||||||
|
|
||||||
|
|
||||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.io.compress.GzipCodec;
|
import org.apache.hadoop.io.compress.GzipCodec;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -102,7 +103,8 @@ public class PrepareResultOrcidAssociationStep1 {
|
||||||
+ " FROM result "
|
+ " FROM result "
|
||||||
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
|
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
|
||||||
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
|
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
|
||||||
+ " WHERE lower(MyP.qualifier.classid) = 'orcid') tmp "
|
+ " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or "
|
||||||
|
+" lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp "
|
||||||
+ " GROUP BY id) r_t "
|
+ " GROUP BY id) r_t "
|
||||||
+ " JOIN ("
|
+ " JOIN ("
|
||||||
+ " SELECT source, target "
|
+ " SELECT source, target "
|
||||||
|
|
|
@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -176,7 +177,7 @@ public class SparkOrcidToResultFromSemRelJob {
|
||||||
if (toaddpid) {
|
if (toaddpid) {
|
||||||
StructuredProperty p = new StructuredProperty();
|
StructuredProperty p = new StructuredProperty();
|
||||||
p.setValue(autoritative_author.getOrcid());
|
p.setValue(autoritative_author.getOrcid());
|
||||||
p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID));
|
p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME));
|
||||||
p
|
p
|
||||||
.setDataInfo(
|
.setDataInfo(
|
||||||
getDataInfo(
|
getDataInfo(
|
||||||
|
@ -201,7 +202,8 @@ public class SparkOrcidToResultFromSemRelJob {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (StructuredProperty pid : pids.get()) {
|
for (StructuredProperty pid : pids.get()) {
|
||||||
if (PROPAGATION_AUTHOR_PID.equals(pid.getQualifier().getClassid())) {
|
if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) ||
|
||||||
|
ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,8 @@ import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver;
|
||||||
|
import eu.dnetlib.dhp.PropagationConstant;
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
@ -166,7 +168,8 @@ public class OrcidPropagationJobTest {
|
||||||
propagatedAuthors
|
propagatedAuthors
|
||||||
.filter(
|
.filter(
|
||||||
"id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' "
|
"id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' "
|
||||||
+ "and name = 'Vajinder' and surname = 'Kumar' and pidType = 'ORCID'")
|
+ "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" +
|
||||||
|
PropagationConstant.PROPAGATION_AUTHOR_PID_CLASSID +"'")
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count());
|
Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count());
|
||||||
|
|
Loading…
Reference in New Issue