forked from D-Net/dnet-hadoop

commit 05c452f58d
merge with upstream
@@ -531,12 +531,12 @@ public class PublicationToOaf implements Serializable {
 		dataInfo.setInferred(false);
 		dataInfo.setTrust("0.9");
 		dataInfo
 			.setProvenanceaction(
 				mapQualifier(
 					"sysimport:crosswalk:entityregistry",
 					"Harvested",
 					"dnet:provenanceActions",
 					"dnet:provenanceActions"));
 		sp.setDataInfo(dataInfo);
 		return sp;
 	}
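The hunk above appears formatting-only (old and new lines read identically here), but it captures the fixed provenance stamp that PublicationToOaf attaches to mapped properties. Isolated as a minimal sketch: the class wrapper and method names are illustrative, and mapQualifier is reconstructed purely from its call shape, assuming the usual (classid, classname, schemeid, schemename) bean setters on Qualifier.

import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Qualifier;

// Sketch of the provenance stamp above: records crosswalked from an
// entity registry are marked non-inferred, trust "0.9", with a
// "Harvested" provenance action. mapQualifier is assumed from its call
// shape: (classid, classname, schemeid, schemename) -> Qualifier.
public class ProvenanceStampSketch {

	static Qualifier mapQualifier(String classid, String classname, String schemeid, String schemename) {
		Qualifier q = new Qualifier();
		q.setClassid(classid);
		q.setClassname(classname);
		q.setSchemeid(schemeid);
		q.setSchemename(schemename);
		return q;
	}

	static DataInfo harvestedDataInfo() {
		DataInfo dataInfo = new DataInfo();
		dataInfo.setInferred(false);
		dataInfo.setTrust("0.9");
		dataInfo
			.setProvenanceaction(
				mapQualifier(
					"sysimport:crosswalk:entityregistry",
					"Harvested",
					"dnet:provenanceActions",
					"dnet:provenanceActions"));
		return dataInfo;
	}
}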
@@ -158,7 +158,7 @@ class CrossrefMappingTest {
 
 
     rels.foreach(s => logger.info(s.getTarget))
-    assertEquals(rels.size, 3 )
+    assertEquals(rels.size, 6 )
 
 
   }
@@ -44,9 +44,6 @@ public class PropagationConstant {
 	public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
 	public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";
-
-
-
 
 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 
 	private static final String cfHbforResultQuery = "select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb "
@@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.Arrays;
 import java.util.List;
 
-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
@@ -23,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.Result;
 
@@ -104,7 +104,7 @@ public class PrepareResultOrcidAssociationStep1 {
 			+ " LATERAL VIEW EXPLODE (author) a AS MyT "
 			+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
 			+ " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or "
-			+" lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp "
+			+ " lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp "
 			+ " GROUP BY id) r_t "
 			+ " JOIN ("
 			+ " SELECT source, target "
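Only the spacing of the string concatenation changes here, but the query being assembled is worth seeing whole: Hive's LATERAL VIEW EXPLODE flattens the nested author and pid arrays so each author pid becomes a filterable row. Below is a hedged sketch of the fragment as runnable Spark SQL; the "result" view name and the literal values 'orcid' and 'orcid_pending' (assumed resolutions of the two ModelConstants) are assumptions, and the surrounding SELECT from the job is not reproduced. Note also that "qalifier" in the hunk's second predicate looks like a typo carried over unchanged from the old line.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

// Sketch: flatten the nested author/pid structure with LATERAL VIEW
// EXPLODE, then keep only ORCID-typed pids. Assumes a registered
// "result" view whose rows carry an array<struct> author column, each
// author carrying an array<struct> pid column, as the hunk suggests.
public class OrcidPidQuerySketch {

	public static Dataset<Row> selectOrcidPids(SparkSession spark) {
		String query = "SELECT id, MyP.value AS orcid "
			+ " FROM result "
			+ " LATERAL VIEW EXPLODE (author) a AS MyT "
			+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
			+ " WHERE lower(MyP.qualifier.classid) = 'orcid' "
			+ "    OR lower(MyP.qualifier.classid) = 'orcid_pending'";
		return spark.sql(query);
	}
}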
@@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.List;
 import java.util.Optional;
 
-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
@@ -24,6 +23,7 @@ import com.google.common.collect.Lists;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.PacePerson;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
@@ -203,7 +203,7 @@ public class SparkOrcidToResultFromSemRelJob {
 		}
 		for (StructuredProperty pid : pids.get()) {
 			if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) ||
 				ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) {
 				return true;
 			}
 		}
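Both sides of this hunk read identically in this view; the visible point is the membership test the job uses to decide whether an author already carries an ORCID-typed pid. Extracted as a standalone predicate (the class and method names here are illustrative, not the job's own):

import java.util.List;
import java.util.Optional;

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

// Illustrative predicate: true when the author already has a pid whose
// qualifier classid is orcid or orcid_pending (case-insensitive).
public class OrcidPidCheckSketch {

	static boolean hasOrcidPid(Author author) {
		Optional<List<StructuredProperty>> pids = Optional.ofNullable(author.getPid());
		if (!pids.isPresent()) {
			return false;
		}
		for (StructuredProperty pid : pids.get()) {
			String classid = pid.getQualifier().getClassid().toLowerCase();
			if (ModelConstants.ORCID_PENDING.equals(classid) || ModelConstants.ORCID.equals(classid)) {
				return true;
			}
		}
		return false;
	}
}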
@@ -5,9 +5,7 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 
-import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver;
-import eu.dnetlib.dhp.PropagationConstant;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -22,8 +20,11 @@ import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
+import eu.dnetlib.dhp.PropagationConstant;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Dataset;
 
 public class OrcidPropagationJobTest {
@@ -170,7 +171,8 @@ public class OrcidPropagationJobTest {
 				.filter(
 					"id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' "
 						+ "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" +
-					ModelConstants.ORCID_PENDING + "'")
+						ModelConstants.ORCID_PENDING + "'")
 				.count());
 
 		Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count());
@@ -191,11 +191,13 @@ public class CleaningFunctions {
 		}
 
 		final Set<String> collectedFrom = Optional
 			.ofNullable(r.getCollectedfrom())
-			.map(c -> c.stream()
-			.map(KeyValue::getKey)
-			.collect(Collectors.toCollection(HashSet::new)))
-			.orElse(new HashSet<>());
+			.map(
+				c -> c
+					.stream()
+					.map(KeyValue::getKey)
+					.collect(Collectors.toCollection(HashSet::new)))
+			.orElse(new HashSet<>());
 
 		for (Author a : r.getAuthor()) {
 			if (Objects.isNull(a.getPid())) {
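The change here is pure line-wrapping; both versions compute the same set. The pattern itself, collapsing a possibly-null list of KeyValue into a mutable set of keys, stands alone as a minimal sketch (the method name is made up; KeyValue is the dnet schema bean):

import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import eu.dnetlib.dhp.schema.oaf.KeyValue;

// Sketch: null-safe extraction of datasource keys from collectedfrom.
// An absent list yields an empty, still-mutable HashSet.
public class CollectedFromSketch {

	static Set<String> collectedFromKeys(List<KeyValue> collectedfrom) {
		return Optional
			.ofNullable(collectedfrom)
			.map(
				c -> c
					.stream()
					.map(KeyValue::getKey)
					.collect(Collectors.toCollection(HashSet::new)))
			.orElse(new HashSet<>());
	}
}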
@@ -211,12 +213,13 @@ public class CleaningFunctions {
 					.map(p -> {
 						// hack to distinguish orcid from orcid_pending
 						String pidProvenance = Optional
 							.ofNullable(p.getDataInfo())
-							.map(d -> Optional
-							.ofNullable(d.getProvenanceaction())
-							.map(Qualifier::getClassid)
-							.orElse(""))
-							.orElse("");
+							.map(
+								d -> Optional
+									.ofNullable(d.getProvenanceaction())
+									.map(Qualifier::getClassid)
+									.orElse(""))
+							.orElse("");
 						if (pidProvenance.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
 							p.getQualifier().setClassid(ModelConstants.ORCID);
 						} else {
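The in-code comment states the intent: a pid whose provenance action is sysimport:crosswalk:entityregistry came from an entity registry and is trusted as a confirmed orcid; everything else stays pending. A sketch of that rule follows; the else branch is cut off in the hunk, so its body here, downgrading to orcid_pending, is an assumption.

import java.util.Optional;

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

// Sketch of the orcid/orcid_pending disambiguation: only pids whose
// provenance action is sysimport:crosswalk:entityregistry are trusted
// as confirmed ORCIDs.
public class OrcidDisambiguationSketch {

	static void normalizeOrcidClassid(StructuredProperty p) {
		String pidProvenance = Optional
			.ofNullable(p.getDataInfo())
			.map(
				d -> Optional
					.ofNullable(d.getProvenanceaction())
					.map(Qualifier::getClassid)
					.orElse(""))
			.orElse("");
		if (pidProvenance.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
			p.getQualifier().setClassid(ModelConstants.ORCID);
		} else {
			// assumed: the hunk does not show this branch's body
			p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
		}
	}
}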
@@ -229,8 +232,8 @@ public class CleaningFunctions {
 							Collectors
 								.toMap(
 									p -> p.getQualifier().getClassid() + p.getValue(),
 									Function.identity(),
 									(p1, p2) -> p1,
 									LinkedHashMap::new))
 						.values()
 						.stream()
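This collector deduplicates pids in one pass: the map key joins classid and value, the merge function keeps the first occurrence, and LinkedHashMap preserves encounter order. Lifted out of the surrounding chain, which the hunk does not show in full, it reads as the following sketch (the helper name is hypothetical):

import java.util.LinkedHashMap;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;

import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

// Sketch: deduplicate structured properties on (classid, value), keeping
// the first occurrence and the original encounter order.
public class PidDedupSketch {

	static List<StructuredProperty> dedup(List<StructuredProperty> pids) {
		return pids
			.stream()
			.collect(
				Collectors
					.toMap(
						p -> p.getQualifier().getClassid() + p.getValue(),
						Function.identity(),
						(p1, p2) -> p1,
						LinkedHashMap::new))
			.values()
			.stream()
			.collect(Collectors.toList());
	}
}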
@@ -7,11 +7,6 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.util.List;
 
-import eu.dnetlib.dhp.schema.oaf.Oaf;
-import eu.dnetlib.dhp.schema.oaf.OafEntity;
-import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
-import eu.dnetlib.dhp.schema.oaf.Publication;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
@@ -20,13 +15,18 @@ import org.dom4j.io.SAXReader;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.mockito.Mock;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 
 import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
 import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
 import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
-import org.mockito.Mock;
+import eu.dnetlib.dhp.schema.oaf.Oaf;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 
 //TODO to enable it we need to update the joined_entity.json test file
 //@Disabled
@@ -44,34 +44,30 @@ public class XmlRecordFactoryTest {
 		assertNotNull(je);
 
 		Document doc = buildXml(je);
-		////TODO specific test assertion on doc
+		//// TODO specific test assertion on doc
 	}
 
-
-
 	@Test
 	void testBologna() throws IOException, DocumentException {
 		final String json = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.json"));
 		Publication oaf = new ObjectMapper().readValue(json, Publication.class);
 		assertNotNull(oaf);
 		JoinedEntity je = new JoinedEntity();
 		je.setEntity(oaf);
 		assertNotNull(je);
 
 		Document doc = buildXml(je);
-		//TODO specific test assertion on doc
+		// TODO specific test assertion on doc
 
 		System.out.println(doc.asXML());
 
-
-
 	}
 
 	private Document buildXml(JoinedEntity je) throws DocumentException {
 		ContextMapper contextMapper = new ContextMapper();
 
 		XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
 			otherDsTypeId);
 
 		String xml = xmlRecordFactory.build(je);
 