merge with upstream

This commit is contained in:
Miriam Baglioni 2020-12-03 10:26:45 +01:00
commit 05c452f58d
8 changed files with 43 additions and 45 deletions

View File

@ -531,12 +531,12 @@ public class PublicationToOaf implements Serializable {
dataInfo.setInferred(false); dataInfo.setInferred(false);
dataInfo.setTrust("0.9"); dataInfo.setTrust("0.9");
dataInfo dataInfo
.setProvenanceaction( .setProvenanceaction(
mapQualifier( mapQualifier(
"sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry",
"Harvested", "Harvested",
"dnet:provenanceActions", "dnet:provenanceActions",
"dnet:provenanceActions")); "dnet:provenanceActions"));
sp.setDataInfo(dataInfo); sp.setDataInfo(dataInfo);
return sp; return sp;
} }

View File

@ -158,7 +158,7 @@ class CrossrefMappingTest {
rels.foreach(s => logger.info(s.getTarget)) rels.foreach(s => logger.info(s.getTarget))
assertEquals(rels.size, 3 ) assertEquals(rels.size, 6 )
} }

View File

@ -44,9 +44,6 @@ public class PropagationConstant {
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final String cfHbforResultQuery = "select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb " private static final String cfHbforResultQuery = "select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb "

View File

@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -23,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
@ -104,7 +104,7 @@ public class PrepareResultOrcidAssociationStep1 {
+ " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (author) a AS MyT "
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
+ " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or " + " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or "
+" lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp " + " lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp "
+ " GROUP BY id) r_t " + " GROUP BY id) r_t "
+ " JOIN (" + " JOIN ("
+ " SELECT source, target " + " SELECT source, target "

View File

@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -24,6 +23,7 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
@ -203,7 +203,7 @@ public class SparkOrcidToResultFromSemRelJob {
} }
for (StructuredProperty pid : pids.get()) { for (StructuredProperty pid : pids.get()) {
if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) || if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) ||
ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) {
return true; return true;
} }
} }

View File

@ -5,9 +5,7 @@ import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver;
import eu.dnetlib.dhp.PropagationConstant;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
@ -22,8 +20,11 @@ import org.junit.jupiter.api.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.PropagationConstant;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Dataset;
public class OrcidPropagationJobTest { public class OrcidPropagationJobTest {
@ -170,7 +171,8 @@ public class OrcidPropagationJobTest {
.filter( .filter(
"id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' " "id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' "
+ "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" + + "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" +
ModelConstants.ORCID_PENDING + "'")
ModelConstants.ORCID_PENDING + "'")
.count()); .count());
Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count()); Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count());

View File

@ -191,11 +191,13 @@ public class CleaningFunctions {
} }
final Set<String> collectedFrom = Optional final Set<String> collectedFrom = Optional
.ofNullable(r.getCollectedfrom()) .ofNullable(r.getCollectedfrom())
.map(c -> c.stream() .map(
.map(KeyValue::getKey) c -> c
.collect(Collectors.toCollection(HashSet::new))) .stream()
.orElse(new HashSet<>()); .map(KeyValue::getKey)
.collect(Collectors.toCollection(HashSet::new)))
.orElse(new HashSet<>());
for (Author a : r.getAuthor()) { for (Author a : r.getAuthor()) {
if (Objects.isNull(a.getPid())) { if (Objects.isNull(a.getPid())) {
@ -211,12 +213,13 @@ public class CleaningFunctions {
.map(p -> { .map(p -> {
// hack to distinguish orcid from orcid_pending // hack to distinguish orcid from orcid_pending
String pidProvenance = Optional String pidProvenance = Optional
.ofNullable(p.getDataInfo()) .ofNullable(p.getDataInfo())
.map(d -> Optional .map(
.ofNullable(d.getProvenanceaction()) d -> Optional
.map(Qualifier::getClassid) .ofNullable(d.getProvenanceaction())
.orElse("")) .map(Qualifier::getClassid)
.orElse(""); .orElse(""))
.orElse("");
if (pidProvenance.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) { if (pidProvenance.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
p.getQualifier().setClassid(ModelConstants.ORCID); p.getQualifier().setClassid(ModelConstants.ORCID);
} else { } else {
@ -229,8 +232,8 @@ public class CleaningFunctions {
Collectors Collectors
.toMap( .toMap(
p -> p.getQualifier().getClassid() + p.getValue(), p -> p.getQualifier().getClassid() + p.getValue(),
Function.identity(), Function.identity(),
(p1, p2) -> p1, (p1, p2) -> p1,
LinkedHashMap::new)) LinkedHashMap::new))
.values() .values()
.stream() .stream()

View File

@ -7,11 +7,6 @@ import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import java.util.List; import java.util.List;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
@ -20,13 +15,18 @@ import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
import org.mockito.Mock; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
//TODO to enable it we need to update the joined_entity.json test file //TODO to enable it we need to update the joined_entity.json test file
//@Disabled //@Disabled
@ -44,34 +44,30 @@ public class XmlRecordFactoryTest {
assertNotNull(je); assertNotNull(je);
Document doc = buildXml(je); Document doc = buildXml(je);
////TODO specific test assertion on doc //// TODO specific test assertion on doc
} }
@Test @Test
void testBologna() throws IOException, DocumentException { void testBologna() throws IOException, DocumentException {
final String json = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.json")); final String json = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.json"));
Publication oaf = new ObjectMapper().readValue(json, Publication.class); Publication oaf = new ObjectMapper().readValue(json, Publication.class);
assertNotNull(oaf); assertNotNull(oaf);
JoinedEntity je = new JoinedEntity(); JoinedEntity je = new JoinedEntity();
je.setEntity(oaf); je.setEntity(oaf);
assertNotNull(je); assertNotNull(je);
Document doc = buildXml(je); Document doc = buildXml(je);
//TODO specific test assertion on doc // TODO specific test assertion on doc
System.out.println(doc.asXML()); System.out.println(doc.asXML());
} }
private Document buildXml(JoinedEntity je) throws DocumentException { private Document buildXml(JoinedEntity je) throws DocumentException {
ContextMapper contextMapper = new ContextMapper(); ContextMapper contextMapper = new ContextMapper();
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
otherDsTypeId); otherDsTypeId);
String xml = xmlRecordFactory.build(je); String xml = xmlRecordFactory.build(je);