forked from D-Net/dnet-hadoop

commit 2a77ebb431: resolving conflicts
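The diff spans three modules: GenerateRorActionSetJob drops the generation of parent/child relations between ROR organizations (along with the imports that only served that code), SparkGenerateDoiBoost gains a typed Spark aggregator that merges publications sharing a DOI, and SparkBulkTagJob adds a null filter ahead of result tagging.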
@@ -3,7 +3,6 @@ package eu.dnetlib.dhp.actionmanager.ror;
 
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
-import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
@@ -39,7 +38,6 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
 import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
-import eu.dnetlib.dhp.actionmanager.ror.model.Relationship;
 import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
@@ -51,7 +49,6 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue;
 import eu.dnetlib.dhp.schema.oaf.Oaf;
 import eu.dnetlib.dhp.schema.oaf.Organization;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
-import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;
@@ -168,38 +165,10 @@ public class GenerateRorActionSetJob {
     final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
     res.add(new AtomicAction<>(Organization.class, o));
 
-    for (final Relationship rorRel : r.getRelationships()) {
-        if (rorRel.getType().equalsIgnoreCase("parent")) {
-            final String orgId1 = calculateOpenaireId(r.getId());
-            final String orgId2 = calculateOpenaireId(rorRel.getId());
-            res
-                .add(
-                    new AtomicAction<>(Relation.class,
-                        calculateHierarchyRel(orgId1, orgId2, ModelConstants.IS_PARENT_OF)));
-            res
-                .add(
-                    new AtomicAction<>(Relation.class,
-                        calculateHierarchyRel(orgId2, orgId1, ModelConstants.IS_CHILD_OF)));
-        }
-    }
-
     return res;
 }
 
-private static Relation calculateHierarchyRel(final String source, final String target, final String relClass) {
-    final Relation rel = new Relation();
-    rel.setSource(source);
-    rel.setTarget(target);
-    rel.setRelType(ORG_ORG_RELTYPE);
-    rel.setSubRelType(ModelConstants.RELATIONSHIP);
-    rel.setRelClass(relClass);
-    rel.setCollectedfrom(ROR_COLLECTED_FROM);
-    rel.setDataInfo(ROR_DATA_INFO);
-    rel.setLastupdatetimestamp(System.currentTimeMillis());
-    return rel;
-}
-
 private static String calculateOpenaireId(final String rorId) {
     return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId));
 }
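The surviving calculateOpenaireId helper shows the identifier scheme: "20|<namespace prefix>::<md5 of the ROR id>". Below is a minimal standalone Scala sketch of that scheme, assuming DHPUtils.md5 is a plain hex-encoded MD5 of the input string and passing the ROR_NS_PREFIX constant in as a parameter; it is an illustration, not the project's implementation.

import java.nio.charset.StandardCharsets
import java.security.MessageDigest

object RorIdSketch {

  // Assumption: DHPUtils.md5 produces a lowercase hex MD5 digest.
  def md5(s: String): String =
    MessageDigest
      .getInstance("MD5")
      .digest(s.getBytes(StandardCharsets.UTF_8))
      .map("%02x".format(_))
      .mkString

  // Mirrors the format string kept in the diff: "20|%s::%s".
  def calculateOpenaireId(rorId: String, rorNsPrefix: String): String =
    s"20|$rorNsPrefix::${md5(rorId)}"
}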
@@ -59,6 +59,54 @@ object SparkGenerateDoiBoost {
     val workingDirPath = parser.get("workingPath")
     val openaireOrganizationPath = parser.get("openaireOrganizationPath")
 
+    val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable {
+
+        override def zero: Publication = null
+
+        override def reduce(b: Publication, a: (String, Publication)): Publication = {
+            if (b == null) {
+                if (a != null && a._2 != null) {
+                    a._2.setId(a._1)
+                    return a._2
+                }
+            } else {
+                if (a != null && a._2 != null) {
+                    b.mergeOAFDataInfo(a._2)
+                    b.mergeFrom(a._2)
+                    b.setId(a._1)
+                    val authors = AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor)
+                    b.setAuthor(authors)
+                    return b
+                }
+            }
+            new Publication
+        }
+
+        override def merge(b1: Publication, b2: Publication): Publication = {
+            if (b1 == null) {
+                if (b2 != null)
+                    return b2
+            } else {
+                if (b2 != null) {
+                    b1.mergeOAFDataInfo(b2)
+                    b1.mergeFrom(b2)
+                    val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
+                    b1.setAuthor(authors)
+                    if (b2.getId != null && b2.getId.nonEmpty)
+                        b1.setId(b2.getId)
+                    return b1
+                }
+            }
+            new Publication
+        }
+
+        override def finish(reduction: Publication): Publication = reduction
+
+        override def bufferEncoder: Encoder[Publication] = Encoders.kryo[Publication]
+
+        override def outputEncoder: Encoder[Publication] = Encoders.kryo[Publication]
+    }
+
     implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
     implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization]
     implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
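The new crossrefAggregator is only defined in this hunk; its reduce step folds (DOI, Publication) pairs into one merged Publication per key, and merge combines partial results across partitions. A sketch of how a typed Spark Aggregator like this is usually wired into a Dataset pipeline follows; the dataset name and the input/output paths are assumptions for illustration, not taken from the diff.

// Usage sketch: collapse (DOI, Publication) pairs into one merged Publication per DOI.
// `spark`, `crossrefPublications` and both paths are illustrative assumptions.
import org.apache.spark.sql.{Dataset, Encoders, SaveMode}

val crossrefPublications: Dataset[(String, Publication)] =
  spark.read
    .load(s"$workingDirPath/crossrefPublication")
    .as[(String, Publication)](Encoders.tuple(Encoders.STRING, mapEncoderPub))

crossrefPublications
  .groupByKey(_._1)(Encoders.STRING)   // group all records carrying the same DOI
  .agg(crossrefAggregator.toColumn)    // zero/reduce/merge collapse each group
  .map(_._2)(mapEncoderPub)            // keep only the merged Publication
  .write
  .mode(SaveMode.Overwrite)
  .save(s"$workingDirPath/mergedCrossrefPublication")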
@@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 
 import java.util.ArrayList;
+import java.util.Objects;
 import java.util.Optional;
 
 import org.apache.commons.io.IOUtils;
@@ -102,6 +103,7 @@ public class SparkBulkTagJob {
     ResultTagger resultTagger = new ResultTagger();
     readPath(spark, inputPath, resultClazz)
         .map(patchResult(), Encoders.bean(resultClazz))
+        .filter(Objects::nonNull)
        .map(
             (MapFunction<R, R>) value -> resultTagger
                 .enrichContextCriteria(
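The added .filter(Objects::nonNull) drops records that patchResult() maps to null before the tagger runs, so enrichContextCriteria never receives a null input. A hedged Scala analogue of the reshaped pipeline, where patch and enrich are placeholders standing in for patchResult() and the tagger call, not names from the codebase:

import org.apache.spark.sql.{Dataset, Encoder}

// Sketch of the same pipeline shape: patch, drop nulls, then enrich.
def tagResults[R](input: Dataset[R], patch: R => R, enrich: R => R)(
    implicit enc: Encoder[R]): Dataset[R] =
  input
    .map(patch)                   // may yield null for unpatchable records
    .filter((r: R) => r != null)  // mirrors .filter(Objects::nonNull)
    .map(enrich)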