forked from antonis.lempesis/dnet-hadoop
deleted hierarchical rels from ror action set
This commit is contained in:
parent
922c6d66ef
commit
634869ce95
|
@ -3,7 +3,6 @@ package eu.dnetlib.dhp.actionmanager.ror;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
|
||||||
|
@ -39,7 +38,6 @@ import org.slf4j.LoggerFactory;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
|
import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
|
||||||
import eu.dnetlib.dhp.actionmanager.ror.model.Relationship;
|
|
||||||
import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
|
import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
@ -51,7 +49,6 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -64,20 +61,16 @@ public class GenerateRorActionSetJob {
|
||||||
|
|
||||||
private static final String ROR_NS_PREFIX = "ror_________";
|
private static final String ROR_NS_PREFIX = "ror_________";
|
||||||
|
|
||||||
private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues(
|
private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues("10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR");
|
||||||
"10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR");
|
|
||||||
|
|
||||||
private static final DataInfo ROR_DATA_INFO = dataInfo(
|
private static final DataInfo ROR_DATA_INFO = dataInfo(false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92");
|
||||||
false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92");
|
|
||||||
|
|
||||||
private static final Qualifier ROR_PID_TYPE = qualifier(
|
private static final Qualifier ROR_PID_TYPE = qualifier("ROR", "ROR", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
|
||||||
"ROR", "ROR", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
|
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
|
|
||||||
final String jsonConfiguration = IOUtils
|
final String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(GenerateRorActionSetJob.class
|
||||||
GenerateRorActionSetJob.class
|
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
|
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
@ -116,8 +109,7 @@ public class GenerateRorActionSetJob {
|
||||||
readInputPath(spark, inputPath)
|
readInputPath(spark, inputPath)
|
||||||
.map(GenerateRorActionSetJob::convertRorOrg)
|
.map(GenerateRorActionSetJob::convertRorOrg)
|
||||||
.flatMap(List::iterator)
|
.flatMap(List::iterator)
|
||||||
.mapToPair(
|
.mapToPair(aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||||
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
|
||||||
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
||||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
||||||
}
|
}
|
||||||
|
@ -153,12 +145,9 @@ public class GenerateRorActionSetJob {
|
||||||
o.setEcnutscode(null);
|
o.setEcnutscode(null);
|
||||||
if (r.getCountry() != null) {
|
if (r.getCountry() != null) {
|
||||||
o
|
o
|
||||||
.setCountry(
|
.setCountry(qualifier(r.getCountry().getCountryCode(), r
|
||||||
qualifier(
|
|
||||||
r.getCountry().getCountryCode(), r
|
|
||||||
.getCountry()
|
.getCountry()
|
||||||
.getCountryName(),
|
.getCountryName(), ModelConstants.DNET_COUNTRY_TYPE, ModelConstants.DNET_COUNTRY_TYPE));
|
||||||
ModelConstants.DNET_COUNTRY_TYPE, ModelConstants.DNET_COUNTRY_TYPE));
|
|
||||||
} else {
|
} else {
|
||||||
o.setCountry(null);
|
o.setCountry(null);
|
||||||
}
|
}
|
||||||
|
@ -168,38 +157,10 @@ public class GenerateRorActionSetJob {
|
||||||
final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
|
final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
|
||||||
res.add(new AtomicAction<>(Organization.class, o));
|
res.add(new AtomicAction<>(Organization.class, o));
|
||||||
|
|
||||||
for (final Relationship rorRel : r.getRelationships()) {
|
|
||||||
if (rorRel.getType().equalsIgnoreCase("parent")) {
|
|
||||||
final String orgId1 = calculateOpenaireId(r.getId());
|
|
||||||
final String orgId2 = calculateOpenaireId(rorRel.getId());
|
|
||||||
res
|
|
||||||
.add(
|
|
||||||
new AtomicAction<>(Relation.class,
|
|
||||||
calculateHierarchyRel(orgId1, orgId2, ModelConstants.IS_PARENT_OF)));
|
|
||||||
res
|
|
||||||
.add(
|
|
||||||
new AtomicAction<>(Relation.class,
|
|
||||||
calculateHierarchyRel(orgId2, orgId1, ModelConstants.IS_CHILD_OF)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Relation calculateHierarchyRel(final String source, final String target, final String relClass) {
|
|
||||||
final Relation rel = new Relation();
|
|
||||||
rel.setSource(source);
|
|
||||||
rel.setTarget(target);
|
|
||||||
rel.setRelType(ORG_ORG_RELTYPE);
|
|
||||||
rel.setSubRelType(ModelConstants.RELATIONSHIP);
|
|
||||||
rel.setRelClass(relClass);
|
|
||||||
rel.setCollectedfrom(ROR_COLLECTED_FROM);
|
|
||||||
rel.setDataInfo(ROR_DATA_INFO);
|
|
||||||
rel.setLastupdatetimestamp(System.currentTimeMillis());
|
|
||||||
return rel;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String calculateOpenaireId(final String rorId) {
|
private static String calculateOpenaireId(final String rorId) {
|
||||||
return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId));
|
return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId));
|
||||||
}
|
}
|
||||||
|
@ -212,8 +173,7 @@ public class GenerateRorActionSetJob {
|
||||||
final String type = e.getKey();
|
final String type = e.getKey();
|
||||||
final List<String> all = e.getValue().getAll();
|
final List<String> all = e.getValue().getAll();
|
||||||
if (all != null) {
|
if (all != null) {
|
||||||
final Qualifier qualifier = qualifier(
|
final Qualifier qualifier = qualifier(type, type, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
|
||||||
type, type, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
|
|
||||||
for (final String pid : all) {
|
for (final String pid : all) {
|
||||||
pids
|
pids
|
||||||
.add(structuredProperty(pid, qualifier, ROR_DATA_INFO));
|
.add(structuredProperty(pid, qualifier, ROR_DATA_INFO));
|
||||||
|
|
Loading…
Reference in New Issue