diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 89bdf0982..053300696 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -57,7 +57,10 @@ public class PropagationConstant { public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME = "Propagation of affiliation to result collected from datasources of type institutional repository"; public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID = "result:organization:semrel"; - public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME = "Propagation of affiliation to result through sematic relations"; + public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME = "Propagation of affiliation to result through semantic relations"; + + public static final String PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID = "project:organization:semrel"; + public static final String PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_NAME = "Propagation of participation to project through semantic relations"; public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID = "result:project:semrel"; public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME = "Propagation of result to project through semantic relation"; @@ -171,6 +174,41 @@ public class PropagationConstant { return newRelations; } + public static Relation getRelation(String source, String target, String rel_class) { + if (ModelConstants.HAS_PARTICIPANT.equals(rel_class)) { + return getParticipantRelation(source, target, rel_class); + } else + return getAffiliationRelation(source, target, rel_class); + } + + public static Relation 
getParticipantRelation( + String source, + String target, + String rel_class) { + return getRelation( + source, target, + rel_class, + ModelConstants.PROJECT_ORGANIZATION, + ModelConstants.PARTICIPATION, + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID, + PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_NAME); + } + + public static Relation getAffiliationRelation( + String source, + String target, + String rel_class) { + return getRelation( + source, target, + rel_class, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME); + } + public static Relation getRelation( String source, String target, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/Leaves.java similarity index 79% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/Leaves.java index 7984721e8..e010b54c0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/Leaves.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; +package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import java.io.Serializable; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java similarity index 81% rename from 
dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java index 23909fd9a..8d3432f06 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; +package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; @@ -47,13 +47,20 @@ public class PrepareInfo implements Serializable { "' and datainfo.deletedbyinference = false " + "GROUP BY source"; + // associate projects to all the participant orgs + private static final String PROJECT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " + + "FROM relation " + + "WHERE lower(relclass) = '" + ModelConstants.HAS_PARTICIPANT.toLowerCase() + + "' and datainfo.deletedbyinference = false " + + "GROUP BY source"; + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json")); + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -74,6 +81,9 @@ public class PrepareInfo implements Serializable { final String resultOrganizationPath = parser.get("resultOrgPath"); log.info("resultOrganizationPath: {}", resultOrganizationPath); + final String projectOrgPath = parser.get("projectOrganizationPath"); + log.info("projectOrgPath: {}", projectOrgPath); + final String 
relationPath = parser.get("relationPath"); log.info("relationPath: {}", relationPath); @@ -89,11 +99,13 @@ public class PrepareInfo implements Serializable { childParentPath, leavesPath, resultOrganizationPath, + projectOrgPath, relationPath)); } private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, - String currentIterationPath, String resultOrganizationPath, String relationPath) { + String currentIterationPath, String resultOrganizationPath, String projectOrganizationPath, + String relationPath) { Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); relation.createOrReplaceTempView("relation"); @@ -113,6 +125,14 @@ public class PrepareInfo implements Serializable { .option("compression", "gzip") .json(resultOrganizationPath); + spark + .sql(PROJECT_ORGANIZATION_QUERY) + .as(Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(projectOrganizationPath); + relation .filter( (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && @@ -120,7 +140,16 @@ public class PrepareInfo implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(relationPath); + .json(relationPath + "/result"); + + relation + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && + r.getRelClass().equals(ModelConstants.HAS_PARTICIPANT)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(relationPath + "/project"); Dataset children = spark .sql( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PropagationCounter.java similarity index 97% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java rename to 
dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PropagationCounter.java index 788eff0e3..1c408d1c3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PropagationCounter.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; +package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import java.io.Serializable; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java similarity index 67% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index cfc69a8f0..27e502aba 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; +package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; @@ -30,7 +30,8 @@ import eu.dnetlib.dhp.schema.oaf.Relation; public class SparkResultToOrganizationFromSemRel implements Serializable { private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); private static final int MAX_ITERATION = 5; - public static final String 
NEW_RELATION_PATH = "/newRelation"; + public static final String NEW_RESULT_RELATION_PATH = "/newResultRelation"; + public static final String NEW_PROJECT_RELATION_PATH = "/newProjectRelation"; public static void main(String[] args) throws Exception { @@ -38,7 +39,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json")); + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -62,6 +63,9 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { final String resultOrganizationPath = parser.get("resultOrgPath"); log.info("resultOrganizationPath: {}", resultOrganizationPath); + final String projectOrganizationPath = parser.get("projectOrganizationPath"); + log.info("projectOrganizationPath: {}", projectOrganizationPath); + final String workingPath = parser.get("workingDir"); log.info("workingPath: {}", workingPath); @@ -88,6 +92,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { leavesPath, childParentPath, resultOrganizationPath, + projectOrganizationPath, relationPath, workingPath, outputPath, @@ -98,13 +103,14 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { String leavesPath, String childParentPath, String resultOrganizationPath, + String projectOrganizationPath, String graphPath, String workingPath, String outputPath, int iterations) { if (iterations == 1) { doPropagateOnce( - spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + spark, leavesPath, childParentPath, resultOrganizationPath, projectOrganizationPath, graphPath, workingPath, outputPath); } else { @@ -123,26 +129,34 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { 
notReachedFirstParent); doPropagate( - spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + spark, leavesPath, childParentPath, resultOrganizationPath, projectOrganizationPath, graphPath, workingPath, outputPath, propagationCounter); } } private static void doPropagateOnce(SparkSession spark, String leavesPath, String childParentPath, - String resultOrganizationPath, String graphPath, String workingPath, + String resultOrganizationPath, String projectOrganizationPath, String graphPath, String workingPath, String outputPath) { StepActions .execStep( - spark, graphPath, workingPath + NEW_RELATION_PATH, - leavesPath, childParentPath, resultOrganizationPath); + spark, graphPath + "/result", workingPath + NEW_RESULT_RELATION_PATH, + leavesPath, childParentPath, resultOrganizationPath, ModelConstants.HAS_AUTHOR_INSTITUTION); - addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath); + addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath); + + StepActions + .execStep( + spark, graphPath + "/project", workingPath + NEW_PROJECT_RELATION_PATH, + leavesPath, childParentPath, projectOrganizationPath, ModelConstants.HAS_PARTICIPANT); + + addNewRelations(spark, workingPath + NEW_PROJECT_RELATION_PATH, outputPath); } private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath, - String resultOrganizationPath, String graphPath, String workingPath, String outputPath, + String resultOrganizationPath, String projectOrganizationPath, String graphPath, String workingPath, + String outputPath, PropagationCounter propagationCounter) { int iteration = 0; long leavesCount; @@ -151,13 +165,18 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { iteration++; StepActions .execStep( - spark, graphPath, workingPath + NEW_RELATION_PATH, - leavesPath, childParentPath, resultOrganizationPath); + spark, graphPath + "/result", workingPath + NEW_RESULT_RELATION_PATH, + leavesPath, 
childParentPath, resultOrganizationPath, ModelConstants.HAS_AUTHOR_INSTITUTION); + StepActions + .execStep( + spark, graphPath + "/project", workingPath + NEW_PROJECT_RELATION_PATH, + leavesPath, childParentPath, projectOrganizationPath, ModelConstants.HAS_PARTICIPANT); + StepActions .prepareForNextStep( - spark, workingPath + NEW_RELATION_PATH, resultOrganizationPath, leavesPath, - childParentPath, workingPath + "/leaves", workingPath + "/resOrg"); - moveOutput(spark, workingPath, leavesPath, resultOrganizationPath); + spark, workingPath, resultOrganizationPath, projectOrganizationPath, leavesPath, + childParentPath, workingPath + "/leaves", workingPath + "/resOrg", workingPath + "/projOrg"); + moveOutput(spark, workingPath, leavesPath, resultOrganizationPath, projectOrganizationPath); leavesCount = readPath(spark, leavesPath, Leaves.class).count(); } while (leavesCount > 0 && iteration < MAX_ITERATION); @@ -185,7 +204,8 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { propagationCounter.getNotReachedFirstParent().add(1); } - addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath); + addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath); + addNewRelations(spark, workingPath + NEW_PROJECT_RELATION_PATH, outputPath); } private static void moveOutput(SparkSession spark, String workingPath, String leavesPath, @@ -204,6 +224,28 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { } + private static void moveOutput(SparkSession spark, String workingPath, String leavesPath, + String resultOrganizationPath, String projectOrganizationPath) { + readPath(spark, workingPath + "/leaves", Leaves.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(leavesPath); + + readPath(spark, workingPath + "/resOrg", KeyValueSet.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(resultOrganizationPath); + + readPath(spark, workingPath + 
"/projOrg", KeyValueSet.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(projectOrganizationPath); + + } + private static void addNewRelations(SparkSession spark, String newRelationPath, String outputPath) { Dataset relation = readPath(spark, newRelationPath, Relation.class); @@ -212,16 +254,21 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { .mapGroups( (MapGroupsFunction) (k, it) -> it.next(), Encoders.bean(Relation.class)) .flatMap( - (FlatMapFunction) r -> Arrays - .asList( - r, getRelation( - r.getTarget(), r.getSource(), ModelConstants.IS_AUTHOR_INSTITUTION_OF, - ModelConstants.RESULT_ORGANIZATION, - ModelConstants.AFFILIATION, - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, - PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME)) - .iterator() + (FlatMapFunction) r -> { + if (r.getSource().startsWith("50|")) { + return Arrays + .asList( + r, getAffiliationRelation( + r.getTarget(), r.getSource(), ModelConstants.IS_AUTHOR_INSTITUTION_OF)) + .iterator(); + } else { + return Arrays + .asList( + r, getParticipantRelation( + r.getTarget(), r.getSource(), ModelConstants.IS_PARTICIPANT)) + .iterator(); + } + } , Encoders.bean(Relation.class)) .write() diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java similarity index 75% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java index 1adbbe60e..386ea1a5c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java @@ -1,8 +1,10 @@ -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; +package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.readPath; +import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH; +import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH; import java.io.Serializable; import java.util.*; @@ -14,8 +16,6 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; @@ -28,13 +28,14 @@ public class StepActions implements Serializable { public static void execStep(SparkSession spark, String graphPath, String newRelationPath, - String leavesPath, String chldParentOrgPath, String resultOrgPath) { + String leavesPath, String chldParentOrgPath, String entityOrgPath, String rel_class) { Dataset relationGraph = readPath(spark, graphPath, Relation.class); // select only the relation source target among those proposed by propagation that are not already existent + getNewRels( newRelationPath, relationGraph, - getPropagationRelation(spark, leavesPath, chldParentOrgPath, resultOrgPath)); + getPropagationRelation(spark, leavesPath, chldParentOrgPath, entityOrgPath, rel_class)); } @@ -45,16 +46,33 @@ public class StepActions implements Serializable { changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath); // add the new relations obtained from propagation to the keyvalueset result organization - updateResultOrganization( + updateEntityOrganization( spark, resultOrgPath, readPath(spark, selectedRelsPath, 
Relation.class), orgOutputPath); } - private static void updateResultOrganization(SparkSession spark, String resultOrgPath, + public static void prepareForNextStep(SparkSession spark, String selectedRelsPath, String resultOrgPath, + String projectOrgPath, + String leavesPath, String chldParentOrgPath, String leavesOutputPath, + String orgOutputPath, String outputProjectPath) { + // use of the parents as new leaves set + changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath); + + // add the new relations obtained from propagation to the keyvalueset result organization + updateEntityOrganization( + spark, resultOrgPath, readPath(spark, selectedRelsPath + NEW_RESULT_RELATION_PATH, Relation.class), + orgOutputPath); + + updateEntityOrganization( + spark, projectOrgPath, readPath(spark, selectedRelsPath + NEW_PROJECT_RELATION_PATH, Relation.class), + outputProjectPath); + } + + private static void updateEntityOrganization(SparkSession spark, String entityOrgPath, Dataset selectedRels, String outputPath) { - Dataset resultOrg = readPath(spark, resultOrgPath, KeyValueSet.class); - resultOrg + Dataset entityOrg = readPath(spark, entityOrgPath, KeyValueSet.class); + entityOrg .joinWith( - selectedRels, resultOrg + selectedRels, entityOrg .col("key") .equalTo(selectedRels.col("source")), "left") @@ -129,7 +147,12 @@ public class StepActions implements Serializable { .getDataInfo() .getProvenanceaction() .getClassid() - .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)) + .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID) + && !rel + .getDataInfo() + .getProvenanceaction() + .getClassid() + .equals(PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID)) .count() > 0) { return null; } @@ -152,19 +175,20 @@ public class StepActions implements Serializable { private static Dataset getPropagationRelation(SparkSession spark, String leavesPath, String chldParentOrgPath, - String resultOrgPath) { + String entityOrgPath, + String 
semantics) { Dataset childParent = readPath(spark, chldParentOrgPath, KeyValueSet.class); - Dataset resultOrg = readPath(spark, resultOrgPath, KeyValueSet.class); + Dataset entityOrg = readPath(spark, entityOrgPath, KeyValueSet.class); Dataset leaves = readPath(spark, leavesPath, Leaves.class); childParent.createOrReplaceTempView("childParent"); - resultOrg.createOrReplaceTempView("resultOrg"); + entityOrg.createOrReplaceTempView("entityOrg"); leaves.createOrReplaceTempView("leaves"); Dataset resultParent = spark .sql( - "SELECT resId as key, " + + "SELECT entityId as key, " + "collect_set(parent) valueSet " + "FROM (SELECT key as child, parent " + " FROM childParent " + @@ -172,14 +196,14 @@ public class StepActions implements Serializable { "JOIN leaves " + "ON leaves.value = cp.child " + "JOIN (" + - "SELECT key as resId, org " + - "FROM resultOrg " + + "SELECT key as entityId, org " + + "FROM entityOrg " + "LATERAL VIEW explode (valueSet) ks as org ) as ro " + "ON leaves.value = ro.org " + - "GROUP BY resId") + "GROUP BY entityId") .as(Encoders.bean(KeyValueSet.class)); - // create new relations from result to organization for each result linked to a leaf + // create new relations from entity to organization for each entity linked to a leaf return resultParent .flatMap( (FlatMapFunction) v -> v @@ -189,12 +213,7 @@ public class StepActions implements Serializable { orgId -> getRelation( v.getKey(), orgId, - ModelConstants.HAS_AUTHOR_INSTITUTION, - ModelConstants.RESULT_ORGANIZATION, - ModelConstants.AFFILIATION, - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, - PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME)) + semantics)) .collect(Collectors.toList()) .iterator(), Encoders.bean(Relation.class)); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json similarity index 87% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json index c79bfe05d..b59937331 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json @@ -40,5 +40,11 @@ "paramLongName": "relationPath", "paramDescription": "the path where to store the selected subset of relations", "paramRequired": false + }, + { + "paramName": "pop", + "paramLongName": "projectOrganizationPath", + "paramDescription": "the path where to store the association between each project and its participant organizations", + "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json similarity index 90% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json index e09cd62fa..5a8597f38 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json @@ -52,5 +52,10 @@ "paramLongName": "iterations", 
"paramDescription": "the number of iterations to be computed", "paramRequired": false - } + },{ + "paramName": "pop", + "paramLongName": "projectOrganizationPath", + "paramDescription": "the path of the prepared association between each project and its participant organizations", + "paramRequired": true +} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml similarity index 96% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml index 5ce2f5c06..ff6ec8f37 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -134,7 +134,7 @@ yarn cluster PrepareResultOrganizationAssociation - eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo + eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -150,6 +150,7 @@ 
--leavesPath${workingDir}/preparedInfo/leavesPath 
--childParentPath${workingDir}/preparedInfo/childParentPath --resultOrgPath${workingDir}/preparedInfo/resultOrgPath + --projectOrganizationPath${workingDir}/preparedInfo/projectOrganizationPath --relationPath${workingDir}/preparedInfo/relation @@ -161,7 +162,7 @@ yarn cluster resultToOrganizationFromSemRel - eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfoJobTest.java similarity index 72% rename from dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfoJobTest.java index 2d2668db3..7c9c2b97b 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfoJobTest.java @@ -1,22 +1,17 @@ -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; - -import static eu.dnetlib.dhp.PropagationConstant.readPath; +package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; 
-import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; @@ -28,7 +23,6 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.KeyValueSet; -import eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob; import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareInfoJobTest { @@ -78,11 +72,12 @@ public class PrepareInfoJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphPath", getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest1") .getPath(), "-hive_metastore_uris", "", "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", "-relationPath", workingDir.toString() + "/relation" @@ -223,11 +218,12 @@ public class PrepareInfoJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphPath", getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest2") .getPath(), "-hive_metastore_uris", "", "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", "-relationPath", workingDir.toString() + "/relation" @@ -343,11 +339,12 @@ public class PrepareInfoJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphPath", getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + 
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/resultorganizationtest") .getPath(), "-hive_metastore_uris", "", "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", "-relationPath", workingDir.toString() + "/relation" @@ -355,7 +352,38 @@ public class PrepareInfoJobTest { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") + .textFile(workingDir.toString() + "/relation/result") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + Assertions.assertEquals(7, verificationDs.count()); + + } + + @Test + public void relationProjectTest() throws Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation/project") .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); @@ -373,11 +401,12 @@ public class PrepareInfoJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), 
"-graphPath", getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/resultorganizationtest") .getPath(), "-hive_metastore_uris", "", "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", "-relationPath", workingDir.toString() + "/relation" @@ -498,6 +527,141 @@ public class PrepareInfoJobTest { } + @Test + public void projectOrganizationTest1() throws Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/projectOrganization/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); + + Assertions.assertEquals(5, verificationDs.count()); + + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + 
.getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '40|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0")); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '40|doajarticles::03748bcb5d754c951efec9700e18a56d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|doajarticles::03748bcb5d754c951efec9700e18a56d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '40|openaire____::ec653e804967133b9436fdd30d3ff51d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|openaire____::ec653e804967133b9436fdd30d3ff51d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '40|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + 
.filter("key = '40|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1")); + + verificationDs + .foreach((ForeachFunction) v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v))); + + } + @Test public void foundLeavesTest1() throws Exception { @@ -507,11 +671,12 @@ public class PrepareInfoJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphPath", getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/resultorganizationtest") .getPath(), "-hive_metastore_uris", "", "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", "-relationPath", workingDir.toString() + "/relation" @@ -534,11 +699,12 @@ public class PrepareInfoJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphPath", getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest1") .getPath(), "-hive_metastore_uris", "", "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", "-relationPath", workingDir.toString() + "/relation" diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java new file mode 100644 index 000000000..2e75c75ad --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java @@ -0,0 +1,900 @@ + +package eu.dnetlib.dhp.entitytoorganizationfromsemrel; + +import static eu.dnetlib.dhp.PropagationConstant.readPath; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class SparkJobTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(SparkJobTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(SparkJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(SparkJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(SparkJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate();
+ } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void completeResultExecution() throws Exception { + + final String graphPath = getClass() + .getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph") + .getPath(); + final String leavesPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") + .getPath(); + final String childParentPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(); + + final String resultOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(); + + final String projectOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/") + .getPath(); + + readPath(spark, leavesPath, Leaves.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/leavesInput"); + + readPath(spark, resultOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/orgsInput"); + + readPath(spark, projectOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/projectInput"); + + SparkResultToOrganizationFromSemRel + + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-relationPath", graphPath, + "-hive_metastore_uris", "", + "-outputPath", workingDir.toString() + "/finalrelation", + "-leavesPath", workingDir.toString() + "/leavesInput", + "-resultOrgPath", workingDir.toString() + "/orgsInput", + "-projectOrganizationPath", workingDir.toString() + "/projectInput", + "-childParentPath", childParentPath, + "-workingDir", workingDir.toString() + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD 
temp = sc + .textFile(workingDir.toString() + "/finalrelation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Assertions.assertEquals(36, temp.count()); + + JavaRDD result = temp.filter(r -> r.getSource().startsWith("50|") || r.getTarget().startsWith("50|")); + Assertions.assertEquals(18, result.count()); + result.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); + result.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); + result + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); + result + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + r.getDataInfo().getProvenanceaction().getClassid())); + result + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, + r.getDataInfo().getProvenanceaction().getClassname())); + result + .foreach( + r -> Assertions + .assertEquals( + "0.85", + r.getDataInfo().getTrust())); + + Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); + result + .filter(r -> r.getSource().substring(0, 3).equals("50|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); + Assertions + .assertEquals( + 2, + result.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 3, + result.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, + result.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 1, + result.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + 
Assertions + .assertEquals( + 1, + result.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); + result + .filter(r -> r.getSource().substring(0, 3).equals("20|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass())); + Assertions + .assertEquals( + 1, + result.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 1, + result.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, + result.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 2, + result.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + Assertions + .assertEquals( + 3, + result.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + 
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + result + .filter(r -> 
r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertTrue( + result + .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + } + + @Test + public void completeProjectExecution() throws Exception { + + final String graphPath = getClass() + .getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph") + .getPath(); + final String leavesPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") + .getPath(); + final String childParentPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(); + + final String resultOrgPath = getClass() + .getResource( + 
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(); + + final String projectOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/") + .getPath(); + + readPath(spark, leavesPath, Leaves.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/leavesInput"); + + readPath(spark, resultOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/orgsInput"); + + readPath(spark, projectOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/projectInput"); + + SparkResultToOrganizationFromSemRel + + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-relationPath", graphPath, + "-hive_metastore_uris", "", + "-outputPath", workingDir.toString() + "/finalrelation", + "-leavesPath", workingDir.toString() + "/leavesInput", + "-resultOrgPath", workingDir.toString() + "/orgsInput", + "-projectOrganizationPath", workingDir.toString() + "/projectInput", + "-childParentPath", childParentPath, + "-workingDir", workingDir.toString() + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD temp = sc + .textFile(workingDir.toString() + "/finalrelation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Assertions.assertEquals(36, temp.count()); + + JavaRDD project = temp + .filter(r -> r.getSource().startsWith("40|") || r.getTarget().startsWith("40|")); + Assertions.assertEquals(18, project.count()); + + project.foreach(r -> Assertions.assertEquals(ModelConstants.PARTICIPATION, r.getSubRelType())); + project.foreach(r -> Assertions.assertEquals(ModelConstants.PROJECT_ORGANIZATION, r.getRelType())); + project + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); + project + 
.foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID, + r.getDataInfo().getProvenanceaction().getClassid())); + project + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_NAME, + r.getDataInfo().getProvenanceaction().getClassname())); + project + .foreach( + r -> Assertions + .assertEquals( + "0.85", + r.getDataInfo().getTrust())); + + Assertions.assertEquals(9, project.filter(r -> r.getSource().substring(0, 3).equals("40|")).count()); + project + .filter(r -> r.getSource().substring(0, 3).equals("40|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_PARTICIPANT, r.getRelClass())); + Assertions + .assertEquals( + 2, + project.filter(r -> r.getSource().equals("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 3, + project.filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, + project.filter(r -> r.getSource().equals("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 1, + project.filter(r -> r.getSource().equals("40|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + Assertions + .assertEquals( + 1, + project.filter(r -> r.getSource().equals("40|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions.assertEquals(9, project.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); + project + .filter(r -> r.getSource().substring(0, 3).equals("20|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.IS_PARTICIPANT, r.getRelClass())); + Assertions + .assertEquals( + 1, + project.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 1, + project.filter(r -> 
r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, + project.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 2, + project.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + Assertions + .assertEquals( + 3, + project.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + project + .filter(r -> 
r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + 
.collect() + .contains("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + } + + @Test + public void singleIterationExecution() throws Exception { + + final String graphPath = getClass() + .getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph") + .getPath(); + final String leavesPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") + .getPath(); + final String childParentPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(); + + final String resultOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(); + + final String projectOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/") + .getPath(); + + readPath(spark, leavesPath, Leaves.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/leavesInput"); + + readPath(spark, resultOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/orgsInput"); + + readPath(spark, projectOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + 
.json(workingDir.toString() + "/projectInput"); + + SparkResultToOrganizationFromSemRel + + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-relationPath", graphPath, + "-hive_metastore_uris", "", + "-outputPath", workingDir.toString() + "/finalrelation", + "-leavesPath", workingDir.toString() + "/leavesInput", + "-resultOrgPath", workingDir.toString() + "/orgsInput", + "-projectOrganizationPath", workingDir.toString() + "/projectInput", + "-childParentPath", childParentPath, + "-workingDir", workingDir.toString(), + "-iterations", "1" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD temp = sc + .textFile(workingDir.toString() + "/finalrelation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Assertions.assertEquals(16, temp.count()); + + Assertions.assertEquals(4, temp.filter(r -> r.getSource().startsWith("50|")).count()); + Assertions.assertEquals(4, temp.filter(r -> r.getTarget().startsWith("50|")).count()); + Assertions.assertEquals(4, temp.filter(r -> r.getSource().startsWith("40|")).count()); + Assertions.assertEquals(4, temp.filter(r -> r.getTarget().startsWith("40|")).count()); + Assertions.assertEquals(8, temp.filter(r -> r.getSource().startsWith("20|")).count()); + Assertions.assertEquals(8, temp.filter(r -> r.getTarget().startsWith("20|")).count()); + +// JavaRDD result = temp.filter(r -> r.getSource().startsWith("50|") || r.getTarget().startsWith("50|")); +// Assertions.assertEquals(18, result.count()); +// result.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); +// result.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); +// result +// .foreach( +// r -> Assertions +// .assertEquals( +// PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); +// result +// .foreach( +// r -> Assertions +// .assertEquals( +//
PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, +// r.getDataInfo().getProvenanceaction().getClassid())); +// result +// .foreach( +// r -> Assertions +// .assertEquals( +// PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, +// r.getDataInfo().getProvenanceaction().getClassname())); +// result +// .foreach( +// r -> Assertions +// .assertEquals( +// "0.85", +// r.getDataInfo().getTrust())); +// +// Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); +// result +// .filter(r -> r.getSource().substring(0, 3).equals("50|")) +// .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); +// Assertions +// .assertEquals( +// 2, result.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); +// Assertions +// .assertEquals( +// 3, result.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); +// Assertions +// .assertEquals( +// 2, result.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); +// Assertions +// .assertEquals( +// 1, result.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); +// Assertions +// .assertEquals( +// 1, result.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); +// +// Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); +// result +// .filter(r -> r.getSource().substring(0, 3).equals("20|")) +// .foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass())); +// Assertions +// .assertEquals( +// 1, result.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); +// Assertions +// .assertEquals( +// 1, result.filter(r -> 
r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); +// Assertions +// .assertEquals( +// 2, result.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); +// Assertions +// .assertEquals( +// 2, result.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); +// Assertions +// .assertEquals( +// 3, result.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) +// .map(r -> r.getTarget()) +// .collect() +// 
.contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|openaire____::ec653e804967133b9436fdd30d3ff51d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) +// .map(r -> r.getTarget()) +// .collect() 
+// .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::03748bcb5d754c951efec9700e18a56d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActionsTest.java similarity index 92% rename from dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActionsTest.java index 5c715f3b9..64339e3b7 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActionsTest.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; +package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import java.io.IOException; 
import java.nio.file.Files; @@ -73,21 +73,22 @@ public class StepActionsTest { .execStep( spark, getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result") .getPath(), workingDir.toString() + "/newRelationPath", getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") - .getPath()); + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(), + ModelConstants.HAS_AUTHOR_INSTITUTION); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -203,19 +204,19 @@ public class StepActionsTest { spark, getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") .getPath(), workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); @@ -248,19 +249,19 @@ public class 
StepActionsTest { spark, getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") .getPath(), workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java deleted file mode 100644 index 7dd575b66..000000000 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java +++ /dev/null @@ -1,325 +0,0 @@ - -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; - -import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; -import static eu.dnetlib.dhp.PropagationConstant.readPath; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import org.apache.commons.io.FileUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.ForeachFunction; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import 
org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.KeyValueSet; -import eu.dnetlib.dhp.PropagationConstant; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Relation; - -public class SparkJobTest { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private static SparkSession spark; - - private static Path workingDir; - - private static final Logger log = LoggerFactory.getLogger(SparkJobTest.class); - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(StepActionsTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); - - SparkConf conf = new SparkConf(); - conf.setAppName(PrepareInfoJobTest.class.getSimpleName()); - - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - - spark = SparkSession - .builder() - .appName(PrepareInfoJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } - - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } - - @Test - public void completeExecution() throws Exception { - - final String graphPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep") - .getPath(); - final String leavesPath = getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") - .getPath(); - final String childParentPath = getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") - .getPath(); - - final String resultOrgPath = getClass() - 
.getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") - .getPath(); - - readPath(spark, leavesPath, Leaves.class) - .write() - .option("compression", "gzip") - .json(workingDir.toString() + "/leavesInput"); - - readPath(spark, resultOrgPath, KeyValueSet.class) - .write() - .option("compression", "gzip") - .json(workingDir.toString() + "/orgsInput"); - - SparkResultToOrganizationFromSemRel - - .main( - new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-relationPath", graphPath, - "-hive_metastore_uris", "", - "-outputPath", workingDir.toString() + "/finalrelation", - "-leavesPath", workingDir.toString() + "/leavesInput", - "-resultOrgPath", workingDir.toString() + "/orgsInput", - "-childParentPath", childParentPath, - "-workingDir", workingDir.toString() - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/finalrelation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); - - tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); - - Assertions.assertEquals(18, tmp.count()); - tmp.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); - tmp.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); - tmp - .foreach( - r -> Assertions - .assertEquals( - PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); - tmp - .foreach( - r -> Assertions - .assertEquals( - PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, - r.getDataInfo().getProvenanceaction().getClassid())); - tmp - .foreach( - r -> Assertions - .assertEquals( - PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, - r.getDataInfo().getProvenanceaction().getClassname())); - tmp - .foreach( - r -> Assertions - .assertEquals( - "0.85", - r.getDataInfo().getTrust())); 
- - Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); - tmp - .filter(r -> r.getSource().substring(0, 3).equals("50|")) - .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); - Assertions - .assertEquals( - 2, tmp.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); - Assertions - .assertEquals( - 3, tmp.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); - Assertions - .assertEquals( - 2, tmp.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); - Assertions - .assertEquals( - 1, tmp.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); - Assertions - .assertEquals( - 1, tmp.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); - - Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); - tmp - .filter(r -> r.getSource().substring(0, 3).equals("20|")) - .foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass())); - Assertions - .assertEquals( - 1, tmp.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); - Assertions - .assertEquals( - 1, tmp.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); - Assertions - .assertEquals( - 2, tmp.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); - Assertions - .assertEquals( - 2, tmp.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); - Assertions - .assertEquals( - 3, tmp.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); - - Assertions - .assertTrue( - tmp - .filter(r -> 
r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) - .map(r -> r.getTarget()) - .collect() - .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) - .map(r -> r.getTarget()) - .collect() - .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); - - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) - .map(r -> r.getTarget()) - .collect() - .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) - .map(r -> r.getTarget()) - .collect() - .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); - - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) - .map(r -> r.getTarget()) - .collect() - .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) - .map(r -> r.getTarget()) - .collect() - .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) - .map(r -> r.getTarget()) - .collect() - .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); - - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) - .map(r -> r.getTarget()) - .collect() - .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); - - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) - .map(r -> r.getTarget()) - .collect() - 
.contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); - - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) - .map(r -> r.getTarget()) - .collect() - .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) - .map(r -> r.getTarget()) - .collect() - .contains("50|openaire____::ec653e804967133b9436fdd30d3ff51d")); - - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) - .map(r -> r.getTarget()) - .collect() - .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) - .map(r -> r.getTarget()) - .collect() - .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); - - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) - .map(r -> r.getTarget()) - .collect() - .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) - .map(r -> r.getTarget()) - .collect() - .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) - .map(r -> r.getTarget()) - .collect() - .contains("50|doajarticles::03748bcb5d754c951efec9700e18a56d")); - - Assertions - .assertTrue( - tmp - .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) - .map(r -> r.getTarget()) - .collect() - .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); - - Assertions - .assertTrue( - tmp - .filter(r -> 
r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) - .map(r -> r.getTarget()) - .collect() - .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); - } - -} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest1/relation similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1/relation rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest1/relation diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest2/relation similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2/relation rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/childparenttest2/relation diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/childparent b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/childparent similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/childparent rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/childparent diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/leaves 
b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/leaves similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/leaves rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/leaves diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/project b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/project new file mode 100644 index 000000000..e8e35f555 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/project @@ -0,0 +1,7 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","validated":false} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","validated":false} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","validated":false} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0","validated":false} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","validated":false} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","validated":false} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1","validated":false} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest/relation rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/projectorganization b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/projectorganization new file mode 100644 index 000000000..81803f29d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/projectorganization @@ -0,0 +1,5 @@ +{"key":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","valueSet":["20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"]} +{"key":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"]} +{"key":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","valueSet":["20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"]} +{"key":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"]} +{"key":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/relation 
b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/relation similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/relation rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/relation diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/resultorganization b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/resultorganization similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/resultorganization rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/resultorganization diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest/relation new file mode 100644 index 000000000..10d46b1cb --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest/relation @@ -0,0 +1,7 @@ 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/resultorganizationtest/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/resultorganizationtest/relation new file mode 100644 index 000000000..5aeabb71b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/resultorganizationtest/relation @@ -0,0 +1,7 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation deleted file mode 100644 index db7db8fdd..000000000 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation +++ /dev/null @@ -1,14 +0,0 @@ -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} 
-{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} 
-{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} 
-{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} 
-{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file