From 97d72d41c31cc222f50b47a05d4993692892fb3c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 31 May 2023 18:53:22 +0200 Subject: [PATCH] finalization of implementation and testing --- .../eu/dnetlib/dhp/PropagationConstant.java | 7 + .../PrepareInfo.java | 8 +- .../SparkResultToOrganizationFromSemRel.java | 50 +- .../StepActions.java | 103 +-- .../PrepareInfoJobTest.java | 166 +++++ .../SparkJobTest.java | 648 ++++++++++++++++-- .../StepActionsTest.java | 26 +- .../execstep/graph/project | 7 + .../execstep/graph/result | 7 + .../projectOrganization/projectorganization | 5 + .../execstep/relation | 14 - .../projectorganizationtest/relation | 7 + 12 files changed, 921 insertions(+), 127 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/project create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/projectorganization delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relation create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest/relation diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 53769c9fb..87528ef58 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -174,6 +174,13 @@ public class PropagationConstant { return newRelations; } + public static Relation getRelation(String source, String target, String rel_class){ + if 
(ModelConstants.HAS_PARTICIPANT.equals(rel_class)){ + return getParticipantRelation(source, target, rel_class); + }else + return getAffiliationRelation(source, target, rel_class); + } + public static Relation getParticipantRelation( String source, String target, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java index 7ad9c4cee..971ef436f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java @@ -49,7 +49,7 @@ public class PrepareInfo implements Serializable { // associate projects to all the participant orgs private static final String PROJECT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " + "FROM relation " + - "WHERE lower(relclass) = '" + ModelConstants.IS_PARTICIPANT.toLowerCase() + + "WHERE lower(relclass) = '" + ModelConstants.HAS_PARTICIPANT.toLowerCase() + "' and datainfo.deletedbyinference = false " + "GROUP BY source"; @@ -103,7 +103,7 @@ public class PrepareInfo implements Serializable { } private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, - String currentIterationPath, String resultOrganizationPath, String resultProjectPath, String relationPath) { + String currentIterationPath, String resultOrganizationPath, String projectOrganizationPath, String relationPath) { Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); relation.createOrReplaceTempView("relation"); @@ -129,7 +129,7 @@ public class PrepareInfo implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(resultProjectPath); + .json(projectOrganizationPath); relation .filter( @@ -143,7 +143,7 @@ public 
class PrepareInfo implements Serializable { relation .filter( (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && - r.getRelClass().equals(ModelConstants.IS_PARTICIPANT)) + r.getRelClass().equals(ModelConstants.HAS_PARTICIPANT)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index 19e55a905..dd32552ad 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -39,7 +39,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json")); + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -129,7 +129,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { notReachedFirstParent); doPropagate( - spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + spark, leavesPath, childParentPath, resultOrganizationPath, projectOrganizationPath, graphPath, workingPath, outputPath, propagationCounter); } @@ -142,20 +142,20 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { StepActions .execStep( spark, graphPath + "/result", workingPath + NEW_RESULT_RELATION_PATH, - leavesPath, childParentPath, resultOrganizationPath); + leavesPath, childParentPath, resultOrganizationPath, 
ModelConstants.HAS_AUTHOR_INSTITUTION); addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath); StepActions .execStep( spark, graphPath + "/project", workingPath + NEW_PROJECT_RELATION_PATH, - leavesPath, childParentPath, projectOrganizationPath); + leavesPath, childParentPath, projectOrganizationPath, ModelConstants.HAS_PARTICIPANT); addNewRelations(spark, workingPath + NEW_PROJECT_RELATION_PATH, outputPath); } private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath, - String resultOrganizationPath, String graphPath, String workingPath, String outputPath, + String resultOrganizationPath, String projectOrganizationPath, String graphPath, String workingPath, String outputPath, PropagationCounter propagationCounter) { int iteration = 0; long leavesCount; @@ -164,13 +164,18 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { iteration++; StepActions .execStep( - spark, graphPath, workingPath + NEW_RESULT_RELATION_PATH, - leavesPath, childParentPath, resultOrganizationPath); + spark, graphPath + "/result", workingPath + NEW_RESULT_RELATION_PATH, + leavesPath, childParentPath, resultOrganizationPath, ModelConstants.HAS_AUTHOR_INSTITUTION); + StepActions + .execStep( + spark, graphPath + "/project", workingPath + NEW_PROJECT_RELATION_PATH, + leavesPath, childParentPath, projectOrganizationPath, ModelConstants.HAS_PARTICIPANT); + StepActions .prepareForNextStep( - spark, workingPath + NEW_RESULT_RELATION_PATH, resultOrganizationPath, leavesPath, - childParentPath, workingPath + "/leaves", workingPath + "/resOrg"); - moveOutput(spark, workingPath, leavesPath, resultOrganizationPath); + spark, workingPath , resultOrganizationPath, projectOrganizationPath, leavesPath, + childParentPath, workingPath + "/leaves", workingPath + "/resOrg", workingPath + "/projOrg"); + moveOutput(spark, workingPath, leavesPath, resultOrganizationPath, projectOrganizationPath); leavesCount = readPath(spark, 
leavesPath, Leaves.class).count(); } while (leavesCount > 0 && iteration < MAX_ITERATION); @@ -199,6 +204,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { } addNewRelations(spark, workingPath + NEW_RESULT_RELATION_PATH, outputPath); + addNewRelations(spark, workingPath + NEW_PROJECT_RELATION_PATH, outputPath); } private static void moveOutput(SparkSession spark, String workingPath, String leavesPath, @@ -217,6 +223,28 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { } + private static void moveOutput(SparkSession spark, String workingPath, String leavesPath, + String resultOrganizationPath, String projectOrganizationPath) { + readPath(spark, workingPath + "/leaves", Leaves.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(leavesPath); + + readPath(spark, workingPath + "/resOrg", KeyValueSet.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(resultOrganizationPath); + + readPath(spark, workingPath + "/projOrg", KeyValueSet.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(projectOrganizationPath); + + } + private static void addNewRelations(SparkSession spark, String newRelationPath, String outputPath) { Dataset relation = readPath(spark, newRelationPath, Relation.class); @@ -237,7 +265,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { return Arrays .asList( r, getParticipantRelation( - r.getTarget(), r.getSource(), ModelConstants.HAS_PARTICIPANT)) + r.getTarget(), r.getSource(), ModelConstants.IS_PARTICIPANT)) .iterator(); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java index 5b6c397cf..de5034d38 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java @@ -3,6 +3,8 @@ package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.readPath; +import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH; +import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH; import java.io.Serializable; import java.util.*; @@ -26,13 +28,14 @@ public class StepActions implements Serializable { public static void execStep(SparkSession spark, String graphPath, String newRelationPath, - String leavesPath, String chldParentOrgPath, String entityOrgPath) { + String leavesPath, String chldParentOrgPath, String entityOrgPath, String rel_class) { Dataset relationGraph = readPath(spark, graphPath, Relation.class); // select only the relation source target among those proposed by propagation that are not already existent + getNewRels( newRelationPath, relationGraph, - getPropagationRelation(spark, leavesPath, chldParentOrgPath, entityOrgPath, ModelConstants.HAS_AUTHOR_INSTITUTION)); + getPropagationRelation(spark, leavesPath, chldParentOrgPath, entityOrgPath, rel_class)); } @@ -43,16 +46,30 @@ public class StepActions implements Serializable { changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath); // add the new relations obtained from propagation to the keyvalueset result organization - updateResultOrganization( + updateEntityOrganization( spark, resultOrgPath, readPath(spark, selectedRelsPath, Relation.class), orgOutputPath); } - private static void updateResultOrganization(SparkSession spark, String resultOrgPath, - Dataset selectedRels, String outputPath) { - Dataset resultOrg = 
readPath(spark, resultOrgPath, KeyValueSet.class); - resultOrg + public static void prepareForNextStep(SparkSession spark, String selectedRelsPath, String resultOrgPath, String projectOrgPath, + String leavesPath, String chldParentOrgPath, String leavesOutputPath, + String orgOutputPath, String outputProjectPath) { + // use of the parents as new leaves set + changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath); + + // add the new relations obtained from propagation to the keyvalueset result organization + updateEntityOrganization( + spark, resultOrgPath, readPath(spark, selectedRelsPath + NEW_RESULT_RELATION_PATH, Relation.class), orgOutputPath); + + updateEntityOrganization( + spark, projectOrgPath, readPath(spark, selectedRelsPath + NEW_PROJECT_RELATION_PATH, Relation.class), outputProjectPath); + } + + private static void updateEntityOrganization(SparkSession spark, String entityOrgPath, + Dataset selectedRels, String outputPath) { + Dataset entityOrg = readPath(spark, entityOrgPath, KeyValueSet.class); + entityOrg .joinWith( - selectedRels, resultOrg + selectedRels, entityOrg .col("key") .equalTo(selectedRels.col("source")), "left") @@ -111,38 +128,45 @@ public class StepActions implements Serializable { // construction of the set) // if at least one relation in the set was not produced by propagation no new relation will be returned + relationDataset - .union(newRels) - .groupByKey((MapFunction) r -> r.getSource() + r.getTarget(), Encoders.STRING()) - .mapGroups((MapGroupsFunction) (k, it) -> { + .union(newRels) + .groupByKey((MapFunction) r -> r.getSource() + r.getTarget(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { - ArrayList relationList = new ArrayList<>(); - relationList.add(it.next()); - it.forEachRemaining(rel -> relationList.add(rel)); + ArrayList relationList = new ArrayList<>(); + relationList.add(it.next()); + it.forEachRemaining(rel -> relationList.add(rel)); - if (relationList - .stream() - .filter( - rel 
-> !rel - .getDataInfo() - .getProvenanceaction() - .getClassid() - .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)) - .count() > 0) { - return null; - } + if (relationList + .stream() + .filter( + rel -> !rel + .getDataInfo() + .getProvenanceaction() + .getClassid() + .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID) && !rel + .getDataInfo() + .getProvenanceaction() + .getClassid() + .equals(PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID)) + .count() > 0) { + return null; + } + + return new ObjectMapper().writeValueAsString(relationList.get(0)); + + }, Encoders.STRING()) + .filter(Objects::nonNull) + .map( + (MapFunction) r -> new ObjectMapper().readValue(r, Relation.class), + Encoders.bean(Relation.class)) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(newRelationPath); - return new ObjectMapper().writeValueAsString(relationList.get(0)); - }, Encoders.STRING()) - .filter(Objects::nonNull) - .map( - (MapFunction) r -> new ObjectMapper().readValue(r, Relation.class), - Encoders.bean(Relation.class)) - .write() - .mode(SaveMode.Append) - .option("compression", "gzip") - .json(newRelationPath); } @@ -172,20 +196,21 @@ public class StepActions implements Serializable { "ON leaves.value = cp.child " + "JOIN (" + "SELECT key as entityId, org " + - "FROM resultOrg " + + "FROM entityOrg " + "LATERAL VIEW explode (valueSet) ks as org ) as ro " + "ON leaves.value = ro.org " + - "GROUP BY resId") + "GROUP BY entityId") .as(Encoders.bean(KeyValueSet.class)); - // create new relations from result to organization for each result linked to a leaf + + // create new relations from entity to organization for each entity linked to a leaf return resultParent .flatMap( (FlatMapFunction) v -> v .getValueSet() .stream() .map( - orgId -> getAffiliationRelation( + orgId -> getRelation( v.getKey(), orgId, semantics)) diff --git 
a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfoJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfoJobTest.java index 3d7086739..f29e8d24a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfoJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfoJobTest.java @@ -361,6 +361,37 @@ public class PrepareInfoJobTest { } + @Test + public void relationProjectTest() throws Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation/project") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + Assertions.assertEquals(7, verificationDs.count()); + + } + @Test public void resultOrganizationTest1() throws Exception { @@ -496,6 +527,141 @@ public class PrepareInfoJobTest { } + @Test + public void projectOrganizationTest1() throws Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + 
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-projectOrganizationPath", workingDir.toString() + "/projectOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/projectOrganization/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); + + Assertions.assertEquals(5, verificationDs.count()); + + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '40|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0")); + Assertions + .assertTrue( + verificationDs + .filter("key = 
'40|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '40|doajarticles::03748bcb5d754c951efec9700e18a56d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|doajarticles::03748bcb5d754c951efec9700e18a56d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '40|openaire____::ec653e804967133b9436fdd30d3ff51d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|openaire____::ec653e804967133b9436fdd30d3ff51d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '40|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '40|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1")); + + verificationDs + .foreach((ForeachFunction) v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v))); + + } + @Test public void foundLeavesTest1() throws Exception { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java index a4d8f83e3..eb4ade0da 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java +++ 
b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java @@ -65,25 +65,30 @@ public class SparkJobTest { } @Test - public void completeExecution() throws Exception { + public void completeResultExecution() throws Exception { final String graphPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep") + .getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph") .getPath(); final String leavesPath = getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") .getPath(); final String childParentPath = getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") .getPath(); final String resultOrgPath = getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") .getPath(); + final String projectOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/") + .getPath(); + readPath(spark, leavesPath, Leaves.class) .write() .option("compression", "gzip") @@ -94,6 +99,11 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/orgsInput"); + readPath(spark, projectOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/projectInput"); + SparkResultToOrganizationFromSemRel .main( @@ -104,95 +114,97 @@ public class SparkJobTest { "-outputPath", workingDir.toString() + "/finalrelation", "-leavesPath", workingDir.toString() + "/leavesInput", "-resultOrgPath", workingDir.toString() + "/orgsInput", + "-projectOrganizationPath", workingDir.toString() + "/projectInput", 
"-childParentPath", childParentPath, "-workingDir", workingDir.toString() }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc + JavaRDD temp = sc .textFile(workingDir.toString() + "/finalrelation") .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); - tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + Assertions.assertEquals(36, temp.count()); - Assertions.assertEquals(18, tmp.count()); - tmp.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); - tmp.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); - tmp + JavaRDD result = temp.filter(r -> r.getSource().startsWith("50|") || r.getTarget().startsWith("50|")); + Assertions.assertEquals(18, result.count()); + result.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); + result.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); + result .foreach( r -> Assertions .assertEquals( PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); - tmp + result .foreach( r -> Assertions .assertEquals( PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, r.getDataInfo().getProvenanceaction().getClassid())); - tmp + result .foreach( r -> Assertions .assertEquals( PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, r.getDataInfo().getProvenanceaction().getClassname())); - tmp + result .foreach( r -> Assertions .assertEquals( "0.85", r.getDataInfo().getTrust())); - Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); - tmp + Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); + result .filter(r -> r.getSource().substring(0, 3).equals("50|")) .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, 
r.getRelClass())); Assertions .assertEquals( - 2, tmp.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + 2, result.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); Assertions .assertEquals( - 3, tmp.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + 3, result.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); Assertions .assertEquals( - 2, tmp.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + 2, result.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); Assertions .assertEquals( - 1, tmp.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + 1, result.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); Assertions .assertEquals( - 1, tmp.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + 1, result.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); - Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); - tmp + Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); + result .filter(r -> r.getSource().substring(0, 3).equals("20|")) .foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass())); Assertions .assertEquals( - 1, tmp.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + 1, result.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); Assertions .assertEquals( - 1, tmp.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + 1, result.filter(r -> 
r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); Assertions .assertEquals( - 2, tmp.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + 2, result.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); Assertions .assertEquals( - 2, tmp.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + 2, result.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); Assertions .assertEquals( - 3, tmp.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + 3, result.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) .map(r -> r.getTarget()) .collect() .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) .map(r -> r.getTarget()) .collect() @@ -200,14 +212,14 @@ public class SparkJobTest { Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) .map(r -> r.getTarget()) .collect() .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) .map(r -> r.getTarget()) .collect() @@ -215,21 +227,21 @@ public class SparkJobTest { Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) .map(r -> r.getTarget()) .collect() .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); Assertions .assertTrue( - tmp + result .filter(r -> 
r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) .map(r -> r.getTarget()) .collect() .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) .map(r -> r.getTarget()) .collect() @@ -237,7 +249,7 @@ public class SparkJobTest { Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) .map(r -> r.getTarget()) .collect() @@ -245,7 +257,7 @@ public class SparkJobTest { Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) .map(r -> r.getTarget()) .collect() @@ -253,14 +265,14 @@ public class SparkJobTest { Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) .map(r -> r.getTarget()) .collect() .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) .map(r -> r.getTarget()) .collect() @@ -268,14 +280,14 @@ public class SparkJobTest { Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) .map(r -> r.getTarget()) .collect() .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) .map(r -> r.getTarget()) .collect() @@ -283,21 +295,21 @@ public class SparkJobTest { Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) .map(r -> r.getTarget()) .collect() .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); Assertions .assertTrue( - tmp + result .filter(r -> 
r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) .map(r -> r.getTarget()) .collect() .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) .map(r -> r.getTarget()) .collect() @@ -305,7 +317,7 @@ public class SparkJobTest { Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) .map(r -> r.getTarget()) .collect() @@ -313,11 +325,555 @@ public class SparkJobTest { Assertions .assertTrue( - tmp + result .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) .map(r -> r.getTarget()) .collect() .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); } + @Test + public void completeProjectExecution() throws Exception { + + final String graphPath = getClass() + .getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph") + .getPath(); + final String leavesPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") + .getPath(); + final String childParentPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(); + + final String resultOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(); + + final String projectOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/") + .getPath(); + + readPath(spark, leavesPath, Leaves.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/leavesInput"); + + readPath(spark, resultOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/orgsInput"); + + readPath(spark, projectOrgPath, KeyValueSet.class) + .write() + 
.option("compression", "gzip") + .json(workingDir.toString() + "/projectInput"); + + SparkResultToOrganizationFromSemRel + + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-relationPath", graphPath, + "-hive_metastore_uris", "", + "-outputPath", workingDir.toString() + "/finalrelation", + "-leavesPath", workingDir.toString() + "/leavesInput", + "-resultOrgPath", workingDir.toString() + "/orgsInput", + "-projectOrganizationPath", workingDir.toString() + "/projectInput", + "-childParentPath", childParentPath, + "-workingDir", workingDir.toString() + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD temp = sc + .textFile(workingDir.toString() + "/finalrelation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Assertions.assertEquals(36, temp.count()); + + JavaRDD project = temp.filter(r -> r.getSource().startsWith("40|") || r.getTarget().startsWith("40|")); + Assertions.assertEquals(18, project.count()); + + project.foreach(r -> Assertions.assertEquals(ModelConstants.PARTICIPATION, r.getSubRelType())); + project.foreach(r -> Assertions.assertEquals(ModelConstants.PROJECT_ORGANIZATION, r.getRelType())); + project + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); + project + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_ID, + r.getDataInfo().getProvenanceaction().getClassid())); + project + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_PROJECT_ORGANIZATION_SEM_REL_CLASS_NAME, + r.getDataInfo().getProvenanceaction().getClassname())); + project + .foreach( + r -> Assertions + .assertEquals( + "0.85", + r.getDataInfo().getTrust())); + + Assertions.assertEquals(9, project.filter(r -> r.getSource().substring(0, 3).equals("40|")).count()); + project + .filter(r -> 
r.getSource().substring(0, 3).equals("40|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_PARTICIPANT, r.getRelClass())); + Assertions + .assertEquals( + 2, project.filter(r -> r.getSource().equals("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 3, project.filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, project.filter(r -> r.getSource().equals("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 1, project.filter(r -> r.getSource().equals("40|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + Assertions + .assertEquals( + 1, project.filter(r -> r.getSource().equals("40|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions.assertEquals(9, project.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); + project + .filter(r -> r.getSource().substring(0, 3).equals("20|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.IS_PARTICIPANT, r.getRelClass())); + Assertions + .assertEquals( + 1, project.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 1, project.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, project.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 2, project.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + Assertions + .assertEquals( + 3, project.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + 
.contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("40|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + project + 
.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("40|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertTrue( + project + .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> 
r.getTarget()) + .collect() + .contains("40|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + } + + @Test + public void singleIterationExecution() throws Exception { + + final String graphPath = getClass() + .getResource("/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph") + .getPath(); + final String leavesPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") + .getPath(); + final String childParentPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(); + + final String resultOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(); + + final String projectOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/") + .getPath(); + + readPath(spark, leavesPath, Leaves.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/leavesInput"); + + readPath(spark, resultOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/orgsInput"); + + readPath(spark, projectOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/projectInput"); + + SparkResultToOrganizationFromSemRel + + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-relationPath", graphPath, + "-hive_metastore_uris", "", + "-outputPath", workingDir.toString() + "/finalrelation", + "-leavesPath", workingDir.toString() + "/leavesInput", + "-resultOrgPath", workingDir.toString() + "/orgsInput", + "-projectOrganizationPath", workingDir.toString() + "/projectInput", + "-childParentPath", childParentPath, + "-workingDir", workingDir.toString(), + "-iterations", "1" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD temp = sc + 
.textFile(workingDir.toString() + "/finalrelation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Assertions.assertEquals(16, temp.count()); + + Assertions.assertEquals(4, temp.filter(r -> r.getSource().startsWith("50|")).count()); + Assertions.assertEquals(4, temp.filter(r -> r.getTarget().startsWith("50|")).count()); + Assertions.assertEquals(4, temp.filter(r -> r.getSource().startsWith("40|")).count()); + Assertions.assertEquals(4, temp.filter(r -> r.getTarget().startsWith("40|")).count()); + Assertions.assertEquals(8, temp.filter(r -> r.getSource().startsWith("20|")).count()); + Assertions.assertEquals(8, temp.filter(r -> r.getTarget().startsWith("20|")).count()); + +// JavaRDD result = temp.filter(r -> r.getSource().startsWith("50|") || r.getTarget().startsWith("50|")); +// Assertions.assertEquals(18, result.count()); +// result.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); +// result.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); +// result +// .foreach( +// r -> Assertions +// .assertEquals( +// PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); +// result +// .foreach( +// r -> Assertions +// .assertEquals( +// PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, +// r.getDataInfo().getProvenanceaction().getClassid())); +// result +// .foreach( +// r -> Assertions +// .assertEquals( +// PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, +// r.getDataInfo().getProvenanceaction().getClassname())); +// result +// .foreach( +// r -> Assertions +// .assertEquals( +// "0.85", +// r.getDataInfo().getTrust())); +// +// Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); +// result +// .filter(r -> r.getSource().substring(0, 3).equals("50|")) +// .foreach(r ->
Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); +// Assertions +// .assertEquals( +// 2, result.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); +// Assertions +// .assertEquals( +// 3, result.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); +// Assertions +// .assertEquals( +// 2, result.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); +// Assertions +// .assertEquals( +// 1, result.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); +// Assertions +// .assertEquals( +// 1, result.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); +// +// Assertions.assertEquals(9, result.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); +// result +// .filter(r -> r.getSource().substring(0, 3).equals("20|")) +// .foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass())); +// Assertions +// .assertEquals( +// 1, result.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); +// Assertions +// .assertEquals( +// 1, result.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); +// Assertions +// .assertEquals( +// 2, result.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); +// Assertions +// .assertEquals( +// 2, result.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); +// Assertions +// .assertEquals( +// 3, result.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) +// .map(r -> r.getTarget()) +// .collect() 
+// .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) +// .map(r -> r.getTarget()) +// .collect() 
+// .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|openaire____::ec653e804967133b9436fdd30d3ff51d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::03748bcb5d754c951efec9700e18a56d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) +// .map(r -> r.getTarget()) +// .collect() 
+// .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); +// +// Assertions +// .assertTrue( +// result +// .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) +// .map(r -> r.getTarget()) +// .collect() +// .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + } } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActionsTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActionsTest.java index 77ed4dcbf..7a71240b2 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActionsTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActionsTest.java @@ -73,21 +73,21 @@ public class StepActionsTest { .execStep( spark, getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result") .getPath(), workingDir.toString() + "/newRelationPath", getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") - .getPath()); + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(), ModelConstants.HAS_AUTHOR_INSTITUTION); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -203,19 +203,19 @@ public class StepActionsTest { spark, getClass() .getResource( - 
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") .getPath(), workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); @@ -248,19 +248,19 @@ public class StepActionsTest { spark, getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relsforiteration1/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/resultOrganization/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/currentIteration/") .getPath(), getClass() .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/childParentOrg/") .getPath(), workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/project b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/project 
new file mode 100644 index 000000000..e8e35f555 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/project @@ -0,0 +1,7 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","validated":false} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","validated":false} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","validated":false} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0","validated":false} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","validated":false} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","validated":false} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1","validated":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result new file mode 100644 index 000000000..5aeabb71b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/graph/result @@ -0,0 +1,7 @@ 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/projectorganization b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/projectorganization new file mode 100644 index 000000000..81803f29d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/projectOrganization/projectorganization @@ -0,0 +1,5 @@ +{"key":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","valueSet":["20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"]} +{"key":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"]} +{"key":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","valueSet":["20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"]} +{"key":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"]} +{"key":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]} diff --git 
a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relation deleted file mode 100644 index db7db8fdd..000000000 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/execstep/relation +++ /dev/null @@ -1,14 +0,0 @@ -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} 
-{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} 
-{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} 
-{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} 
-{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest/relation new file mode 100644 index 000000000..10d46b1cb --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/projectorganizationtest/relation @@ -0,0 +1,7 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasParticipant","relType":"datasourceOrganization","source":"40|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file