diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index dcaedeb63..70a1c649e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -8,6 +8,8 @@ import java.util.Arrays; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.countrypropagation.pojo.CountrySbs; +import eu.dnetlib.dhp.countrypropagation.pojo.DatasourceCountry; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 8b44a079c..1de9d73fb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -7,12 +7,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; import java.util.HashSet; import java.util.Set; -import java.util.stream.Collectors; +import eu.dnetlib.dhp.countrypropagation.pojo.CountrySbs; +import eu.dnetlib.dhp.countrypropagation.pojo.DatasourceCountry; +import eu.dnetlib.dhp.countrypropagation.pojo.ResultCountrySet; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.*; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 8bd1f6fb7..8829aa819 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -9,6 +9,8 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.countrypropagation.pojo.CountrySbs; +import eu.dnetlib.dhp.countrypropagation.pojo.ResultCountrySet; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/pojo/CountrySbs.java similarity index 92% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/pojo/CountrySbs.java index 5dbd912cb..5e1d23806 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/pojo/CountrySbs.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.countrypropagation; +package eu.dnetlib.dhp.countrypropagation.pojo; import java.io.Serializable; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/pojo/DatasourceCountry.java similarity index 93% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/pojo/DatasourceCountry.java index 4edc0f7a2..5d3b28b00 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/pojo/DatasourceCountry.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.countrypropagation; +package eu.dnetlib.dhp.countrypropagation.pojo; import java.io.Serializable; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/pojo/ResultCountrySet.java similarity index 82% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/pojo/ResultCountrySet.java index 8c29424f2..f431b3057 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/pojo/ResultCountrySet.java @@ -1,5 +1,7 @@ -package eu.dnetlib.dhp.countrypropagation; +package eu.dnetlib.dhp.countrypropagation.pojo; + +import eu.dnetlib.dhp.countrypropagation.pojo.CountrySbs; import java.io.Serializable; import java.util.ArrayList; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 1ec521af1..ed660e2ac 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -51,8 +51,7 @@ public class SparkResultToProjectThroughSemRelJob { final String alreadyLinkedPath = parser.get("alreadyLinkedPath"); log.info("alreadyLinkedPath {}: ", alreadyLinkedPath); - final Boolean saveGraph = Boolean.valueOf(parser.get("saveGraph")); - log.info("saveGraph: {}", saveGraph); + SparkConf conf = new SparkConf(); @@ -60,11 +59,9 @@ public class SparkResultToProjectThroughSemRelJob { conf, isSparkSessionManaged, spark -> { - if (isTest(parser)) { - removeOutputDir(spark, outputPath); - } + execPropagation( - spark, outputPath, alreadyLinkedPath, potentialUpdatePath, saveGraph); + spark, outputPath, alreadyLinkedPath, potentialUpdatePath); }); } @@ -72,13 +69,12 @@ public class SparkResultToProjectThroughSemRelJob { SparkSession spark, String outputPath, String alreadyLinkedPath, - String potentialUpdatePath, - Boolean saveGraph) { + String potentialUpdatePath) { Dataset toaddrelations = readPath(spark, potentialUpdatePath, ResultProjectSet.class); Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultProjectSet.class); - if (saveGraph) { + toaddrelations .joinWith( alreadyLinked, @@ -89,7 +85,7 @@ public class SparkResultToProjectThroughSemRelJob { .mode(SaveMode.Append) .option("compression", "gzip") .json(outputPath); - } + } private static FlatMapFunction, Relation> mapRelationRn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 49d60b85a..90fa97196 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -56,11 +56,7 @@ public class SparkResultToCommunityFromOrganizationJob { final String resultClassName = parser.get("resultTableName"); log.info("resultTableName: {}", resultClassName); - final Boolean saveGraph = Optional - .ofNullable(parser.get("saveGraph")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); @@ -72,10 +68,9 @@ public class SparkResultToCommunityFromOrganizationJob { conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); - if (saveGraph) { + execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath); - } + }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index a10737849..7df2b46b2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -70,13 +70,10 @@ public class SparkResultToCommunityThroughSemRelJob { conf, isSparkSessionManaged, spark -> { - if (isTest(parser)) { - removeOutputDir(spark, outputPath); - } - if (saveGraph) { + execPropagation( spark, inputPath, outputPath, preparedInfoPath, resultClazz); - } + }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index dfca06874..ef7ea6bc6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -98,13 +98,13 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { String leavesPath, String childParentPath, String resultOrganizationPath, - String graphPath, + String relationPath, String workingPath, String outputPath, int iterations) { if (iterations == 1) { doPropagateOnce( - spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + spark, leavesPath, childParentPath, resultOrganizationPath, relationPath, workingPath, outputPath); } else { @@ -123,26 +123,26 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { notReachedFirstParent); doPropagate( - spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + spark, leavesPath, childParentPath, resultOrganizationPath, relationPath, workingPath, outputPath, propagationCounter); } } private static void doPropagateOnce(SparkSession spark, String leavesPath, String childParentPath, - String resultOrganizationPath, String graphPath, String workingPath, + String resultOrganizationPath, String relationPath, String workingPath, String outputPath) { StepActions .execStep( - spark, graphPath, workingPath + NEW_RELATION_PATH, + spark, relationPath, workingPath + NEW_RELATION_PATH, leavesPath, childParentPath, resultOrganizationPath); addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath); } private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath, - String resultOrganizationPath, String graphPath, String workingPath, String outputPath, + String resultOrganizationPath, String relationPath, String workingPath, String outputPath, PropagationCounter propagationCounter) { int iteration = 0; long leavesCount; @@ -151,7 +151,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { iteration++; StepActions .execStep( - spark, graphPath, workingPath + NEW_RELATION_PATH, + spark, relationPath, workingPath + NEW_RELATION_PATH, leavesPath, childParentPath, resultOrganizationPath); StepActions .prepareForNextStep( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java index 1adbbe60e..9137094f2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java @@ -27,10 +27,10 @@ import scala.Tuple2; public class StepActions implements Serializable { public static void execStep(SparkSession spark, - String graphPath, String newRelationPath, + String relationPath, String newRelationPath, String leavesPath, String chldParentOrgPath, String resultOrgPath) { - Dataset relationGraph = readPath(spark, graphPath, Relation.class); + Dataset relationGraph = readPath(spark, relationPath, Relation.class); // select only the relation source target among those proposed by propagation that are not already existent getNewRels( newRelationPath, relationGraph, diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 79944ba09..c9edfeafd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -176,10 +176,6 @@ pathMap ${pathMap} - - outputPath - ${workingDir}/results - @@ -220,10 +216,6 @@ sourcePath ${outputPath} - - - - @@ -240,10 +232,6 @@ sourcePath ${outputPath} - - outputPath - ${outputPath} - organizationtoresultcommunitymap ${organizationtoresultcommunitymap} @@ -264,10 +252,6 @@ sourcePath ${outputPath} - - outputPath - ${outputPath} - allowedsemrels ${allowedsemrelsresultproject} @@ -280,7 +264,7 @@ - ${wf:appPath()}/result_project + ${wf:appPath()}/community_sem_rel @@ -288,10 +272,7 @@ sourcePath ${outputPath} - - outputPath - ${workingDir}/communitysemrel - + allowedsemrels ${allowedsemrelscommunitysemrel} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml index b02e00949..e14f464ab 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -186,7 +186,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${outputPath} - --workingPath${workingDir}/eoscTag + --workingPath${workingDir}/bulktag @@ -230,7 +230,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${outputPath}/publication - --workingPath${workingDir}/eoscContextTag/publication + --workingPath${workingDir}/bulktag/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --datasourceMapPath${workingDir}/datasourcemaster @@ -256,7 +256,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${outputPath}/dataset - --workingPath${workingDir}/eoscContextTag/dataset + --workingPath${workingDir}/bulktag/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --datasourceMapPath${workingDir}/datasourcemaster @@ -281,7 +281,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${outputPath}/software - --workingPath${workingDir}/eoscContextTag/software + --workingPath${workingDir}/bulktag/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --datasourceMapPath${workingDir}/datasourcemaster @@ -306,14 +306,24 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${outputPath}/otherresearchproduct - --workingPath${workingDir}/eoscContextTag/otherresearchproduct + --workingPath${workingDir}/bulktag/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --datasourceMapPath${workingDir}/datasourcemaster - + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index aed304345..cb227672e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -30,20 +30,13 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - + @@ -222,7 +215,7 @@ --sourcePath${sourcePath}/publication --workingPath${workingDir}/country/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication + --outputPath${workingDir}/country/result/publication @@ -251,7 +244,7 @@ --sourcePath${sourcePath}/dataset --workingPath${workingDir}/country/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset + --outputPath${workingDir}/country/result/dataset @@ -280,7 +273,7 @@ --sourcePath${sourcePath}/otherresearchproduct --workingPath${workingDir}/country/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct + --outputPath${workingDir}/country/result/otherresearchproduct @@ -309,14 +302,21 @@ --sourcePath${sourcePath}/software --workingPath${workingDir}/country/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software + --outputPath${workingDir}/country/result/software - - + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index b487dfcf1..6377ae795 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -390,7 +390,16 @@ - + + + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml index ddb879be8..93a2f98be 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -8,10 +8,7 @@ allowedsemrels the allowed semantics - - outputPath - the output path - + @@ -76,16 +73,22 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --saveGraph${saveGraph} --hive_metastore_uris${hive_metastore_uris} - --outputPath${outputPath}/relation + --outputPath${sourcePath}/relation --potentialUpdatePath${workingDir}/resultproject/preparedInfo/potentialUpdates --alreadyLinkedPath${workingDir}/resultproject/preparedInfo/alreadyLinked + + + + + + + + - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml index f753adb42..7402c7679 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -8,10 +8,7 @@ organizationtoresultcommunitymap organization community map - - outputPath - the output path - + @@ -25,21 +22,12 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - yarn @@ -97,7 +85,7 @@ --outputPath${workingDir}/communityorganization/publication --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --saveGraph${saveGraph} + @@ -126,7 +114,7 @@ --outputPath${workingDir}/communityorganization/dataset --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --saveGraph${saveGraph} + @@ -155,7 +143,7 @@ --outputPath${workingDir}/communityorganization/otherresearchproduct --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --saveGraph${saveGraph} + @@ -184,14 +172,22 @@ --outputPath${workingDir}/communityorganization/software --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --saveGraph${saveGraph} + - + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml index ba0e66372..8019eec24 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -18,21 +18,12 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - @@ -214,8 +205,8 @@ --sourcePath${sourcePath}/publication --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - --saveGraph${saveGraph} + --outputPath${workingDir}/communitysemrel/publication + @@ -243,8 +234,8 @@ --sourcePath${sourcePath}/dataset --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - --saveGraph${saveGraph} + --outputPath${workingDir}/communitysemrel/dataset + @@ -272,8 +263,8 @@ --sourcePath${sourcePath}/otherresearchproduct --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - --saveGraph${saveGraph} + --outputPath${workingDir}/communitysemrel/otherresearchproduct + @@ -301,15 +292,22 @@ --sourcePath${sourcePath}/software --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - --saveGraph${saveGraph} + --outputPath${workingDir}/communitysemrel/software + - - + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml index 7df865387..7918df120 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml @@ -4,10 +4,7 @@ sourcePath the source path - - - - + @@ -27,23 +24,6 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - yarn @@ -99,12 +79,19 @@ --workingDir${workingDir}/affiliationSemanticRelation/working --iterations${iterations} + + + + + + + + + - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java index d9b879de8..e2a6ed872 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import eu.dnetlib.dhp.countrypropagation.pojo.DatasourceCountry; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java index 797d1c979..5ceb8f0a6 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java @@ -1,12 +1,11 @@ package eu.dnetlib.dhp.countrypropagation; -import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; - import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import eu.dnetlib.dhp.countrypropagation.pojo.ResultCountrySet; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java index 2fe1bc574..d51db3278 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java @@ -33,32 +33,32 @@ public class ProjectPropagationJobTest { private static SparkSession spark; private static Path workingDir; + private static final SparkConf conf = new SparkConf(); @BeforeAll public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(ProjectPropagationJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); - SparkConf conf = new SparkConf(); + conf.setAppName(ProjectPropagationJobTest.class.getSimpleName()); conf.setMaster("local[*]"); conf.set("spark.driver.host", "localhost"); conf.set("hive.metastore.local", "true"); conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + spark = SparkSession - .builder() - .appName(ProjectPropagationJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); + .builder() + .appName(ProjectPropagationJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); } @AfterAll public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); } @@ -71,6 +71,7 @@ public class ProjectPropagationJobTest { @Test void NoUpdateTest() throws Exception { + workingDir = Files.createTempDirectory(ProjectPropagationJobTest.class.getSimpleName()); final String potentialUpdateDate = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates") @@ -82,10 +83,10 @@ public class ProjectPropagationJobTest { SparkResultToProjectThroughSemRelJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-hive_metastore_uris", "", - "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", "-potentialUpdatePath", potentialUpdateDate, "-alreadyLinkedPath", alreadyLinkedPath, @@ -98,6 +99,10 @@ public class ProjectPropagationJobTest { .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); Assertions.assertEquals(0, tmp.count()); + + FileUtils.deleteDirectory(workingDir.toFile()); + + } /** @@ -107,6 +112,12 @@ public class ProjectPropagationJobTest { */ @Test void UpdateTenTest() throws Exception { + workingDir = Files.createTempDirectory(ProjectPropagationJobTest.class.getSimpleName()); + spark = SparkSession + .builder() + .appName(ProjectPropagationJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); final String potentialUpdatePath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") @@ -118,10 +129,10 @@ public class ProjectPropagationJobTest { SparkResultToProjectThroughSemRelJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-hive_metastore_uris", "", - "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", "-potentialUpdatePath", potentialUpdatePath, "-alreadyLinkedPath", alreadyLinkedPath, @@ -169,6 +180,9 @@ public class ProjectPropagationJobTest { .sql( "Select * from temporary where datainfo.inferenceprovenance = 'propagation'") .count()); + + FileUtils.deleteDirectory(workingDir.toFile()); + } /** @@ -179,6 +193,12 @@ public class ProjectPropagationJobTest { */ @Test void UpdateMixTest() throws Exception { + workingDir = Files.createTempDirectory(ProjectPropagationJobTest.class.getSimpleName()); + spark = SparkSession + .builder() + .appName(ProjectPropagationJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); final String potentialUpdatepath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") @@ -190,10 +210,10 @@ public class ProjectPropagationJobTest { SparkResultToProjectThroughSemRelJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-hive_metastore_uris", "", - "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", "-potentialUpdatePath", potentialUpdatepath, "-alreadyLinkedPath", alreadyLinkedPath, @@ -244,5 +264,7 @@ public class ProjectPropagationJobTest { .sql( "Select * from temporary where datainfo.inferenceprovenance = 'propagation'") .count()); + + FileUtils.deleteDirectory(workingDir.toFile()); } }