From d13e3d3f6896efc23ecac430e629e163cb4615c6 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 23 Jun 2020 11:01:42 +0200 Subject: [PATCH] fixed paths --- .../dhp/broker/oa/GenerateEventsJob.java | 5 +- .../dhp/broker/oa/JoinEntitiesJob.java | 13 ++-- .../dhp/broker/oa/PrepareGroupsJob.java | 2 +- .../oa/generate_all/oozie_app/workflow.xml | 1 - .../dhp/broker/oa/generate_events.json | 6 -- .../broker/oa/partial/oozie_app/workflow.xml | 59 +------------------ 6 files changed, 8 insertions(+), 78 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java index 3ea0086ff..089fbf6d4 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java @@ -44,9 +44,6 @@ public class GenerateEventsJob { .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String graphPath = parser.get("graphPath"); - log.info("graphPath: {}", graphPath); - final String workingPath = parser.get("workingPath"); log.info("workingPath: {}", workingPath); @@ -70,7 +67,7 @@ public class GenerateEventsJob { ClusterUtils.removeDir(spark, eventsPath); final Dataset groups = ClusterUtils - .readPath(spark, graphPath + "/relation", ResultGroup.class); + .readPath(spark, workingPath + "/relation", ResultGroup.class); final Dataset events = groups .map( diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinEntitiesJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinEntitiesJob.java index dac308f36..da77a4673 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinEntitiesJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinEntitiesJob.java @@ -43,9 +43,6 @@ public class JoinEntitiesJob { .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String graphPath = parser.get("graphPath"); - log.info("graphPath: {}", graphPath); - final String workingPath = parser.get("workingPath"); log.info("workingPath: {}", workingPath); @@ -59,16 +56,16 @@ public class JoinEntitiesJob { ClusterUtils.removeDir(spark, joinedEntitiesPath); final Dataset r0 = ClusterUtils - .readPath(spark, graphPath + "/simpleEntities", OaBrokerMainEntity.class); + .readPath(spark, workingPath + "/simpleEntities", OaBrokerMainEntity.class); final Dataset r1 = join( - r0, ClusterUtils.readPath(spark, graphPath + "/relatedProjects", RelatedProject.class)); + r0, ClusterUtils.readPath(spark, workingPath + "/relatedProjects", RelatedProject.class)); final Dataset r2 = join( - r1, ClusterUtils.readPath(spark, graphPath + "/relatedDatasets", RelatedDataset.class)); + r1, ClusterUtils.readPath(spark, workingPath + "/relatedDatasets", RelatedDataset.class)); final Dataset r3 = join( - r2, ClusterUtils.readPath(spark, graphPath + "/relatedPublications", RelatedPublication.class)); + r2, ClusterUtils.readPath(spark, workingPath + "/relatedPublications", RelatedPublication.class)); final Dataset r4 = join( - r3, ClusterUtils.readPath(spark, graphPath + "/relatedSoftwares", RelatedSoftware.class)); + r3, ClusterUtils.readPath(spark, workingPath + "/relatedSoftwares", RelatedSoftware.class)); r4.write().mode(SaveMode.Overwrite).json(joinedEntitiesPath); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java index aa057eee8..159047dad 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java @@ -58,7 +58,7 @@ public class PrepareGroupsJob { ClusterUtils.removeDir(spark, groupsPath); final Dataset results = ClusterUtils - .readPath(spark, graphPath + "/joinedEntities", OaBrokerMainEntity.class); + .readPath(spark, workingPath + "/joinedEntities", OaBrokerMainEntity.class); final Dataset mergedRels = ClusterUtils .readPath(spark, graphPath + "/relation", Relation.class) diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml index bec6f221d..18e2eedca 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml @@ -283,7 +283,6 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 - --graphPath${graphInputPath} --workingPath${workingPath} --isLookupUrl${isLookupUrl} --dedupConfProfile${dedupConfProfId} diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json index d185bc73d..7ae076159 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json @@ -1,10 +1,4 @@ [ - { - "paramName": "g", - "paramLongName": "graphPath", - "paramDescription": "the path where there the graph is stored", - "paramRequired": true - }, { "paramName": "o", "paramLongName": "workingPath", diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/workflow.xml index 253910595..1ccdef929 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/workflow.xml @@ -73,68 +73,12 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - yarn - cluster - PrepareRelatedPublicationsJob - eu.dnetlib.dhp.broker.oa.PrepareRelatedPublicationsJob - dhp-broker-events-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --graphPath${graphInputPath} - --workingPath${workingPath} - - - - - - - - - yarn - cluster - PrepareRelatedDatasetsJob - eu.dnetlib.dhp.broker.oa.PrepareRelatedDatasetsJob - dhp-broker-events-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --graphPath${graphInputPath} - --workingPath${workingPath} - - - - @@ -201,7 +145,6 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 - --graphPath${graphInputPath} --workingPath${workingPath} --isLookupUrl${isLookupUrl} --dedupConfProfile${dedupConfProfId}