fixed paths

2020-06-23 11:01:42 +02:00 · 2020-06-23 11:01:42 +02:00 · d13e3d3f68
parent 8386c6f90d
commit d13e3d3f68
6 changed files with 8 additions and 78 deletions
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java
@@ -44,9 +44,6 @@ public class GenerateEventsJob {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String graphPath = parser.get("graphPath");
-		log.info("graphPath: {}", graphPath);
-
 		final String workingPath = parser.get("workingPath");
 		log.info("workingPath: {}", workingPath);

@@ -70,7 +67,7 @@ public class GenerateEventsJob {
 			ClusterUtils.removeDir(spark, eventsPath);

 			final Dataset<ResultGroup> groups = ClusterUtils
-				.readPath(spark, graphPath + "/relation", ResultGroup.class);
+				.readPath(spark, workingPath + "/relation", ResultGroup.class);

 			final Dataset<Event> events = groups
 				.map(
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinEntitiesJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinEntitiesJob.java
@@ -43,9 +43,6 @@ public class JoinEntitiesJob {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String graphPath = parser.get("graphPath");
-		log.info("graphPath: {}", graphPath);
-
 		final String workingPath = parser.get("workingPath");
 		log.info("workingPath: {}", workingPath);

@@ -59,16 +56,16 @@ public class JoinEntitiesJob {
 			ClusterUtils.removeDir(spark, joinedEntitiesPath);

 			final Dataset<OaBrokerMainEntity> r0 = ClusterUtils
-				.readPath(spark, graphPath + "/simpleEntities", OaBrokerMainEntity.class);
+				.readPath(spark, workingPath + "/simpleEntities", OaBrokerMainEntity.class);

 			final Dataset<OaBrokerMainEntity> r1 = join(
-				r0, ClusterUtils.readPath(spark, graphPath + "/relatedProjects", RelatedProject.class));
+				r0, ClusterUtils.readPath(spark, workingPath + "/relatedProjects", RelatedProject.class));
 			final Dataset<OaBrokerMainEntity> r2 = join(
-				r1, ClusterUtils.readPath(spark, graphPath + "/relatedDatasets", RelatedDataset.class));
+				r1, ClusterUtils.readPath(spark, workingPath + "/relatedDatasets", RelatedDataset.class));
 			final Dataset<OaBrokerMainEntity> r3 = join(
-				r2, ClusterUtils.readPath(spark, graphPath + "/relatedPublications", RelatedPublication.class));
+				r2, ClusterUtils.readPath(spark, workingPath + "/relatedPublications", RelatedPublication.class));
 			final Dataset<OaBrokerMainEntity> r4 = join(
-				r3, ClusterUtils.readPath(spark, graphPath + "/relatedSoftwares", RelatedSoftware.class));
+				r3, ClusterUtils.readPath(spark, workingPath + "/relatedSoftwares", RelatedSoftware.class));

 			r4.write().mode(SaveMode.Overwrite).json(joinedEntitiesPath);

--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java
@@ -58,7 +58,7 @@ public class PrepareGroupsJob {
 			ClusterUtils.removeDir(spark, groupsPath);

 			final Dataset<OaBrokerMainEntity> results = ClusterUtils
-				.readPath(spark, graphPath + "/joinedEntities", OaBrokerMainEntity.class);
+				.readPath(spark, workingPath + "/joinedEntities", OaBrokerMainEntity.class);

 			final Dataset<Relation> mergedRels = ClusterUtils
 				.readPath(spark, graphPath + "/relation", Relation.class)
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml
@@ -283,7 +283,6 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingPath</arg><arg>${workingPath}</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
            <arg>--dedupConfProfile</arg><arg>${dedupConfProfId}</arg>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json
@@ -1,10 +1,4 @@
 [
-	{
-		"paramName": "g",
-		"paramLongName": "graphPath",
-		"paramDescription": "the path where there the graph is stored",
-		"paramRequired": true
-	},
 	{
 		"paramName": "o",
 		"paramLongName": "workingPath",
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/workflow.xml
@@ -73,68 +73,12 @@
        </configuration>
    </global>

-    <start to="ensure_working_path"/>
+    <start to="join_entities"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    
-    <action name="ensure_working_path">
-        <fs>
-            <mkdir path='${workingPath}'/>
-        </fs>
-        <ok to="prepare_related_publications"/>
-        <error to="Kill"/>
-    </action>
-    
-    <action name="prepare_related_publications">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>PrepareRelatedPublicationsJob</name>
-            <class>eu.dnetlib.dhp.broker.oa.PrepareRelatedPublicationsJob</class>
-            <jar>dhp-broker-events-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-cores=${sparkExecutorCores}
-                --executor-memory=${sparkExecutorMemory}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
-            </spark-opts>
-            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
-        </spark>
-        <ok to="prepare_related_datasets"/>
-        <error to="Kill"/>
-    </action> 
-
- 
-    <action name="prepare_related_datasets">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>PrepareRelatedDatasetsJob</name>
-            <class>eu.dnetlib.dhp.broker.oa.PrepareRelatedDatasetsJob</class>
-            <jar>dhp-broker-events-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-cores=${sparkExecutorCores}
-                --executor-memory=${sparkExecutorMemory}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
-            </spark-opts>
-            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
-        </spark>
-        <ok to="join_entities"/>
-        <error to="Kill"/>
-    </action>
    
    <action name="join_entities">
        <spark xmlns="uri:oozie:spark-action:0.2">
@@ -201,7 +145,6 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingPath</arg><arg>${workingPath}</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
            <arg>--dedupConfProfile</arg><arg>${dedupConfProfId}</arg>