From 38ecfd57855b212cda31863ab78bbd2b5ae2e418 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 30 Apr 2020 18:28:46 +0200 Subject: [PATCH] the wf with all the three steps for blacklisting relations --- .../dhp/blacklist/oozie_app/workflow.xml | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml index e38d721b96..48351e3fb0 100644 --- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml @@ -12,6 +12,10 @@ postgresPassword the postgres password + + sourcePath + the source path + @@ -38,9 +42,56 @@ --postgresUser${postgresUser} --postgresPassword${postgresPassword} - + + + + yarn + cluster + PrepareMergedRelation + eu.dnetlib.dhp.blacklist.PrepareMergedRelationJob + dhp-blacklist-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --outputPath${workingDir}/relation + --hive_metastore_uris${hive_metastore_uris} + + + + + + + + yarn + cluster + ApplyBlacklist + eu.dnetlib.dhp.blacklist.SparkRemoveBlacklistedRelationJob + dhp-blacklist-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --outputPath${workingDir}/relation + --hdfsPath${workingDir}/blacklist + + + + \ No newline at end of file