diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml
index 3c73fefb6..250f7398f 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml
@@ -28,6 +28,28 @@
sparkExecutorCores
number of cores used by single executor
+
+ oozieActionShareLibForSpark2
+ oozie action sharelib for spark 2.*
+
+
+ spark2ExtraListeners
+ com.cloudera.spark.lineage.NavigatorAppListener
+ spark 2.* extra listeners classname
+
+
+ spark2SqlQueryExecutionListeners
+ com.cloudera.spark.lineage.NavigatorQueryListener
+ spark 2.* sql query execution listeners classname
+
+
+ spark2YarnHistoryServerAddress
+ spark 2.* yarn history server address
+
+
+ spark2EventLogDir
+ spark 2.* event log dir location
+
@@ -42,6 +64,10 @@
oozie.launcher.mapred.job.queue.name
${oozieLauncherQueueName}
+
+ oozie.action.sharelib.for.spark
+ ${oozieActionShareLibForSpark2}
+
@@ -53,20 +79,19 @@
-
-
-
yarn
cluster
Create Similarity Relations
eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels
dhp-dedup-openaire-${projectVersion}.jar
- --executor-memory ${sparkExecutorMemory}
- --executor-cores ${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
- --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-mtyarn
--i${graphBasePath}
@@ -80,21 +105,19 @@
-
-
-
yarn
cluster
Create Merge Relations
eu.dnetlib.dhp.oa.dedup.SparkCreateMergeRels
dhp-dedup-openaire-${projectVersion}.jar
- --executor-memory ${sparkExecutorMemory}
- --executor-cores ${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
- --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
- --conf spark.sql.warehouse.dir="/user/hive/warehouse"
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-mtyarn
--i${graphBasePath}
@@ -108,21 +131,19 @@
-
-
-
yarn
cluster
Create Dedup Record
eu.dnetlib.dhp.oa.dedup.SparkCreateDedupRecord
dhp-dedup-openaire-${projectVersion}.jar
- --executor-memory ${sparkExecutorMemory}
- --executor-cores ${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
- --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
- --conf spark.sql.warehouse.dir="/user/hive/warehouse"
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-mtyarn
--i${graphBasePath}