diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
index a9642d637..ba3633e07 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
@@ -100,16 +100,12 @@
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --conf spark.sql.shuffle.partitions=3840
- --conf spark.speculation=false
- --conf spark.hadoop.mapreduce.map.speculative=false
- --conf spark.hadoop.mapreduce.reduce.speculative=false
+ --conf spark.sql.shuffle.partitions=8000
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
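A note on the new opt, not part of the diff itself: on YARN each executor container is sized as executor memory plus spark.executor.memoryOverhead, so reusing ${sparkExecutorMemory} as the overhead value doubles the per-container footprint requested from the cluster. A minimal sketch of the equivalent programmatic configuration, assuming a hypothetical 4g value for ${sparkExecutorMemory} and an illustrative app name:

    // Minimal sketch, assumptions flagged inline; not code from this PR.
    import org.apache.spark.sql.SparkSession;

    public class PreparePhaseConfSketch {
        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder()
                    .appName("ORCIDPropagation-PreparePhase1")      // illustrative name
                    .config("spark.executor.memory", "4g")          // stand-in for ${sparkExecutorMemory}
                    .config("spark.executor.memoryOverhead", "4g")  // same value: YARN container asks for 4g + 4g = 8g
                    .config("spark.sql.shuffle.partitions", "8000")
                    .getOrCreate();
            spark.stop();
        }
    }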
@@ -132,12 +128,11 @@
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@@ -160,12 +155,11 @@
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@@ -188,12 +182,11 @@
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@@ -218,12 +211,11 @@
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
@@ -247,19 +239,14 @@
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
- --executor-cores=4
- --executor-memory=4G
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
- --conf spark.executor.memoryOverhead=5G
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --conf spark.speculation=false
- --conf spark.hadoop.mapreduce.map.speculative=false
- --conf spark.hadoop.mapreduce.reduce.speculative=false
--conf spark.sql.shuffle.partitions=15000
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
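Worth noting, outside the diff itself: this action previously pinned 4 cores and 4G per executor with a 5G overhead; it now uses the same workflow-level parameters as the other actions, and only the 15000 shuffle partitions stay hardcoded for publications, the largest result type. As a rough plausibility check with assumed figures (not from the PR), sizing tasks at about 128 MB of shuffle data each lands in the same order of magnitude:

    // Back-of-envelope sketch; the 2 TB shuffle volume is an assumption, not a measured value.
    long shuffleBytes = 2L * 1024 * 1024 * 1024 * 1024;  // assume ~2 TB shuffled for publications
    long bytesPerTask = 128L * 1024 * 1024;              // target ~128 MB per task
    long partitions   = shuffleBytes / bytesPerTask;     // 16384, same order as the 15000 set here
    System.out.println(partitions);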
@@ -282,15 +269,12 @@
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --conf spark.speculation=false
- --conf spark.hadoop.mapreduce.map.speculative=false
- --conf spark.hadoop.mapreduce.reduce.speculative=false
+ --conf spark.sql.shuffle.partitions=8000
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
@@ -312,15 +296,12 @@
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --conf spark.speculation=false
- --conf spark.hadoop.mapreduce.map.speculative=false
- --conf spark.hadoop.mapreduce.reduce.speculative=false
+ --conf spark.sql.shuffle.partitions=8000
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
@@ -342,15 +323,12 @@
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --conf spark.speculation=false
- --conf spark.hadoop.mapreduce.map.speculative=false
- --conf spark.hadoop.mapreduce.reduce.speculative=false
+ --conf spark.sql.shuffle.partitions=4000
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
@@ -362,15 +340,6 @@
-
-
-
-
-
-
-
-
-
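A closing observation, not part of the diff: with spark.dynamicAllocation.enabled and the ${spark2MaxExecutors} cap removed from every action, each job now runs with a fixed executor count, taken from the site defaults or from spark.executor.instances (Spark's own default on YARN is 2 when nothing sets it). The dropped spark.speculation=false lines were effectively no-ops anyway, since false is already Spark's default unless the cluster defaults override it. A hedged sketch of what a submitter would now pin explicitly:

    // Sketch under assumptions; not code from this PR.
    import org.apache.spark.SparkConf;

    public class FixedExecutorsSketch {
        public static void main(String[] args) {
            SparkConf conf = new SparkConf()
                    // hypothetical stand-in for the removed ${spark2MaxExecutors} cap
                    .set("spark.executor.instances", "50");
            System.out.println(conf.get("spark.executor.instances"));
        }
    }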