diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml
index 2512fc5bc7..01aaadae5b 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml
@@ -162,6 +162,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.autoBroadcastJoinThreshold=-1
--conf spark.sql.shuffle.partitions=15000
--inputPath${graphInputPath}/publication
@@ -197,6 +198,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.autoBroadcastJoinThreshold=-1
--conf spark.sql.shuffle.partitions=8000
--inputPath${graphInputPath}/dataset
@@ -232,6 +234,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.autoBroadcastJoinThreshold=-1
--conf spark.sql.shuffle.partitions=5000
--inputPath${graphInputPath}/otherresearchproduct
@@ -267,6 +270,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.autoBroadcastJoinThreshold=-1
--conf spark.sql.shuffle.partitions=2000
--inputPath${graphInputPath}/software
@@ -302,6 +306,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.autoBroadcastJoinThreshold=-1
--conf spark.sql.shuffle.partitions=1000
--inputPath${graphInputPath}/datasource
@@ -337,6 +342,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.autoBroadcastJoinThreshold=-1
--conf spark.sql.shuffle.partitions=1000
--inputPath${graphInputPath}/organization
@@ -372,6 +378,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.autoBroadcastJoinThreshold=-1
--conf spark.sql.shuffle.partitions=2000
--inputPath${graphInputPath}/project
@@ -407,6 +414,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.autoBroadcastJoinThreshold=-1
--conf spark.sql.shuffle.partitions=2000
--inputPath${graphInputPath}/person
@@ -442,6 +450,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.autoBroadcastJoinThreshold=-1
--conf spark.sql.shuffle.partitions=20000
--inputPath${graphInputPath}/relation