diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml
index 4468382bec..eec67fc5c4 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml
@@ -116,17 +116,19 @@
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+ --conf spark.sql.shuffle.partitions=10000
<arg>--inputPath</arg><arg>${inputPath}/publication</arg>
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
- <arg>--numPartitions</arg><arg>8000</arg>
+ <arg>--numPartitions</arg><arg>10000</arg>
@@ -143,17 +145,19 @@
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+ --conf spark.sql.shuffle.partitions=4000
<arg>--inputPath</arg><arg>${inputPath}/dataset</arg>
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
- <arg>--numPartitions</arg><arg>4000</arg>
+ <arg>--numPartitions</arg><arg>8000</arg>
@@ -170,11 +174,13 @@
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+ --conf spark.sql.shuffle.partitions=8000
<arg>--inputPath</arg><arg>${inputPath}/otherresearchproduct</arg>
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
@@ -197,17 +203,19 @@
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+ --conf spark.sql.shuffle.partitions=1000
<arg>--inputPath</arg><arg>${inputPath}/software</arg>
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
- <arg>--numPartitions</arg><arg>300</arg>
+ <arg>--numPartitions</arg><arg>1000</arg>
@@ -224,17 +232,19 @@
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+ --conf spark.sql.shuffle.partitions=200
<arg>--inputPath</arg><arg>${inputPath}/datasource</arg>
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
- <arg>--numPartitions</arg><arg>100</arg>
+ <arg>--numPartitions</arg><arg>200</arg>
@@ -251,17 +261,19 @@
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+ --conf spark.sql.shuffle.partitions=1000
<arg>--inputPath</arg><arg>${inputPath}/organization</arg>
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
- <arg>--numPartitions</arg><arg>400</arg>
+ <arg>--numPartitions</arg><arg>1000</arg>
@@ -278,17 +290,19 @@
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+ --conf spark.sql.shuffle.partitions=1000
<arg>--inputPath</arg><arg>${inputPath}/project</arg>
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
- <arg>--numPartitions</arg><arg>100</arg>
+ <arg>--numPartitions</arg><arg>1000</arg>
@@ -305,17 +319,19 @@
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
+ --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+ --conf spark.sql.shuffle.partitions=15000
<arg>--inputPath</arg><arg>${inputPath}/relation</arg>
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
- <arg>--numPartitions</arg><arg>10000</arg>
+ <arg>--numPartitions</arg><arg>15000</arg>
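
For reference, here is a minimal sketch of the publication step as it reads after this change, reconstructed from the first hunk. Only the spark-opts and arg lines come from the diff itself; the surrounding action/spark wrapper, the action and job names, the main class, the jar, and the transition targets are hypothetical placeholders following the usual Oozie spark-action layout.

    <action name="import_publication"> <!-- hypothetical action name -->
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Import table publication</name> <!-- hypothetical job name -->
            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class> <!-- assumed importer class, not shown in this diff -->
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar> <!-- hypothetical jar name -->
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
                --conf spark.sql.shuffle.partitions=10000
            </spark-opts>
            <arg>--inputPath</arg><arg>${inputPath}/publication</arg>
            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
            <arg>--numPartitions</arg><arg>10000</arg>
        </spark>
        <ok to="..."/> <!-- transition targets not shown in the diff -->
        <error to="..."/>
    </action>

In most steps the new spark.sql.shuffle.partitions value is aligned with the new --numPartitions argument (publication 10000, software 1000, datasource 200, organization 1000, project 1000, relation 15000); the dataset step is the exception, setting 4000 shuffle partitions against 8000 output partitions.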