diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
index 7c500493f..29e8ddea2 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
@@ -89,7 +89,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=15000
+ --conf spark.sql.shuffle.partitions=3840
--graphBasePath${graphBasePath}
--o${graphOutputPath}
@@ -114,7 +114,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=15000
+ --conf spark.sql.shuffle.partitions=3840
--graphInputPath${graphBasePath}
--outputPath${workingPath}/grouped_entities
@@ -148,7 +148,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${workingPath}/grouped_entities
--outputPath${graphOutputPath}/datasource
@@ -173,7 +173,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${workingPath}/grouped_entities
--outputPath${graphOutputPath}/project
@@ -198,7 +198,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${workingPath}/grouped_entities
--outputPath${graphOutputPath}/organization
@@ -223,7 +223,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${workingPath}/grouped_entities
--outputPath${graphOutputPath}/publication
@@ -248,7 +248,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${workingPath}/grouped_entities
--outputPath${graphOutputPath}/dataset
@@ -273,7 +273,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${workingPath}/grouped_entities
--outputPath${graphOutputPath}/software
@@ -298,7 +298,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${workingPath}/grouped_entities
--outputPath${graphOutputPath}/otherresearchproduct
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml
index 02fdd8431..3681b5bdb 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml
@@ -132,7 +132,7 @@
--isLookUpUrl${isLookUpUrl}
--actionSetId${actionSetId}
--workingPath${workingPath}
- --numPartitions8000
+ --numPartitions5000
@@ -160,7 +160,7 @@
--actionSetId${actionSetId}
--workingPath${workingPath}
--whiteListPath${whiteListPath}
- --numPartitions8000
+ --numPartitions5000
@@ -241,7 +241,7 @@
--workingPath${workingPath}
--isLookUpUrl${isLookUpUrl}
--actionSetId${actionSetIdOpenorgs}
- --numPartitions8000
+ --numPartitions5000
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
index 58d5bb26d..193f2a776 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
@@ -260,7 +260,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --conf spark.sql.shuffle.partitions=10000
+ --conf spark.sql.shuffle.partitions=3840
--preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc
--sourcePath${sourcePath}/publication
@@ -290,7 +290,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --conf spark.sql.shuffle.partitions=5000
+ --conf spark.sql.shuffle.partitions=3840
--preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc
--sourcePath${sourcePath}/dataset
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml
index ee79f4f1a..68540aba1 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml
@@ -115,7 +115,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphInputPath}/publication
--outputPath${graphOutputPath}/publication
@@ -141,7 +141,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphInputPath}/dataset
--outputPath${graphOutputPath}/dataset
@@ -167,7 +167,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphInputPath}/otherresearchproduct
--outputPath${graphOutputPath}/otherresearchproduct
@@ -193,7 +193,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphInputPath}/software
--outputPath${graphOutputPath}/software
@@ -219,7 +219,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphInputPath}/datasource
--outputPath${graphOutputPath}/datasource
@@ -245,7 +245,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphInputPath}/organization
--outputPath${graphOutputPath}/organization
@@ -271,7 +271,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphInputPath}/project
--outputPath${graphOutputPath}/project
@@ -297,7 +297,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphInputPath}/relation
--outputPath${graphOutputPath}/relation
@@ -339,7 +339,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphOutputPath}/publication
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication
@@ -366,7 +366,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphOutputPath}/dataset
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset
@@ -393,7 +393,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphOutputPath}/otherresearchproduct
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
@@ -420,7 +420,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphOutputPath}/software
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software
@@ -449,7 +449,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphOutputPath}
--workingDir${workingDir}/working/hostedby
@@ -481,7 +481,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphOutputPath}/publication
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication
@@ -510,7 +510,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphOutputPath}/dataset
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset
@@ -539,7 +539,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphOutputPath}/otherresearchproduct
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
@@ -568,7 +568,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=7680
+ --conf spark.sql.shuffle.partitions=3840
--inputPath${graphOutputPath}/software
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software