From 8742934843df1de1ff9163a7f4f2a5188dba3db3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Nov 2022 11:32:22 +0100 Subject: [PATCH 1/2] added spark.sql.shuffle.partitions in the last join phase of the result to community through semantic relation propagation --- .../dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml index 81b51443c6..26c7d17889 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -260,6 +260,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=5000 --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc --sourcePath${sourcePath}/publication @@ -289,6 +290,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=5000 --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc --sourcePath${sourcePath}/dataset @@ -318,6 +320,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=2000 --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc --sourcePath${sourcePath}/otherresearchproduct @@ -347,6 +350,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=1000 --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc --sourcePath${sourcePath}/software From ed64618235caec74176ee8b5046b2bf8c8a808df Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Nov 2022 16:06:51 +0100 Subject: [PATCH 2/2] increased spark.sql.shuffle.partitions in the last join phase of the result (publication) to community through semantic relation propagation --- .../dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml index 26c7d17889..58d5bb26de 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -260,7 +260,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.sql.shuffle.partitions=5000 + --conf spark.sql.shuffle.partitions=10000 --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc --sourcePath${sourcePath}/publication