Add sparkExecutorMemoryOverhead workflow config to set off-heap memory for Spark actions. If not explicitly set it is defaulted to 1Gb

This commit is contained in:
Giambattista Bloisi 2023-08-29 16:04:19 +02:00
parent bf35280ea6
commit 6b1c05d118
2 changed files with 16 additions and 4 deletions

View File

@ -15,4 +15,8 @@
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>sparkExecutorMemoryOverhead</name>
<value>1G</value>
</property>
</configuration>

View File

@ -18,11 +18,15 @@
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
<description>heap memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
<description>heap memory for individual executor</description>
</property>
<property>
<name>sparkExecutorMemoryOverhead</name>
<description>off-heap memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
@ -86,6 +90,7 @@
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
@ -111,6 +116,7 @@
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
@ -135,8 +141,9 @@
<class>eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -159,8 +166,9 @@
<class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}