1
0
Fork 0

Merge pull request 'Add sparkExecutorMemoryOverhead workflow config to set off-heap memory for Spark actions. If not explicitly set, it defaults to 1G' (#331) from consistencywf_memoryoverhead_conf into beta

Reviewed-on: D-Net/dnet-hadoop#331
This commit is contained in:
Claudio Atzori 2023-08-29 16:31:36 +02:00
commit 488d9a1cea
2 changed files with 16 additions and 4 deletions

View File

@ -15,4 +15,8 @@
<name>oozie.action.sharelib.for.spark</name> <name>oozie.action.sharelib.for.spark</name>
<value>spark2</value> <value>spark2</value>
</property> </property>
<property>
<name>sparkExecutorMemoryOverhead</name>
<value>1G</value>
</property>
</configuration> </configuration>

View File

@ -18,11 +18,15 @@
</property> </property>
<property> <property>
<name>sparkDriverMemory</name> <name>sparkDriverMemory</name>
<description>memory for driver process</description> <description>heap memory for driver process</description>
</property> </property>
<property> <property>
<name>sparkExecutorMemory</name> <name>sparkExecutorMemory</name>
<description>memory for individual executor</description> <description>heap memory for individual executor</description>
</property>
<property>
<name>sparkExecutorMemoryOverhead</name>
<description>off-heap memory for individual executor</description>
</property> </property>
<property> <property>
<name>sparkExecutorCores</name> <name>sparkExecutorCores</name>
@ -86,6 +90,7 @@
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar> <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-memory=${sparkExecutorMemory} --executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
@ -111,6 +116,7 @@
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar> <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-memory=${sparkExecutorMemory} --executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
@ -135,8 +141,9 @@
<class>eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob</class> <class>eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar> <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory} --executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -159,8 +166,9 @@
<class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class> <class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar> <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory} --executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}