diff --git a/airflow/dags/run_spark.py b/airflow/dags/run_spark.py
index 0edd99b..69ed76e 100644
--- a/airflow/dags/run_spark.py
+++ b/airflow/dags/run_spark.py
@@ -58,13 +58,13 @@ default_args = {
 }
 
 spec =SparkConfigurator(
-    name="spark-scholix", \
-    mainClass="eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump", \
-    jarLocation = 's3a://deps/dhp-shade-package-1.2.5-SNAPSHOT.jar'\
-    ,arguments =[ "--sourcePath", "s3a://raw-graph/01", "--targetPath", "s3a://scholix"],\
-    executor_cores=10,\
-    executor_memory="4G", \
-    executor_instances=1, \
+    name="spark-scholix-{{ ds }}-{{ task_instance.try_number }}",
+    mainClass="eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump",
+    jarLocation = 's3a://deps/dhp-shade-package-1.2.5-SNAPSHOT.jar',
+    arguments =[ "--sourcePath", "s3a://raw-graph/01", "--targetPath", "s3a://scholix"],
+    executor_cores=10,
+    executor_memory="4G",
+    executor_instances=1,
     executor_memoryOverhead="3G").get_configuration()
 
 logger.info("found configuration")
@@ -86,7 +86,7 @@ submit = SparkKubernetesOperator(
     kubernetes_conn_id="kubernetes_default",
     # do_xcom_push=True,
     # delete_on_termination=True,
-    base_container_name="spark-kubernetes-driver",
+    # base_container_name="spark-kubernetes-driver",
     dag=dag
 )
 
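
The substantive change in the first hunk is the templated `name`: `{{ ds }}` expands to the DAG run's logical date and `{{ task_instance.try_number }}` to the task's attempt number, so each run and each retry gets a unique application name instead of every run colliding on `spark-scholix` (assuming the spec is passed through one of the operator's templated fields so Airflow renders it at task runtime). A minimal sketch of how that template renders, using plain Jinja2 with stand-in values; the `FakeTaskInstance` class and the date below are illustrative, not Airflow's real template context:

```python
from jinja2 import Template


class FakeTaskInstance:
    # Stand-in for Airflow's task_instance context object; only the
    # attribute the template actually reads is mimicked here.
    try_number = 2  # second attempt of the task


# Render the same template string used for the SparkApplication name.
rendered = Template(
    "spark-scholix-{{ ds }}-{{ task_instance.try_number }}"
).render(ds="2024-05-01", task_instance=FakeTaskInstance())

print(rendered)  # -> spark-scholix-2024-05-01-2
```

This matters for SparkKubernetesOperator because the rendered name typically becomes the name of the SparkApplication resource in Kubernetes, and resubmitting a resource with an unchanged name can conflict with the previous run's leftover object.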