diff --git a/workflow/dnet/build_scholexplorer_graph.py b/workflow/dnet/build_scholexplorer_graph.py index bf29f94..3d75417 100644 --- a/workflow/dnet/build_scholexplorer_graph.py +++ b/workflow/dnet/build_scholexplorer_graph.py @@ -6,6 +6,7 @@ from airflow.models.param import Param from airflow.providers.cncf.kubernetes.operators.spark_kubernetes import SparkKubernetesOperator from spark_configurator import SparkConfigurator +from dag_utils import SPARK_RESOURCES_PROFILES EXECUTION_TIMEOUT = int(os.getenv("EXECUTION_TIMEOUT", 6)) @@ -36,9 +37,10 @@ def build_scholexplorer_dag(): template_spec=SparkConfigurator( name="orcidpropagate-{{ ds }}-{{ task_instance.try_number }}", mainClass="eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump", + profile=SPARK_RESOURCES_PROFILES['medium'], jarLocation='s3a://binaries/dhp-shade-package-1.2.5-SNAPSHOT.jar', - arguments=["--sourcePath", "{{ dag_run.conf.get('ORCID_PATH') }}", - "--targetPath", "{{ dag_run.conf.get('INPUT_PATH') }}", + arguments=["--sourcePath", "{{ dag_run.conf.get('INPUT_PATH') }}", + "--targetPath", "{{ dag_run.conf.get('OUTPUT_PATH') }}", ]).get_configuration(), kubernetes_conn_id="kubernetes_default" )