from __future__ import annotations

from airflow.decorators import dag
from airflow.models.baseoperator import chain
from airflow.models.param import Param
from airflow.operators.trigger_dagrun import TriggerDagRunOperator

import dag_utils


@dag(
    dag_id="build_openaire_graph",
    dag_display_name="Build the OpenAIRE graph",
    params={
        "S3_CONN_ID": Param(
            "s3_conn",
            type='string',
            description="Airflow connection for S3 endpoint",
        ),
        "GRAPH_PATH": Param(
            "s3a://graph/tmp/prod_provision/graph",
            type='string',
            description="",
        ),
        "WRKDIR_PATH": Param(
            "s3a://graph/tmp/prod_provision/working_dir",
            type='string',
            description="",
        ),
        "IS_LOOKUP_URL": Param(
            "http://services.openaire.eu:8280/is/services/isLookUp?wsdl",
            type='string',
            description="",
        ),
        "DEDUP_CONFIG_ID": Param(
            "dedup-result-decisiontree-v4",
            type='string',
            description="",
        ),
        "ORCID_PATH": Param(
            "s3a://graph/data/orcid_2023/tables",
            type='string',
            description="",
        ),
    },
    tags=["openaire"],
)
def build_new_graph():
    """Chain the three sub-DAG triggers that build the graph.

    The steps are, in order: ``results_deduplication``,
    ``consistency_graph`` and ``orcid_enrichment_graph``. Each one is a
    ``TriggerDagRunOperator`` created with ``wait_for_completion=True`` and
    the three are linked sequentially with ``chain``.

    Every value handed to a triggered DAG is a Jinja expression that reads
    the corresponding key from ``dag_run.conf``. Input and output locations
    are the configured ``GRAPH_PATH`` followed by a segment looked up in
    ``dag_utils.BUILD_PHASES``.
    """

    def from_conf(key):
        # Jinja expression that reads `key` from the run's conf.
        return "{{ dag_run.conf.get('" + key + "') }}"

    def phase_path(phase):
        # <GRAPH_PATH>/<segment registered for `phase` in dag_utils>.
        return from_conf("GRAPH_PATH") + "/" + dag_utils.BUILD_PHASES[phase]

    def workdir(subdir):
        # <WRKDIR_PATH>/<subdir>.
        return from_conf("WRKDIR_PATH") + "/" + subdir

    dedup = TriggerDagRunOperator(
        task_id="dedup",
        task_display_name="Deduplicate Research Results",
        trigger_dag_id="results_deduplication",
        wait_for_completion=True,
        conf={
            "S3_CONN_ID": from_conf("S3_CONN_ID"),
            "INPUT_PATH": phase_path("inference"),
            "OUTPUT_PATH": phase_path("dedup"),
            "WRKDIR_PATH": workdir("dedup"),
            "IS_LOOKUP_URL": from_conf("IS_LOOKUP_URL"),
            "DEDUP_CONFIG_ID": from_conf("DEDUP_CONFIG_ID"),
        },
    )

    consistency = TriggerDagRunOperator(
        task_id="consistency",
        task_display_name="Enforce Consistency of Graph",
        trigger_dag_id="consistency_graph",
        wait_for_completion=True,
        conf={
            "S3_CONN_ID": from_conf("S3_CONN_ID"),
            "INPUT_PATH": phase_path("dedup"),
            "OUTPUT_PATH": phase_path("consistency"),
            # NOTE(review): this step is given the same "/dedup" working
            # directory as the dedup step. Presumably it reads artefacts the
            # dedup run left there; confirm this is intentional.
            "WRKDIR_PATH": workdir("dedup"),
            "IS_LOOKUP_URL": from_conf("IS_LOOKUP_URL"),
        },
    )

    orcid_enrichment = TriggerDagRunOperator(
        task_id="orcid_enrichment",
        task_display_name="Enrich Graph with ORCID data",
        trigger_dag_id="orcid_enrichment_graph",
        wait_for_completion=True,
        conf={
            "S3_CONN_ID": from_conf("S3_CONN_ID"),
            "ORCID_PATH": from_conf("ORCID_PATH"),
            "INPUT_PATH": phase_path("consistency"),
            "OUTPUT_PATH": phase_path("orcid_enhancement"),
            "WRKDIR_PATH": workdir("orcid_enrichment"),
        },
    )

    # Each step starts only after the previous one has completed.
    chain(dedup, consistency, orcid_enrichment)


build_new_graph()