"""Airflow DAG that runs the antispam batch checker inside a Kubernetes pod.

The DAG contains a single ``KubernetesPodOperator`` task which launches the
``gbloisi/curation`` image and executes ``/antispam-batch.py`` against the
OpenSearch instance described by the ``opensearch_default`` Airflow
connection. OpenSearch credentials are injected into the pod as environment
variables from the ``opensearch-conn-secrets`` Kubernetes secret.
"""
import os
from datetime import timedelta, datetime

import pendulum
from airflow import DAG
from airflow.hooks.base import BaseHook
from airflow.models.baseoperator import chain
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
from airflow.providers.cncf.kubernetes.secret import Secret

# Task-level defaults; retry behaviour can be overridden through the
# environment of the scheduler/worker that parses this file.
default_args = {
    "execution_timeout": timedelta(days=6),
    "retries": int(os.getenv("DEFAULT_TASK_RETRIES", "1")),
    "retry_delay": timedelta(
        seconds=int(os.getenv("DEFAULT_RETRY_DELAY_SECONDS", "60"))
    ),
}

# NOTE(review): this lookup runs every time the file is parsed, not only when
# the task executes. Consider templating the host/port into `arguments`
# instead — confirm against the deployed Airflow version before changing.
conn = BaseHook.get_connection("opensearch_default")

# `schedule` and `schedule_interval` are mutually exclusive: Airflow versions
# that accept `schedule` (2.4+) raise at parse time when both are passed, so
# only one may be given. The original passed `schedule=None` together with
# `schedule_interval=timedelta(days=1)`.
# NOTE(review): manual triggering (`schedule=None`) was kept; if a daily run
# was intended, use `schedule=timedelta(days=1)` instead.
dag = DAG(
    "antispam_batch_check",
    default_args=default_args,
    schedule=None,
    dagrun_timeout=None,
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
)

# Each Secret exposes one key of the `opensearch-conn-secrets` Kubernetes
# secret as an environment variable inside the pod.
secrets = [
    Secret(
        deploy_type="env",
        deploy_target="CURATION_OPENSEARCH__USER",
        secret="opensearch-conn-secrets",
        key="username",
    ),
    Secret(
        deploy_type="env",
        deploy_target="CURATION_OPENSEARCH__PASSWORD",
        secret="opensearch-conn-secrets",
        key="password",
    ),
]

# Define the KubernetesPodOperator
task = KubernetesPodOperator(
    task_id="antispam_checker",
    name="antispam_checker",
    namespace="kg-airflow",
    image="gbloisi/curation:1.0.0",
    image_pull_policy="Always",
    cmds=["python3"],
    arguments=[
        "/antispam-batch.py",
        "--opensearch.host", conn.host,
        # Command-line arguments must be strings; conn.port is converted here.
        "--opensearch.port", str(conn.port),
        "--openai.host", "local-ai.kg-airflow.svc.cluster.local",
        "--openai.port", "8000",
        "--parallelism", "36",
    ],
    secrets=secrets,
    is_delete_operator_pod=True,
    in_cluster=True,
    get_logs=True,
    dag=dag,
)

# Set the task dependencies (a single task, so no edges are created).
chain(task)