lot1-kickoff/airflow/dags/S3_delete.py

43 lines
1.2 KiB
Python

import os
from datetime import timedelta
import pendulum
from airflow.decorators import dag
from airflow.decorators import task
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
def _int_from_env(name, fallback):
    # Read environment variable `name` and coerce it to int; `fallback` is
    # used when the variable is not set.
    return int(os.environ.get(name, fallback))


# Airflow connection id handed to S3Hook by the DAG below.
S3_CONN_ID = os.environ.get("S3_CONN_ID", "s3_conn")

# Task execution timeout; interpreted as a number of hours in default_args.
EXECUTION_TIMEOUT = _int_from_env("EXECUTION_TIMEOUT", 6)

# Arguments passed to the DAG as `default_args`; each value can be
# overridden through the environment variable named in its lookup.
default_args = dict(
    execution_timeout=timedelta(hours=EXECUTION_TIMEOUT),
    retries=_int_from_env("DEFAULT_TASK_RETRIES", 1),
    retry_delay=timedelta(
        seconds=_int_from_env("DEFAULT_RETRY_DELAY_SECONDS", 60),
    ),
)
# DAG that deletes every S3 object whose key starts with a given prefix.
#
# Runtime params (read from context["params"] by the task):
#   prefix -- key prefix of the objects to delete
#   bucket -- bucket that holds those objects
#
# NOTE(review): the param defaults are descriptive placeholder strings, not
# usable values; presumably they are overridden when the DAG is triggered --
# confirm against how this DAG is launched.
@dag(
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    schedule=None,
    catchup=False,
    default_args=default_args,
    params={
        "prefix": "Key prefix of files to delete",
        "bucket": "bucket containing files to delete",
    },
    tags=["s3"],
)
def s3_delete():
    # Single task: list the keys under the prefix, delete them in one hook
    # call, then print one confirmation line per key.
    @task
    def delete(**context):
        s3 = S3Hook(S3_CONN_ID, transfer_config_args={"use_threads": False})

        run_params = context["params"]
        bucket_name = run_params["bucket"]
        key_prefix = run_params["prefix"]

        matched_keys = s3.list_keys(bucket_name=bucket_name, prefix=key_prefix)
        s3.delete_objects(bucket=bucket_name, keys=matched_keys)

        # Confirmations are printed only after the delete call has returned.
        for removed_key in matched_keys:
            print(f"{removed_key} deleted!")

    delete()


# Instantiate the DAG at import time.
s3_delete()