lot1-kickoff/airflow/dags/common.py

29 lines
972 B
Python
Raw Normal View History

2024-03-25 17:52:56 +01:00
import os
import zipfile
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.utils.file import TemporaryDirectory
def strip_prefix(s, p):
    """Return ``s`` with the leading ``p`` removed when ``s`` starts with it.

    If ``s`` does not start with ``p``, ``s`` is returned unchanged. Only a
    single leading occurrence is removed.
    """
    return s[len(p):] if s.startswith(p) else s
def s3_dowload_unzip_upload(s3conn: str, src_key: str, src_bucket: str, dest_bucket: str):
    """Download a zip archive from S3 and upload each of its entries to another bucket.

    The object ``src_key`` is fetched from ``src_bucket`` into a temporary
    directory, opened as a zip archive, and every entry listed in the archive
    is streamed to ``dest_bucket`` under a key equal to the entry's name inside
    the archive. Existing objects with the same key are replaced.

    Args:
        s3conn: Connection identifier handed to ``S3Hook`` as its first argument.
        src_key: Key of the zip archive in ``src_bucket``.
        src_bucket: Name of the bucket that holds the archive.
        dest_bucket: Name of the bucket that receives the extracted entries.
    """
    # Threaded transfers are switched off for this hook.
    # NOTE(review): presumably because zip member streams must be read
    # sequentially -- confirm before changing.
    hook = S3Hook(s3conn, transfer_config_args={'use_threads': False})
    with TemporaryDirectory() as dwl_dir:
        # Use the path reported by the hook rather than rebuilding it from
        # src_key: with preserve_file_name=True the file is stored under the
        # key's base name only, so a key such as "incoming/data.zip" would not
        # be found at f"{dwl_dir}/{src_key}".
        archive = hook.download_file(
            key=src_key,
            bucket_name=src_bucket,
            local_path=dwl_dir,
            preserve_file_name=True,
            use_autogenerated_subdir=False,
        )
        with zipfile.ZipFile(archive, 'r') as zip_ref:
            for info in zip_ref.infolist():
                # Open by ZipInfo so the exact entry being iterated is read,
                # even if the archive contains duplicate names.
                # NOTE(review): directory entries are uploaded as well (as
                # empty objects), same as before -- confirm this is wanted.
                with zip_ref.open(info) as file:
                    hook.load_file_obj(file, info.filename, dest_bucket, replace=True)
2024-03-25 17:52:56 +01:00