initial stage

This commit is contained in:
Giambattista Bloisi 2024-08-06 11:20:52 +02:00
parent a2e7c4beb6
commit 72ddac35cb
2 changed files with 26 additions and 12 deletions

23
airflow/dags/dag_utils.py Normal file
View File

@ -0,0 +1,23 @@
from airflow.hooks.base import BaseHook
from opensearchpy import OpenSearch
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
def get_opensearch_client(kwargs) -> OpenSearch:
    """Create an OpenSearch client from an Airflow connection.

    The connection id is read from ``kwargs["params"]["OPENSEARCH_CONN_ID"]``
    and resolved through ``BaseHook.get_connection``; the connection's host,
    port, login and password are used to configure the client.

    Args:
        kwargs: Mapping holding a ``"params"`` mapping with an
            ``"OPENSEARCH_CONN_ID"`` entry (presumably the Airflow task
            context; confirm against callers).

    Returns:
        An ``OpenSearch`` client using SSL, a pool size of 20 and a
        timeout of 180.
    """
    connection = BaseHook.get_connection(kwargs["params"]["OPENSEARCH_CONN_ID"])
    endpoint = {'host': connection.host, 'port': connection.port}
    credentials = (connection.login, connection.password)
    # NOTE(review): SSL is on but certificate verification is off and the
    # related warning is silenced; confirm this is intended for the target
    # cluster before reusing this helper elsewhere.
    return OpenSearch(
        hosts=[endpoint],
        http_auth=credentials,
        use_ssl=True,
        verify_certs=False,
        ssl_show_warn=False,
        pool_maxsize=20,
        timeout=180,
    )
def get_bucket_name(context: dict, hook: S3Hook, param_name: str):
    """Resolve the bucket name to use for a task.

    Args:
        context: Mapping with a ``"params"`` mapping that contains
            ``param_name``.
        hook: Object exposing an ``extra_args`` mapping with a
            ``'bucket_name'`` entry, used as the fallback.
        param_name: Key of the parameter that may carry the bucket name.

    Returns:
        The value of ``context["params"][param_name]`` when it is truthy,
        otherwise ``hook.extra_args['bucket_name']``.

    Raises:
        KeyError: If ``param_name`` is absent from the params, or the
            fallback is needed and ``extra_args`` has no ``'bucket_name'``.
    """
    requested = context["params"][param_name]
    # An explicitly supplied, non-empty parameter always wins.
    if requested:
        return requested
    # Empty/None parameter: fall back to the bucket configured on the hook.
    return hook.extra_args['bucket_name']

View File

@ -1,21 +1,12 @@
import json import json
from datetime import datetime, timedelta
import requests
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.providers.http.hooks.http import HttpHook
import os
from datetime import timedelta from datetime import timedelta
import pendulum import pendulum
from airflow.decorators import dag from airflow.decorators import dag
from airflow.decorators import task from airflow.decorators import task
from airflow.exceptions import AirflowSkipException
from airflow.models.param import Param
from airflow.operators.python import get_current_context from airflow.operators.python import get_current_context
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from dag_utils import get_bucket_name, get_opensearch_client from dag_utils import get_opensearch_client
# Define default arguments # Define default arguments
default_args = { default_args = {