initial stage
This commit is contained in:
parent
118e29f462
commit
a2e7c4beb6
|
@ -0,0 +1,77 @@
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import requests
|
||||||
|
from airflow import DAG
|
||||||
|
from airflow.operators.python_operator import PythonOperator
|
||||||
|
from airflow.providers.http.hooks.http import HttpHook
|
||||||
|
import os
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
|
import pendulum
|
||||||
|
from airflow.decorators import dag
|
||||||
|
from airflow.decorators import task
|
||||||
|
from airflow.exceptions import AirflowSkipException
|
||||||
|
from airflow.models.param import Param
|
||||||
|
from airflow.operators.python import get_current_context
|
||||||
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
||||||
|
|
||||||
|
from dag_utils import get_bucket_name, get_opensearch_client
|
||||||
|
|
||||||
|
# Define default arguments
|
||||||
|
default_args = {
|
||||||
|
'owner': 'airflow',
|
||||||
|
'depends_on_past': False,
|
||||||
|
'email_on_failure': False,
|
||||||
|
'email_on_retry': False,
|
||||||
|
'retries': 1,
|
||||||
|
'retry_delay': timedelta(minutes=5),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@dag(
|
||||||
|
dag_id="remove_old_indexes",
|
||||||
|
dag_display_name="Remove outdated MKG indexes",
|
||||||
|
start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
|
||||||
|
schedule=None,
|
||||||
|
catchup=False,
|
||||||
|
default_args=default_args,
|
||||||
|
params={
|
||||||
|
"OPENSEARCH_CONN_ID": "opensearch_default",
|
||||||
|
},
|
||||||
|
tags=["opensearch", "maintenance"],
|
||||||
|
)
|
||||||
|
def remove_old_indexes():
|
||||||
|
@task
|
||||||
|
def remove_indexes():
|
||||||
|
context = get_current_context()
|
||||||
|
client = get_opensearch_client(context)
|
||||||
|
indexes = client.cat.indices()
|
||||||
|
aliases = client.cat.aliases()
|
||||||
|
|
||||||
|
print(json.dumps(aliases))
|
||||||
|
print(json.dumps(indexes))
|
||||||
|
|
||||||
|
alias_index_names = {alias['index'] for alias in aliases}
|
||||||
|
index_dict = {}
|
||||||
|
|
||||||
|
for index in indexes:
|
||||||
|
index_name = index['index']
|
||||||
|
if '_' in index_name:
|
||||||
|
base_name = '_'.join(index_name.split('_')[:-1])
|
||||||
|
timestamp = index_name.split('_')[-1]
|
||||||
|
if base_name not in index_dict:
|
||||||
|
index_dict[base_name] = []
|
||||||
|
index_dict[base_name].append((index_name, timestamp))
|
||||||
|
|
||||||
|
for base_name, index_list in index_dict.items():
|
||||||
|
index_list.sort(key=lambda x: x[1], reverse=True)
|
||||||
|
most_recent_index = index_list[0][0]
|
||||||
|
for index_name, timestamp in index_list:
|
||||||
|
if index_name != most_recent_index and index_name not in alias_index_names:
|
||||||
|
#hook.run(f'/{index_name}')
|
||||||
|
print(f'Deleted index: {index_name}')
|
||||||
|
|
||||||
|
remove_indexes()
|
||||||
|
|
||||||
|
|
||||||
|
remove_old_indexes()
|
Loading…
Reference in New Issue