from datetime import datetime from enum import Enum from flask_openapi3 import Info, Tag from flask_openapi3 import OpenAPI from jsonargparse import ArgumentParser from opensearchpy import OpenSearch, NotFoundError, helpers from pydantic import BaseModel, SecretStr import logging parser = ArgumentParser(env_prefix="CURATION", default_env=True) parser.add_argument("--opensearch.host", default='opensearch-cluster.local-dataplatform') parser.add_argument("--opensearch.port", default=443, type=int) parser.add_argument("--opensearch.user", default="admin", type=SecretStr) parser.add_argument("--opensearch.password", default="admin", type=SecretStr) parser.add_argument("--debug", default=False, type=bool) cfg = parser.parse_args() print(cfg.as_dict()) client = OpenSearch( hosts=[{'host': cfg.get("opensearch.host"), 'port': cfg.get("opensearch.port")}], http_auth=(cfg.get("opensearch.user").get_secret_value(), cfg.get("opensearch.password").get_secret_value()), use_ssl=True, verify_certs=False, ssl_show_warn=False, pool_maxsize=20, ) # if client.indices.exists("curation"): # client.indices.delete("curation") if not client.indices.exists("curation"): client.indices.create("curation", { "settings": { "index": { "number_of_shards": 10, "number_of_replicas": 0, "codec": "zstd_no_dict", "replication.type": "SEGMENT" }, }, "mappings": { "dynamic": "strict", "properties": { "local_identifier": { "type": "keyword" }, "timestamp": { "type": "date", "format": "date_hour_minute_second_fraction" }, "creator": { "type": "keyword" }, "status": { "type": "keyword" }, "note": { "index": False, "type": "text" }, "log": { "type": "object", "properties": { "timestamp": { "format": "date_hour_minute_second_fraction", "type": "date" }, "creator": { "type": "keyword" }, "status": { "index": False, "type": "keyword" }, "note": { "index": False, "type": "text" }, } } } } }) info = Info(title="Curator API", version="1.0.0") app = OpenAPI(__name__, info=info) curation_tag = Tag(name="curation", description="Curator API") class CurationStatus(str, Enum): valid = "valid" withdrawn = "withdrawn" alert = "alert" restore = "restore" reset = "reset" class CurationRequest(BaseModel): local_identifier: str creator: str status: CurationStatus note: str class LogEntry(BaseModel): timestamp: str creator: str status: CurationStatus note: str class CurationResponse(BaseModel): local_identifier: str timestamp: str creator: str status: CurationStatus note: str log: list[LogEntry] @app.route('/health') def health_check(): if all_required_services_are_running(): return 'OK', 200 else: return 'Service Unavailable', 500 def all_required_services_are_running(): os_health = client.cluster.health() return os_health['status'] in ['green', 'yellow'] and os_health['number_of_nodes'] > 0 @app.post("/curation", summary="set curation", responses={200: CurationResponse}, tags=[curation_tag]) def post_curation(query: CurationRequest): """ set curation status """ curation = dict() try: hit = client.get(index="curation", id=query.local_identifier) curation = hit['_source'] if query.status.name == curation['status']: return {"msg": "status is not changed"}, 403 # move current status in history annotations = curation['log'] if 'log' in curation else list() if isinstance(annotations, dict): annotations = [annotations] annotations.insert(0, { "timestamp": curation['timestamp'], "creator": curation['creator'], "status": curation['status'], "note": curation['note'], }) annotations = annotations[0:100] curation['log'] = annotations curation['timestamp'] = datetime.now().isoformat() curation['creator'] = query.creator curation['note'] = query.note print(curation) # todo check status transition match query.status.name: case "valid": if curation['status'] not in ('restore', 'reset'): return {"msg": "status cannot be updated to 'valid'"}, 403 curation['status'] = query.status.name case "withdrawn": curation['status'] = query.status.name case "alert": curation['status'] = query.status.name case "restore": if curation['status'] != "withdrawn": return {"msg": "only withdrawn records can be restored'"}, 403 curation['status'] = query.status.name case "reset": curation['status'] = query.status.name #TODO transactionality in case of failure? client.index( index='curation', id=query.local_identifier, body=curation, refresh=True, if_primary_term=hit['_primary_term'], if_seq_no=hit['_seq_no'] ) metadata_status = curation['status'] if metadata_status == 'reset': client.update( index='products', id=query.local_identifier, body={ "script": {"source": "ctx._source.remove(\"status\")"} }, refresh=True ) else: if metadata_status == "restore": metadata_status = 'valid' client.update( index='products', id=query.local_identifier, body={ "doc": {"status": metadata_status} }, refresh=True ) except NotFoundError: curation['local_identifier'] = query.local_identifier curation['timestamp'] = datetime.now().isoformat() curation['status'] = query.status.name curation['creator'] = query.creator curation['note'] = query.note match query.status.name: case "restore": return {"msg": "cannot restore: status does not exist'"}, 403 case "reset": return {"msg": "cannot reset: status does not exist'"}, 403 client.index( index='curation', id=query.local_identifier, body=curation, refresh=True, op_type='create' ) client.update( index='products', id=query.local_identifier, body={ "doc": {"status": curation['status']} }, refresh=True ) return curation @app.get("/curation", summary="get curation", tags=[curation_tag]) def get_curation(local_identifier: str): """ to get a curation record """ try: hit = client.get(index="curation", id=local_identifier) return { "code": 0, "message": "ok", "data": hit['_source'] } except NotFoundError: return {"msg": f"Cannot fetch: '{local_identifier}' does not exist'"}, 403 @app.get("/alerts", summary="get curation in alert status", tags=[curation_tag]) def get_alerts(): """ to get a curation record """ query = { "query": { "terms": { "status": [CurationStatus.alert] } } } return { "code": 0, "message": "ok", "data": list(helpers.scan(client, index="curation", query=query)) } if __name__ == "__main__": debug = False if debug: logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s') app.run(debug=True) else: from waitress import serve logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') serve(app, host="0.0.0.0", port=5000)