initial stage
This commit is contained in:
parent
48d2f69bc6
commit
43743187ba
|
@ -7,12 +7,43 @@ from catalogue.vocabulary import CATALOG_VOCABULARY
|
|||
|
||||
|
||||
class RawCatalogOpensearch:
|
||||
entities = ["datasources",
|
||||
"interoperability-records",
|
||||
"providers",
|
||||
"resource-interoperability-records",
|
||||
"services",
|
||||
"training-resources"]
|
||||
mapped_entities = ["interoperability-records", "training-resources", "services"]
|
||||
|
||||
def __init__(self, os_client: OpenSearch, suffix: str | None):
    """Keep the OpenSearch client and the index-name suffix on the instance.

    Args:
        os_client: Client object stored as ``self.os_client``.
        suffix: Value stored as ``self.suffix``; the index-name helpers of
            this class interpolate it into the names they build.
    """
    self.suffix = suffix
    self.os_client = os_client
|
||||
|
||||
def get_index(self, name: str):
|
||||
return "catalog_" + name + ("" if self.suffix is None else f"_{self.suffix}")
|
||||
return f"catalog_{name}_{self.suffix}"
|
||||
|
||||
def get_alias(self, name: str):
    """Return the alias name for the catalogue entity ``name``.

    The alias is the entity name prefixed with ``catalog_``; no suffix is
    appended.
    """
    alias = "catalog_{}".format(name)
    return alias
|
||||
|
||||
def get_mapped_index(self, name: str):
    """Return the suffixed mapped-index name for the catalogue entity ``name``.

    Args:
        name: Catalogue entity name, e.g. ``"services"``.

    Returns:
        ``"<prefix>_<suffix>"`` for "interoperability-records",
        "training-resources" and "services"; ``None`` for any other name.
    """
    # NOTE(review): __init__ annotates the suffix as ``str | None``; a None
    # suffix is rendered here as the literal text "None" -- confirm that
    # callers always supply a suffix.
    prefixes = {
        "interoperability-records": "interoperability",
        "training-resources": "training",
        "services": "services",
    }
    prefix = prefixes.get(name)
    if prefix is None:
        return None
    return f"{prefix}_{self.suffix}"
|
||||
|
||||
def get_mapped_alias(self, name: str):
    """Return the mapped alias name for the catalogue entity ``name``.

    Args:
        name: Catalogue entity name, e.g. ``"training-resources"``.

    Returns:
        "interoperability", "training" or "services" for the corresponding
        entity; ``None`` for any other name.
    """
    # Plain string constants: the values need no interpolation, so no
    # f-strings are used.
    aliases = {
        "interoperability-records": "interoperability",
        "training-resources": "training",
        "services": "services",
    }
    return aliases.get(name)
|
||||
|
||||
def get_resource_interoperability_records(self, resource_id):
|
||||
response = self.os_client.search(
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import timedelta
|
||||
|
||||
|
@ -33,12 +32,6 @@ default_args = {
|
|||
default_args=default_args,
|
||||
params={
|
||||
"OPENSEARCH_CONN_ID": "opensearch_default",
|
||||
"ENTITIES": ["datasources",
|
||||
"interoperability-records",
|
||||
"providers",
|
||||
"resource-interoperability-records",
|
||||
"services",
|
||||
"training-resources"],
|
||||
"SHARDS": 3,
|
||||
"SUFFIX": pendulum.now().format('YYYYMMDDHHmmss')
|
||||
},
|
||||
|
@ -58,28 +51,11 @@ def import_catalogue_entities():
|
|||
timeout=180
|
||||
)
|
||||
|
||||
for entity in kwargs["params"]["ENTITIES"]:
|
||||
for entity in RawCatalogOpensearch.entities:
|
||||
indexname = f'{entity}_{kwargs["params"]["SUFFIX"]}'
|
||||
if client.indices.exists(indexname):
|
||||
client.indices.delete(indexname)
|
||||
|
||||
client.indices.create(indexname, {
|
||||
"settings": {
|
||||
"index": {
|
||||
"number_of_shards": kwargs["params"]["SHARDS"],
|
||||
"number_of_replicas": 0,
|
||||
"refresh_interval": -1,
|
||||
|
||||
"translog.flush_threshold_size": "2048MB",
|
||||
|
||||
"codec": "zstd_no_dict",
|
||||
"replication.type": "SEGMENT"
|
||||
}
|
||||
|
||||
}
|
||||
# "mappings": mappings[entity]
|
||||
})
|
||||
|
||||
@task
|
||||
def harvest_indexes(**kwargs):
|
||||
conn = BaseHook.get_connection(kwargs["params"]["OPENSEARCH_CONN_ID"])
|
||||
|
@ -95,7 +71,7 @@ def import_catalogue_entities():
|
|||
catalog = RawCatalogOpensearch(client, kwargs["params"]["SUFFIX"])
|
||||
|
||||
session = requests.session()
|
||||
for entity in kwargs["params"]["ENTITIES"]:
|
||||
for entity in RawCatalogOpensearch.entities:
|
||||
indexname = catalog.get_index(entity)
|
||||
baseurl = "http://vereniki.athenarc.gr:8080/eic-registry"
|
||||
callurl = f"{baseurl}/{entity}"
|
||||
|
@ -120,7 +96,7 @@ def import_catalogue_entities():
|
|||
|
||||
succeeded = 0
|
||||
failed = 0
|
||||
for success, item in helpers.parallel_bulk(client, actions=streamed_results(), timeout=5*60):
|
||||
for success, item in helpers.parallel_bulk(client, actions=streamed_results(), timeout=5 * 60):
|
||||
if success:
|
||||
succeeded = succeeded + 1
|
||||
else:
|
||||
|
@ -148,29 +124,33 @@ def import_catalogue_entities():
|
|||
|
||||
catalog = RawCatalogOpensearch(client, kwargs["params"]["SUFFIX"])
|
||||
|
||||
for entity in {"interoperability-records", "training-resources", "services"}.intersection(
|
||||
kwargs["params"]["ENTITIES"]):
|
||||
for entity in ["interoperability-records", "training-resources", "services"]:
|
||||
mapped_index = catalog.get_mapped_index(entity)
|
||||
|
||||
def streamed_results():
|
||||
nonlocal mapped_index
|
||||
for hit in opensearchpy.helpers.scan(client, index=catalog.get_index(entity),
|
||||
query={"query": {"match_all": {}}}):
|
||||
s = hit['_source']
|
||||
|
||||
r = hit['_source']
|
||||
doc = None
|
||||
match entity:
|
||||
case "interoperability-records":
|
||||
doc = catalog.map_interoperability(s)
|
||||
doc = catalog.map_interoperability(r)
|
||||
case "training-resources":
|
||||
doc = catalog.map_training(s)
|
||||
doc = catalog.map_training(r)
|
||||
case "services":
|
||||
doc = catalog.map_service(s)
|
||||
doc = catalog.map_service(r)
|
||||
|
||||
if doc is not None:
|
||||
client.update(
|
||||
index=f'{entity}_{kwargs["params"]["SUFFIX"]}',
|
||||
body={"doc": doc, "doc_as_upsert": True},
|
||||
id=doc['id'],
|
||||
refresh=True
|
||||
)
|
||||
yield {"_index": mapped_index, "_id": doc['id'], "_source": doc}
|
||||
|
||||
succeeded = 0
|
||||
failed = 0
|
||||
for success, item in helpers.parallel_bulk(client, actions=streamed_results(), timeout=5 * 60):
|
||||
if success:
|
||||
succeeded = succeeded + 1
|
||||
else:
|
||||
print("error: " + str(item))
|
||||
failed = failed + 1
|
||||
|
||||
@task
|
||||
def close_indexes(**kwargs):
|
||||
|
@ -184,32 +164,41 @@ def import_catalogue_entities():
|
|||
pool_maxsize=20,
|
||||
timeout=180
|
||||
)
|
||||
for entity in kwargs["params"]["ENTITIES"]:
|
||||
indexname = f'{entity}_{kwargs["params"]["SUFFIX"]}'
|
||||
client.indices.refresh(indexname)
|
||||
client.indices.put_settings(index=indexname, body={
|
||||
catalog = RawCatalogOpensearch(client, kwargs["params"]["SUFFIX"])
|
||||
|
||||
def refresh_index(index_name):
    """Refresh ``index_name`` and then update its index settings.

    After the refresh, the index is set to one replica and a 60 second
    refresh interval. Does nothing when ``index_name`` is None.
    """
    if index_name is None:
        return

    client.indices.refresh(index_name)
    index_settings = {
        "number_of_replicas": 1,
        "refresh_interval": "60s",
    }
    client.indices.put_settings(index=index_name, body={"index": index_settings})
|
||||
|
||||
# update aliases
|
||||
for entity in kwargs["params"]["ENTITIES"]:
|
||||
indexname = f'{entity}_{kwargs["params"]["SUFFIX"]}'
|
||||
def update_aliases(index_name, alias_name):
|
||||
if index_name is not None and alias_name is not None:
|
||||
client.indices.update_aliases(
|
||||
body={"actions": [
|
||||
{"remove": {"index": f"{entity}_*", "alias": entity}},
|
||||
{"add": {"index": indexname, "alias": entity}},
|
||||
{"remove": {"index": f"{alias_name}_*", "alias": alias_name}},
|
||||
{"add": {"index": index_name, "alias": alias_name}},
|
||||
]}
|
||||
)
|
||||
# update "allresources" alias
|
||||
|
||||
for entity in RawCatalogOpensearch.entities:
|
||||
refresh_index(catalog.get_index(entity))
|
||||
refresh_index(catalog.get_mapped_index(entity))
|
||||
update_aliases(catalog.get_index(entity), catalog.get_alias(entity))
|
||||
update_aliases(catalog.get_mapped_index(entity), catalog.get_mapped_alias(entity))
|
||||
|
||||
# update "allresources" alias with mapped indices
|
||||
actions = []
|
||||
for entity in kwargs["params"]["ENTITIES"]:
|
||||
if entity in ['products', 'services', 'training', 'interoperability']:
|
||||
indexname = f'{entity}_{kwargs["params"]["SUFFIX"]}'
|
||||
actions.append({"remove": {"index": f"{entity}_*", "alias": "allresources"}})
|
||||
actions.append({"add": {"index": indexname, "alias": "allresources"}})
|
||||
for entity in RawCatalogOpensearch.mapped_entities:
|
||||
index_name = catalog.get_mapped_index(entity)
|
||||
entity_alias = catalog.get_mapped_alias(entity)
|
||||
actions.append({"remove": {"index": f"{entity_alias}_*", "alias": "allresources"}})
|
||||
actions.append({"add": {"index": index_name, "alias": "allresources"}})
|
||||
|
||||
if len(actions) > 0:
|
||||
client.indices.update_aliases(
|
||||
body={"actions": actions}
|
||||
|
|
Loading…
Reference in New Issue