initial stage
This commit is contained in:
parent
48d2f69bc6
commit
43743187ba
|
@ -7,12 +7,43 @@ from catalogue.vocabulary import CATALOG_VOCABULARY
|
||||||
|
|
||||||
|
|
||||||
class RawCatalogOpensearch:
|
class RawCatalogOpensearch:
|
||||||
|
entities = ["datasources",
|
||||||
|
"interoperability-records",
|
||||||
|
"providers",
|
||||||
|
"resource-interoperability-records",
|
||||||
|
"services",
|
||||||
|
"training-resources"]
|
||||||
|
mapped_entities = ["interoperability-records", "training-resources", "services"]
|
||||||
|
|
||||||
def __init__(self, os_client: OpenSearch, suffix: str | None):
|
def __init__(self, os_client: OpenSearch, suffix: str | None):
|
||||||
self.os_client = os_client
|
self.os_client = os_client
|
||||||
self.suffix = suffix
|
self.suffix = suffix
|
||||||
|
|
||||||
def get_index(self, name: str):
|
def get_index(self, name: str):
|
||||||
return "catalog_" + name + ("" if self.suffix is None else f"_{self.suffix}")
|
return f"catalog_{name}_{self.suffix}"
|
||||||
|
|
||||||
|
def get_alias(self, name: str):
|
||||||
|
return f"catalog_{name}"
|
||||||
|
|
||||||
|
def get_mapped_index(self, name: str):
|
||||||
|
match name:
|
||||||
|
case "interoperability-records":
|
||||||
|
return f"interoperability_{self.suffix}"
|
||||||
|
case "training-resources":
|
||||||
|
return f"training_{self.suffix}"
|
||||||
|
case "services":
|
||||||
|
return f"services_{self.suffix}"
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_mapped_alias(self, name: str):
|
||||||
|
match name:
|
||||||
|
case "interoperability-records":
|
||||||
|
return f"interoperability"
|
||||||
|
case "training-resources":
|
||||||
|
return f"training"
|
||||||
|
case "services":
|
||||||
|
return f"services"
|
||||||
|
return None
|
||||||
|
|
||||||
def get_resource_interoperability_records(self, resource_id):
|
def get_resource_interoperability_records(self, resource_id):
|
||||||
response = self.os_client.search(
|
response = self.os_client.search(
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
|
@ -33,12 +32,6 @@ default_args = {
|
||||||
default_args=default_args,
|
default_args=default_args,
|
||||||
params={
|
params={
|
||||||
"OPENSEARCH_CONN_ID": "opensearch_default",
|
"OPENSEARCH_CONN_ID": "opensearch_default",
|
||||||
"ENTITIES": ["datasources",
|
|
||||||
"interoperability-records",
|
|
||||||
"providers",
|
|
||||||
"resource-interoperability-records",
|
|
||||||
"services",
|
|
||||||
"training-resources"],
|
|
||||||
"SHARDS": 3,
|
"SHARDS": 3,
|
||||||
"SUFFIX": pendulum.now().format('YYYYMMDDHHmmss')
|
"SUFFIX": pendulum.now().format('YYYYMMDDHHmmss')
|
||||||
},
|
},
|
||||||
|
@ -58,28 +51,11 @@ def import_catalogue_entities():
|
||||||
timeout=180
|
timeout=180
|
||||||
)
|
)
|
||||||
|
|
||||||
for entity in kwargs["params"]["ENTITIES"]:
|
for entity in RawCatalogOpensearch.entities:
|
||||||
indexname = f'{entity}_{kwargs["params"]["SUFFIX"]}'
|
indexname = f'{entity}_{kwargs["params"]["SUFFIX"]}'
|
||||||
if client.indices.exists(indexname):
|
if client.indices.exists(indexname):
|
||||||
client.indices.delete(indexname)
|
client.indices.delete(indexname)
|
||||||
|
|
||||||
client.indices.create(indexname, {
|
|
||||||
"settings": {
|
|
||||||
"index": {
|
|
||||||
"number_of_shards": kwargs["params"]["SHARDS"],
|
|
||||||
"number_of_replicas": 0,
|
|
||||||
"refresh_interval": -1,
|
|
||||||
|
|
||||||
"translog.flush_threshold_size": "2048MB",
|
|
||||||
|
|
||||||
"codec": "zstd_no_dict",
|
|
||||||
"replication.type": "SEGMENT"
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
# "mappings": mappings[entity]
|
|
||||||
})
|
|
||||||
|
|
||||||
@task
|
@task
|
||||||
def harvest_indexes(**kwargs):
|
def harvest_indexes(**kwargs):
|
||||||
conn = BaseHook.get_connection(kwargs["params"]["OPENSEARCH_CONN_ID"])
|
conn = BaseHook.get_connection(kwargs["params"]["OPENSEARCH_CONN_ID"])
|
||||||
|
@ -95,7 +71,7 @@ def import_catalogue_entities():
|
||||||
catalog = RawCatalogOpensearch(client, kwargs["params"]["SUFFIX"])
|
catalog = RawCatalogOpensearch(client, kwargs["params"]["SUFFIX"])
|
||||||
|
|
||||||
session = requests.session()
|
session = requests.session()
|
||||||
for entity in kwargs["params"]["ENTITIES"]:
|
for entity in RawCatalogOpensearch.entities:
|
||||||
indexname = catalog.get_index(entity)
|
indexname = catalog.get_index(entity)
|
||||||
baseurl = "http://vereniki.athenarc.gr:8080/eic-registry"
|
baseurl = "http://vereniki.athenarc.gr:8080/eic-registry"
|
||||||
callurl = f"{baseurl}/{entity}"
|
callurl = f"{baseurl}/{entity}"
|
||||||
|
@ -148,29 +124,33 @@ def import_catalogue_entities():
|
||||||
|
|
||||||
catalog = RawCatalogOpensearch(client, kwargs["params"]["SUFFIX"])
|
catalog = RawCatalogOpensearch(client, kwargs["params"]["SUFFIX"])
|
||||||
|
|
||||||
for entity in {"interoperability-records", "training-resources", "services"}.intersection(
|
for entity in ["interoperability-records", "training-resources", "services"]:
|
||||||
kwargs["params"]["ENTITIES"]):
|
mapped_index = catalog.get_mapped_index(entity)
|
||||||
|
|
||||||
|
def streamed_results():
|
||||||
|
nonlocal mapped_index
|
||||||
for hit in opensearchpy.helpers.scan(client, index=catalog.get_index(entity),
|
for hit in opensearchpy.helpers.scan(client, index=catalog.get_index(entity),
|
||||||
query={"query": {"match_all": {}}}):
|
query={"query": {"match_all": {}}}):
|
||||||
s = hit['_source']
|
r = hit['_source']
|
||||||
|
|
||||||
doc = None
|
doc = None
|
||||||
match entity:
|
match entity:
|
||||||
case "interoperability-records":
|
case "interoperability-records":
|
||||||
doc = catalog.map_interoperability(s)
|
doc = catalog.map_interoperability(r)
|
||||||
case "training-resources":
|
case "training-resources":
|
||||||
doc = catalog.map_training(s)
|
doc = catalog.map_training(r)
|
||||||
case "services":
|
case "services":
|
||||||
doc = catalog.map_service(s)
|
doc = catalog.map_service(r)
|
||||||
|
|
||||||
if doc is not None:
|
yield {"_index": mapped_index, "_id": doc['id'], "_source": doc}
|
||||||
client.update(
|
|
||||||
index=f'{entity}_{kwargs["params"]["SUFFIX"]}',
|
succeeded = 0
|
||||||
body={"doc": doc, "doc_as_upsert": True},
|
failed = 0
|
||||||
id=doc['id'],
|
for success, item in helpers.parallel_bulk(client, actions=streamed_results(), timeout=5 * 60):
|
||||||
refresh=True
|
if success:
|
||||||
)
|
succeeded = succeeded + 1
|
||||||
|
else:
|
||||||
|
print("error: " + str(item))
|
||||||
|
failed = failed + 1
|
||||||
|
|
||||||
@task
|
@task
|
||||||
def close_indexes(**kwargs):
|
def close_indexes(**kwargs):
|
||||||
|
@ -184,32 +164,41 @@ def import_catalogue_entities():
|
||||||
pool_maxsize=20,
|
pool_maxsize=20,
|
||||||
timeout=180
|
timeout=180
|
||||||
)
|
)
|
||||||
for entity in kwargs["params"]["ENTITIES"]:
|
catalog = RawCatalogOpensearch(client, kwargs["params"]["SUFFIX"])
|
||||||
indexname = f'{entity}_{kwargs["params"]["SUFFIX"]}'
|
|
||||||
client.indices.refresh(indexname)
|
def refresh_index(index_name):
|
||||||
client.indices.put_settings(index=indexname, body={
|
if index_name is not None:
|
||||||
|
client.indices.refresh(index_name)
|
||||||
|
client.indices.put_settings(index=index_name, body={
|
||||||
"index": {
|
"index": {
|
||||||
"number_of_replicas": 1,
|
"number_of_replicas": 1,
|
||||||
"refresh_interval": "60s",
|
"refresh_interval": "60s",
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
# update aliases
|
def update_aliases(index_name, alias_name):
|
||||||
for entity in kwargs["params"]["ENTITIES"]:
|
if index_name is not None and alias_name is not None:
|
||||||
indexname = f'{entity}_{kwargs["params"]["SUFFIX"]}'
|
|
||||||
client.indices.update_aliases(
|
client.indices.update_aliases(
|
||||||
body={"actions": [
|
body={"actions": [
|
||||||
{"remove": {"index": f"{entity}_*", "alias": entity}},
|
{"remove": {"index": f"{alias_name}_*", "alias": alias_name}},
|
||||||
{"add": {"index": indexname, "alias": entity}},
|
{"add": {"index": index_name, "alias": alias_name}},
|
||||||
]}
|
]}
|
||||||
)
|
)
|
||||||
# update "allresources" alias
|
|
||||||
|
for entity in RawCatalogOpensearch.entities:
|
||||||
|
refresh_index(catalog.get_index(entity))
|
||||||
|
refresh_index(catalog.get_mapped_index(entity))
|
||||||
|
update_aliases(catalog.get_index(entity), catalog.get_alias(entity))
|
||||||
|
update_aliases(catalog.get_mapped_index(entity), catalog.get_mapped_alias(entity))
|
||||||
|
|
||||||
|
# update "allresources" alias with mapped indices
|
||||||
actions = []
|
actions = []
|
||||||
for entity in kwargs["params"]["ENTITIES"]:
|
for entity in RawCatalogOpensearch.mapped_entities:
|
||||||
if entity in ['products', 'services', 'training', 'interoperability']:
|
index_name = catalog.get_mapped_index(entity)
|
||||||
indexname = f'{entity}_{kwargs["params"]["SUFFIX"]}'
|
entity_alias = catalog.get_mapped_alias(entity)
|
||||||
actions.append({"remove": {"index": f"{entity}_*", "alias": "allresources"}})
|
actions.append({"remove": {"index": f"{entity_alias}_*", "alias": "allresources"}})
|
||||||
actions.append({"add": {"index": indexname, "alias": "allresources"}})
|
actions.append({"add": {"index": index_name, "alias": "allresources"}})
|
||||||
|
|
||||||
if len(actions) > 0:
|
if len(actions) > 0:
|
||||||
client.indices.update_aliases(
|
client.indices.update_aliases(
|
||||||
body={"actions": actions}
|
body={"actions": actions}
|
||||||
|
|
Loading…
Reference in New Issue