simple test DAG

This commit is contained in:
Giambattista Bloisi 2024-03-18 13:49:25 +01:00
parent 1152e14920
commit 65daefb971
2 changed files with 33 additions and 34 deletions

View File

@ -3,6 +3,7 @@ from __future__ import annotations
import gzip
import io
import json
import logging
import os
import zipfile
from datetime import timedelta
@ -29,7 +30,7 @@ OPENSEARCH_PASSWD = Variable.get("OPENSEARCH_PASSWORD", "admin")
ENTITIES = [ "datasources", "grants", "organizations", "persons", "products", "topics", "venues"]
BULK_PARALLELISM = 4
BULK_PARALLELISM = 7
#
@ -146,11 +147,15 @@ def skg_if_pipeline():
data['_id'] = data['local_identifier']
yield data
# Raise the "opensearch" logger to WARN so successful POST requests are no longer logged
logging.getLogger("opensearch").setLevel(logging.WARN)
succeeded = 0
failed = 0
for success, item in helpers.parallel_bulk(client, actions=_generate_data(), raise_on_exception=False,
for success, item in helpers.parallel_bulk(client, actions=_generate_data(),
raise_on_exception=False,
raise_on_error=False,
chunk_size=500, max_chunk_bytes=10 * 1024 * 1024):
chunk_size=500, max_chunk_bytes=10 * 1024 * 1024,
timeout=180):
if success:
succeeded = succeeded + 1
else:

View File

@ -38,7 +38,7 @@ mappings['datasources'] = {
"type": "text"
},
"persistent_identity_systems": {
"type": "nested",
"type": "object",
"properties": {
"product_type": {
"type": "keyword"
@ -49,7 +49,7 @@ mappings['datasources'] = {
}
},
"research_product_license": {
"type": "nested",
"type": "object",
"properties": {
"name": {
"type": "text"
@ -63,7 +63,7 @@ mappings['datasources'] = {
"type": "keyword"
},
"research_metadata_license": {
"type": "nested",
"type": "object",
"properties": {
"name": {
"type": "text"
@ -82,7 +82,7 @@ mappings['datasources'] = {
mappings['venues'] = {
"properties": {
"identifiers": {
"type": "nested",
"type": "object",
"properties": {
"scheme": {
"type": "keyword"
@ -120,7 +120,7 @@ mappings['venues'] = {
mappings['topics'] = {
"properties": {
"identifiers": {
"type": "nested",
"type": "object",
"properties": {
"scheme": {
"type": "keyword"
@ -171,7 +171,7 @@ mappings['persons'] = {
"type": "text"
},
"identifiers": {
"type": "nested",
"type": "object",
"properties": {
"scheme": {
"type": "keyword"
@ -193,7 +193,7 @@ mappings['organizations'] = {
"type": "keyword"
},
"identifiers": {
"type": "nested",
"type": "object",
"properties": {
"scheme": {
"type": "keyword"
@ -276,7 +276,7 @@ mappings['grants'] = {
"type": "keyword"
},
"identifiers": {
"type": "nested",
"type": "object",
"properties": {
"scheme": {
"type": "keyword"
@ -332,10 +332,10 @@ mappings['products'] = {
}
},
"contributions": {
"type": "nested",
"type": "object",
"properties": {
"person": {
"type": "nested",
"type": "object",
"properties": {
"full_name": {
"fields": {
@ -365,7 +365,7 @@ mappings['products'] = {
}
},
"funding": {
"type": "nested",
"type": "object",
"properties": {
"code": {
"type": "keyword"
@ -387,7 +387,7 @@ mappings['products'] = {
}
},
"identifiers": {
"type": "nested",
"type": "object",
"properties": {
"scheme": {
"type": "keyword"
@ -436,7 +436,7 @@ mappings['products'] = {
}
},
"dates": {
"type": "nested",
"type": "object",
"properties": {
"type": {
"type": "text"
@ -450,13 +450,7 @@ mappings['products'] = {
"type": "object",
"properties": {
"local_identifier": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
"type": "keyword"
},
"name": {
"fields": {
@ -491,7 +485,7 @@ mappings['products'] = {
"type": "text"
},
"venue": {
"type": "nested",
"type": "object",
"properties": {
"local_identifier": {
"type": "keyword"
@ -507,10 +501,10 @@ mappings['products'] = {
"type": "keyword"
},
"related_products": {
"type": "nested",
"type": "object",
"properties": {
"product_list": {
"type": "nested",
"type": "object",
"properties": {
"doi": {
"type": "keyword"
@ -529,7 +523,7 @@ mappings['products'] = {
}
},
"relevant_organizations": {
"type": "nested",
"type": "object",
"properties": {
"isni": {
"type": "keyword"
@ -558,10 +552,10 @@ mappings['products'] = {
}
},
"topics": {
"type": "nested",
"type": "object",
"properties": {
"provenance": {
"type": "nested",
"type": "object",
"properties": {
"trust": {
"type": "keyword",
@ -573,7 +567,7 @@ mappings['products'] = {
}
},
"topic": {
"type": "nested",
"type": "object",
"properties": {
"local_identifier": {
"type": "keyword"
@ -600,7 +594,7 @@ mappings['products'] = {
mappings['services'] = {
"properties": {
"category": {
"type": "nested",
"type": "object",
"properties": {
"category": {
"type": "keyword"
@ -614,7 +608,7 @@ mappings['services'] = {
"type": "text"
},
"domain": {
"type": "nested",
"type": "object",
"properties": {
"domain": {
"type": "keyword"
@ -696,7 +690,7 @@ mappings['interoperability'] = {
"type": "long"
},
"services": {
"type": "nested",
"type": "object",
"properties": {
"name": {
"type": "keyword"
@ -741,7 +735,7 @@ mappings['training'] = {
"type": "text"
},
"domain": {
"type": "nested",
"type": "object",
"properties": {
"domain": {
"type": "keyword"