lot1-kickoff/airflow/dags/opensearch_indexes.py

594 lines
15 KiB
Python

# Registry of field-mapping definitions; each key added below (e.g.
# 'datasources') holds one mapping body with a top-level "properties" object.
mappings = {}

# Field mappings for the 'datasources' entry.
mappings['datasources'] = {
    "properties": {
        "data_source_classification": {"type": "keyword"},
        "jurisdiction": {"type": "keyword"},
        "local_identifier": {"type": "keyword"},
        # Text field that also exposes a `keyword` sub-field.
        "name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "research_product_type": {"type": "keyword"},
        "thematic": {"type": "boolean"},
        "version_control": {"type": "boolean"},
        # TODO: the fields below were not inferred automatically
        "submission_policy_url": {"type": "text"},
        "preservation_policy_url": {"type": "text"},
        "persistent_identity_systems": {
            "type": "nested",
            "properties": {
                "product_type": {"type": "keyword"},
                "pid_schemes": {"type": "keyword"},
            },
        },
        "research_product_license": {
            "type": "nested",
            "properties": {
                "name": {"type": "text"},
                "url": {"type": "text"},
            },
        },
        "research_product_access_policy": {"type": "keyword"},
        "research_metadata_license": {
            "type": "nested",
            "properties": {
                "name": {"type": "text"},
                "url": {"type": "text"},
            },
        },
        "research_metadata_access_policy": {"type": "keyword"},
    }
}
# Field mappings for the 'venues' entry.
mappings['venues'] = {
    "properties": {
        # Nested objects carrying `scheme` and `value` keyword fields.
        "identifiers": {
            "type": "nested",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
        # Text fields below also expose a `keyword` sub-field.
        "name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "publisher": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "type": {"type": "keyword"},
    }
}
# Field mappings for the 'topics' entry.
mappings['topics'] = {
    "properties": {
        # Nested objects carrying `scheme` and `value` keyword fields.
        "identifiers": {
            "type": "nested",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
        # Text field that also exposes a `keyword` sub-field.
        "name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
    }
}
# Field mappings for the 'persons' entry.
mappings['persons'] = {
    "properties": {
        # The three name fields are text with a `keyword` sub-field each.
        "family_name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "full_name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "given_name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        # Nested objects carrying `scheme` and `value` keyword fields.
        "identifiers": {
            "type": "nested",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
    }
}
# Field mappings for the 'organizations' entry.
mappings['organizations'] = {
    "properties": {
        "country": {"type": "keyword"},
        # Nested objects carrying `scheme` and `value` keyword fields.
        "identifiers": {
            "type": "nested",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
        # The name-like fields are text with a `keyword` sub-field each.
        "name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "other_names": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "short_name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "type": {"type": "keyword"},
    }
}
# Field mappings for the 'grants' entry.
mappings['grants'] = {
    "properties": {
        # TODO: could be keyword only??
        "acronym": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "currency": {"type": "keyword"},
        # `ignore_malformed` is a Boolean mapping parameter, so it is given as
        # a real boolean (serialized as JSON `true`) rather than the string
        # "true".
        "end_date": {"type": "date", "ignore_malformed": True},
        "funded_amount": {"type": "double"},
        "funder": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "funding_stream": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "grantCode": {"type": "keyword"},
        # Nested objects carrying `scheme` and `value` keyword fields.
        "identifiers": {
            "type": "nested",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
        # Same Boolean `ignore_malformed` setting as `end_date`.
        "start_date": {"type": "date", "ignore_malformed": True},
        "summary": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "title": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "website": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
    }
}
# Field mappings for the 'products' entry.
mappings['products'] = {
    "properties": {
        "abstracts": {
            "type": "object",
            "properties": {
                # TODO: other languages via index templates
                "none": {"type": "text"},
            },
        },
        "contributions": {
            "type": "nested",
            "properties": {
                "person": {
                    "type": "nested",
                    "properties": {
                        "full_name": {
                            "fields": {"keyword": {"type": "keyword"}},
                            "type": "text",
                        },
                        "local_identifier": {"type": "keyword"},
                        "orcid": {"type": "keyword"},
                    },
                },
                # TODO: ask Miriam; not mapped automatically
                "declared_affiliations": {"type": "keyword"},
                "rank": {"type": "long"},
                # TODO: ask Miriam; not mapped automatically
                "roles": {"type": "keyword"},
            },
        },
        "funding": {
            "type": "nested",
            "properties": {
                "code": {"type": "keyword"},
                "funder": {
                    "fields": {"keyword": {"type": "keyword"}},
                    "type": "text",
                },
                "local_identifier": {"type": "keyword"},
                "title": {"type": "text"},
            },
        },
        # Nested objects carrying `scheme` and `value` keyword fields.
        "identifiers": {
            "type": "nested",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
        "manifestations": {
            "type": "object",
            "properties": {
                "access_right": {"type": "keyword"},
                "biblio": {
                    "type": "object",
                    "properties": {
                        "end_page": {"type": "text"},
                        "publisher": {"type": "text"},
                        "start_page": {"type": "text"},
                        "volume": {"type": "text"},
                        "issue": {"type": "text"},
                        "edition": {"type": "text"},
                        "number": {"type": "text"},
                        # NOTE(review): spelled `hosting_data_source` (text) here,
                        # while `manifestations` also maps an object named
                        # `hosting_datasource` -- confirm both are intended.
                        "hosting_data_source": {"type": "text"},
                    },
                },
                "dates": {
                    "type": "nested",
                    "properties": {
                        "type": {"type": "text"},
                        "value": {"type": "date"},
                    },
                },
                "hosting_datasource": {
                    "type": "object",
                    "properties": {
                        "local_identifier": {
                            "fields": {
                                "keyword": {"type": "keyword", "ignore_above": 256},
                            },
                            "type": "text",
                        },
                        "name": {
                            "fields": {
                                "keyword": {"type": "keyword", "ignore_above": 256},
                            },
                            "type": "text",
                        },
                    },
                },
                "licence": {"type": "text"},
                "metadata_curation": {"type": "keyword"},
                "peer_review": {"type": "keyword"},
                "pid": {"type": "keyword"},
                "product_local_type": {"type": "text"},
                "product_local_type_schema": {"type": "text"},
                "url": {"type": "text"},
                "venue": {
                    "type": "nested",
                    "properties": {
                        "local_identifier": {"type": "keyword"},
                        "name": {"type": "text"},
                    },
                },
            },
        },
        "product_type": {"type": "keyword"},
        "related_products": {
            "type": "nested",
            "properties": {
                "product_list": {
                    "type": "nested",
                    "properties": {
                        "doi": {"type": "keyword"},
                        "local_identifier": {"type": "keyword"},
                        "title": {"type": "text"},
                    },
                },
                "relation_type": {"type": "keyword"},
            },
        },
        "relevant_organizations": {
            "type": "nested",
            "properties": {
                "isni": {"type": "keyword"},
                "local_identifier": {"type": "keyword"},
                "name": {"type": "text"},
                "ror": {"type": "keyword"},
                "wikidata": {"type": "keyword"},
            },
        },
        "titles": {
            "type": "object",
            # TODO: other languages ??
            "properties": {
                "none": {"type": "text"},
            },
        },
        "topics": {
            "type": "nested",
            "properties": {
                "provenance": {
                    "type": "nested",
                    "properties": {
                        "trust": {"type": "double"},
                        "type": {"type": "keyword"},
                    },
                },
                "topic": {
                    "type": "nested",
                    "properties": {
                        "local_identifier": {"type": "keyword"},
                        "value": {
                            "fields": {"keyword": {"type": "keyword"}},
                            "type": "text",
                        },
                    },
                },
            },
        },
    }
}