lot1-kickoff/airflow/dags/EOSC_indexes.py

855 lines
21 KiB
Python
Raw Normal View History

2024-03-12 15:57:14 +01:00
2024-03-14 21:04:35 +01:00
# Registry of field mappings, keyed by entity name. Each value is a dict with
# a top-level "properties" key describing the typed fields of that entity.
# NOTE(review): the structure looks like OpenSearch/Elasticsearch index
# mappings -- confirm against the code that consumes `mappings`.
mappings = {}

# Mapping for the 'datasource' entity.
# Identifier-like and classification fields are "keyword"; "name" is "text"
# with an additional "keyword" sub-field; licence and PID-system entries are
# nested "object" structures.
mappings['datasource'] = {
    "properties": {
        "data_source_classification": {"type": "keyword"},
        "jurisdiction": {"type": "keyword"},
        "local_identifier": {"type": "keyword"},
        "eoscId": {"type": "keyword"},
        "name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "research_product_type": {"type": "keyword"},
        "thematic": {"type": "boolean"},
        "version_control": {"type": "boolean"},
        # TODO: the fields below were not inferred automatically
        "submission_policy_url": {"type": "text"},
        "preservation_policy_url": {"type": "text"},
        "persistent_identity_systems": {
            "type": "object",
            "properties": {
                "product_type": {"type": "keyword"},
                "pid_schemes": {"type": "keyword"},
            },
        },
        "research_product_license": {
            "type": "object",
            "properties": {
                "name": {"type": "text"},
                "url": {"type": "text"},
            },
        },
        "research_product_access_policy": {"type": "keyword"},
        "research_metadata_license": {
            "type": "object",
            "properties": {
                "name": {"type": "text"},
                "url": {"type": "text"},
            },
        },
        "research_metadata_access_policy": {"type": "keyword"},
    }
}
# Mapping for the 'venues' entity.
# "name" and "publisher" are "text" with a "keyword" sub-field; "identifiers"
# is an object of (scheme, value) keyword pairs.
mappings['venues'] = {
    "properties": {
        "identifiers": {
            "type": "object",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
        "name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "publisher": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "type": {"type": "keyword"},
    }
}
2024-03-14 21:04:35 +01:00
# Mapping for the 'topics' entity.
# "identifiers" is an object of (scheme, value) keyword pairs; "name" is
# "text" with a "keyword" sub-field.
mappings['topics'] = {
    "properties": {
        "identifiers": {
            "type": "object",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
        "name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
    }
}
2024-03-14 21:04:35 +01:00
# Mapping for the 'persons' entity.
# The three name fields are "text" with a "keyword" sub-field; "identifiers"
# is an object of (scheme, value) keyword pairs.
mappings['persons'] = {
    "properties": {
        "family_name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "full_name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "given_name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "identifiers": {
            "type": "object",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
    }
}
2024-03-14 21:04:35 +01:00
# Mapping for the 'organizations' entity.
# "name", "other_names" and "short_name" are "text" with a "keyword"
# sub-field; "country" and "type" are plain keywords.
mappings['organizations'] = {
    "properties": {
        "country": {"type": "keyword"},
        "identifiers": {
            "type": "object",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
        "name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "other_names": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "short_name": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "type": {"type": "keyword"},
    }
}
2024-03-14 21:04:35 +01:00
# Mapping for the 'grants' entity.
# Free-text fields are "text" with a "keyword" sub-field; the two date fields
# carry an "ignore_malformed" flag.
# NOTE(review): "ignore_malformed" is the string "true", not a Python boolean;
# kept unchanged -- confirm the backend accepts the string form.
mappings['grants'] = {
    "properties": {
        # TODO: could be keyword only??
        "acronym": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "currency": {"type": "keyword"},
        "end_date": {
            "type": "date",
            "ignore_malformed": "true",
        },
        "funded_amount": {"type": "double"},
        "funder": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "funding_stream": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "grantCode": {"type": "keyword"},
        "identifiers": {
            "type": "object",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "local_identifier": {"type": "keyword"},
        "start_date": {
            "type": "date",
            "ignore_malformed": "true",
        },
        "summary": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "title": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
        "website": {
            "fields": {"keyword": {"type": "keyword"}},
            "type": "text",
        },
    }
}
2024-03-14 21:04:35 +01:00
# Mapping for the 'products' entity.
# The first group of fields is marked in the source as synthetic (generated
# by a transformation step); the rest are the raw fields. The final "type"
# entry is an alias whose path points at "product_type".
# NOTE(review): "index" under topics.provenance is the string "false", not a
# Python boolean; kept unchanged -- confirm the backend accepts the string form.
mappings['products'] = {
    "properties": {
        ### Synthetic fields
        ### generated by transformation
        "accessRights": {"type": "keyword"},
        "domain": {
            "type": "object",
            "properties": {
                "domain": {"type": "keyword"},
                "subdomain": {"type": "keyword"},
            },
        },
        "firstPublishDate": {"type": "date"},
        "keyword": {"type": "keyword"},
        ### raw fields
        "abstracts": {
            "type": "object",
            "properties": {
                # TODO: other languages via index templates
                "none": {"type": "text"},
            },
        },
        "contributions": {
            "type": "object",
            "properties": {
                "declared_affiliations": {"type": "keyword"},
                "person": {
                    "type": "object",
                    "properties": {
                        "full_name": {
                            "fields": {"keyword": {"type": "keyword"}},
                            "type": "text",
                        },
                        "local_identifier": {"type": "keyword"},
                        "orcid": {"type": "keyword"},
                    },
                },
                "rank": {"type": "long"},
                "roles": {"type": "keyword"},
            },
        },
        "funding": {
            "type": "object",
            "properties": {
                "code": {"type": "keyword"},
                "funder": {
                    "fields": {"keyword": {"type": "keyword"}},
                    "type": "text",
                },
                "local_identifier": {"type": "keyword"},
                "title": {"type": "text"},
            },
        },
        "identifiers": {
            "type": "object",
            "properties": {
                "scheme": {"type": "keyword"},
                "value": {"type": "keyword"},
            },
        },
        "indicator": {
            "type": "object",
            "properties": {
                "downloadsAndViews": {
                    "type": "object",
                    "properties": {
                        "downloads": {"type": "long"},
                        "views": {"type": "long"},
                    },
                },
            },
        },
        "local_identifier": {"type": "keyword"},
        "manifestations": {
            "type": "object",
            "properties": {
                "access_right": {"type": "keyword"},
                "biblio": {
                    "type": "object",
                    "properties": {
                        "edition": {"type": "text"},
                        "end_page": {"type": "text"},
                        "hosting_data_source": {"type": "text"},
                        "issue": {"type": "text"},
                        "number": {"type": "text"},
                        "publisher": {"type": "text"},
                        "start_page": {"type": "text"},
                        "volume": {"type": "text"},
                    },
                },
                "dates": {
                    "type": "object",
                    "properties": {
                        "type": {"type": "text"},
                        "value": {"type": "date"},
                    },
                },
                "eoscId": {"type": "keyword"},
                "hosting_datasource": {
                    "type": "object",
                    "properties": {
                        "local_identifier": {"type": "keyword"},
                        "name": {
                            "fields": {
                                "keyword": {
                                    "type": "keyword",
                                    "ignore_above": 256,
                                },
                            },
                            "type": "text",
                        },
                    },
                },
                "licence": {"type": "text"},
                "metadata_curation": {"type": "keyword"},
                "peer_review": {"type": "keyword"},
                "pid": {"type": "keyword"},
                "product_local_type": {"type": "keyword"},
                "product_local_type_schema": {"type": "keyword"},
                "url": {"type": "text"},
                "venue": {
                    "type": "object",
                    "properties": {
                        "local_identifier": {"type": "keyword"},
                        "name": {"type": "text"},
                    },
                },
            },
        },
        "product_type": {"type": "keyword"},
        "related_products": {
            "type": "object",
            "properties": {
                "product_list": {
                    "type": "object",
                    "properties": {
                        "arxivid": {"type": "keyword"},
                        "doi": {"type": "keyword"},
                        "local_identifier": {"type": "keyword"},
                        "pmid": {"type": "keyword"},
                        "title": {"type": "text"},
                    },
                },
                "relation_type": {"type": "keyword"},
            },
        },
        "relevant_organizations": {
            "type": "object",
            "properties": {
                "isni": {"type": "keyword"},
                "local_identifier": {"type": "keyword"},
                "name": {"type": "text"},
                "rinGold": {"type": "keyword"},
                "ror": {"type": "keyword"},
                "wikidata": {"type": "keyword"},
            },
        },
        "titles": {
            "type": "object",
            # TODO: other languages ??
            "properties": {
                "none": {"type": "text"},
            },
        },
        "topics": {
            "type": "object",
            "properties": {
                "provenance": {
                    "type": "object",
                    "properties": {
                        "trust": {
                            "type": "keyword",
                            "index": "false",
                        },
                        "type": {
                            "type": "keyword",
                            "index": "false",
                        },
                    },
                },
                "topic": {
                    "type": "object",
                    "properties": {
                        "local_identifier": {"type": "keyword"},
                        "scheme": {"type": "keyword"},
                        "value": {"type": "keyword"},
                    },
                },
            },
        },
        "type": {
            "path": "product_type",
            "type": "alias",
        },
    }
}
2024-03-18 11:02:07 +01:00
#
# EOSC CATALOG
#
# Mapping for the 'services' entity.
# "category" and "domain" are two-level keyword objects. Several fields are
# "text" with a "keyword" sub-field that sets "ignore_above" to 256.
mappings['services'] = {
    "properties": {
        "category": {
            "type": "object",
            "properties": {
                "category": {"type": "keyword"},
                "subcategory": {"type": "keyword"},
            },
        },
        "description": {"type": "text"},
        "domain": {
            "type": "object",
            "properties": {
                "domain": {"type": "keyword"},
                "subdomain": {"type": "keyword"},
            },
        },
        "horizontal": {"type": "boolean"},
        "id": {"type": "keyword"},
        "interoperabilityGuidelines": {
            "fields": {
                "keyword": {"type": "keyword", "ignore_above": 256},
            },
            "type": "text",
        },
        "language": {"type": "keyword"},
        "name": {
            "fields": {
                "keyword": {"type": "keyword", "ignore_above": 256},
            },
            "type": "text",
        },
        "organization": {
            "fields": {
                "keyword": {"type": "keyword", "ignore_above": 256},
            },
            "type": "text",
        },
        "providers": {
            "fields": {
                "keyword": {"type": "keyword", "ignore_above": 256},
            },
            "type": "text",
        },
        "tags": {"type": "keyword"},
        "type": {"type": "keyword"},
        "webpage": {"type": "text"},
    }
}
# Mapping for the 'interoperability' entity.
# "organization" is "text" with a "keyword" sub-field that sets
# "ignore_above" to 256; "services" is an object of (name, organization)
# keywords; "publicationYear" is a "long".
mappings['interoperability'] = {
    "properties": {
        "description": {"type": "text"},
        "id": {"type": "keyword"},
        "license": {"type": "keyword"},
        "organization": {
            "fields": {
                "keyword": {"type": "keyword", "ignore_above": 256},
            },
            "type": "text",
        },
        "publicationYear": {"type": "long"},
        "services": {
            "type": "object",
            "properties": {
                "name": {"type": "keyword"},
                "organization": {"type": "keyword"},
            },
        },
        "title": {"type": "text"},
        "type": {"type": "keyword"},
    }
}
# Mapping for the 'training' entity.
# "authors" and "organization" are "text" with a "keyword" sub-field that
# sets "ignore_above" to 256; "domain" is a two-level keyword object; most
# remaining fields are plain keywords.
mappings['training'] = {
    "properties": {
        "accessRights": {"type": "keyword"},
        "authors": {
            "fields": {
                "keyword": {"type": "keyword", "ignore_above": 256},
            },
            "type": "text",
        },
        "contentResourceType": {"type": "keyword"},
        "description": {"type": "text"},
        "duration": {"type": "keyword"},
        "domain": {
            "type": "object",
            "properties": {
                "domain": {"type": "keyword"},
                "subdomain": {"type": "keyword"},
            },
        },
        "expertiseLevel": {"type": "keyword"},
        "id": {"type": "keyword"},
        "keyword": {"type": "keyword"},
        "language": {"type": "keyword"},
        "learningResourceType": {"type": "keyword"},
        "license": {"type": "keyword"},
        "organization": {
            "fields": {
                "keyword": {"type": "keyword", "ignore_above": 256},
            },
            "type": "text",
        },
        "qualifications": {"type": "keyword"},
        "title": {"type": "text"},
        "targetGroup": {"type": "keyword"},
        "type": {"type": "keyword"},
        "url": {"type": "text"},
    }
}