2024-06-27 14:07:59 +02:00
|
|
|
from typing import Dict, Any, List
|
2024-03-25 17:54:23 +01:00
|
|
|
|
|
|
|
|
|
|
|
def map_access_right(ar: str) -> str:
    """Translate a raw access-right code into its display label.

    Recognised codes are 'open', 'closed', 'embargo' and 'restricted'.
    Any other value (including None) yields the empty string.
    """
    # Compared with == one by one (rather than hashed) so that any kind of
    # input value simply falls through to the default.
    known_labels = (
        ('open', 'Open Access'),
        ('closed', 'Closed'),
        ('embargo', 'Embargo'),
        ('restricted', 'Restricted'),
    )
    for code, label in known_labels:
        if ar == code:
            return label
    return ''
|
|
|
|
|
|
|
|
|
2024-03-26 22:25:02 +01:00
|
|
|
def trasform_graph_entity(p: dict) -> dict:
    """Set '_id' on a graph entity from its 'local_identifier'.

    The dict is modified in place and the same object is returned.
    Raises KeyError when 'local_identifier' is absent.
    """
    identifier = p['local_identifier']
    p['_id'] = identifier
    return p
|
|
|
|
|
|
|
|
|
|
|
|
def trasform_catalog_entity(p: dict) -> dict:
    """Set '_id' on a catalog entity from its 'id'.

    The dict is modified in place and the same object is returned.
    Raises KeyError when 'id' is absent.
    """
    identifier = p['id']
    p['_id'] = identifier
    return p
|
|
|
|
|
2024-04-05 17:41:22 +02:00
|
|
|
|
|
|
|
def map_fos_topic_to_domain(fos: str):
    """Return the top-level domain name for a FOS topic value.

    The domain is selected by the two-character prefix of ``fos``
    ('01' .. '06'). None is returned when no prefix matches.
    """
    domain_by_prefix = (
        ('01', 'Natural Sciences'),
        ('02', 'Engineering & Technology'),
        ('03', 'Medical & Health Sciences'),
        ('04', 'Agricultural Sciences'),
        ('05', 'Social Sciences'),
        ('06', 'Humanities'),
    )
    return next(
        (domain for prefix, domain in domain_by_prefix if fos.startswith(prefix)),
        None,
    )
|
2024-04-05 17:41:22 +02:00
|
|
|
|
2024-04-09 11:04:06 +02:00
|
|
|
|
|
|
|
def trasform_interoperability(p: dict) -> dict:
    """Reshape an interoperability catalog record.

    On top of the generic catalog handling (``trasform_catalog_entity``):

    * 'domain', when present, is wrapped as ``{"domain": <value>}``;
    * 'license', when present, is copied unchanged to 'licenseDetails' and
      replaced by its 'identifier' entry ('' when there is none).

    The dict is modified in place and returned.
    """
    p = trasform_catalog_entity(p)

    if 'domain' in p:
        p['domain'] = {"domain": p['domain']}

    # 'license' is handled as optional, the same way as 'domain': a record
    # without it is left alone instead of raising KeyError.
    if 'license' in p:
        details = p['license']
        p['licenseDetails'] = details
        # Only a mapping can carry an identifier; None / strings / lists
        # fall back to '' rather than raising TypeError.
        p['license'] = details.get('identifier', '') if isinstance(details, dict) else ''

    return p
|
|
|
|
|
|
|
|
|
2024-03-25 17:54:23 +01:00
|
|
|
def trasform_product(p: dict) -> dict:
    """Add the derived fields to a product record.

    After the generic graph handling (``trasform_graph_entity``) the
    following keys are written on ``p``:

    * 'accessRights': distinct non-empty labels produced by
      ``map_access_right`` from each manifestation's 'access_right';
    * 'keyword': distinct values of the topics whose scheme is 'keyword';
    * 'domain': ``{"domain": <name>}`` for each distinct domain produced by
      ``map_fos_topic_to_domain`` from the topics whose scheme is 'FOS';
    * 'firstPublishDate': the smallest 'value' among manifestation dates
      of type 'publishing', or None when there is none.

    Missing or null 'manifestations' / 'topics' are treated as empty lists.
    Duplicates are dropped keeping first-seen order. The dict is modified in
    place and returned.
    """
    p = trasform_graph_entity(p)

    # Both collections may be absent or null on a record; every derived
    # field uses the same guarded view.
    manifestations = p.get('manifestations') or []
    topics = [entry.get('topic') or {} for entry in (p.get('topics') or [])]

    access_labels = (map_access_right(m.get('access_right')) for m in manifestations)
    # dict.fromkeys de-duplicates like set() but keeps a deterministic order.
    p['accessRights'] = list(dict.fromkeys(label for label in access_labels if label != ''))

    p['keyword'] = list(dict.fromkeys(
        topic.get('value') for topic in topics if topic.get('scheme') == 'keyword'
    ))

    domain_names = (
        map_fos_topic_to_domain(topic.get('value'))
        for topic in topics
        # A FOS topic without a value cannot be mapped to a domain.
        if topic.get('scheme') == 'FOS' and topic.get('value') is not None
    )
    p['domain'] = [
        {"domain": name} for name in dict.fromkeys(domain_names) if name is not None
    ]

    publishing_dates = [
        date.get('value')
        for m in manifestations
        for date in (m.get('dates') or [])
        # Null values are skipped: they cannot be ordered against strings.
        if date.get('type') == 'publishing' and date.get('value') is not None
    ]
    p['firstPublishDate'] = min(publishing_dates, default=None)

    return p
|
|
|
|
|
2024-04-05 17:41:22 +02:00
|
|
|
|
2024-03-25 17:54:23 +01:00
|
|
|
# Per-entity transformation table: each key is an entity name and each value
# is the function that reshapes one record (a dict) of that entity.
# NOTE(review): the code that looks entries up is outside this view —
# presumably the key is the name of the entity collection being processed.
transform_entities = {
    # SKG-IF graph entities
    "datasource": trasform_graph_entity,
    "grants": trasform_graph_entity,
    "organizations": trasform_graph_entity,
    "persons": trasform_graph_entity,
    # products get extra derived fields on top of the generic graph handling
    "products": trasform_product,
    "topics": trasform_graph_entity,
    "venues": trasform_graph_entity,
    # EOSC catalog entities
    # interoperability records additionally reshape 'domain' and 'license'
    "interoperability": trasform_interoperability,
    "services": trasform_catalog_entity,
    "training": trasform_catalog_entity,
}
|
2024-06-27 14:07:59 +02:00
|
|
|
|
2024-06-27 15:08:52 +02:00
|
|
|
|
2024-06-27 14:07:59 +02:00
|
|
|
def isEmpty(current_value: Dict[str, Any], labels: List[str]) -> bool:
    """Tell whether the value reached by following ``labels`` is empty.

    Starting from ``current_value``, each label is looked up as a dict key;
    whenever the value at hand is a non-empty list, its first element is
    used for the lookup instead.

    True is returned when ``labels`` is empty, when a step cannot be
    resolved (not a dict, or key absent), or when the value reached is
    None, an empty list, a list whose first element equals "", or a value
    whose ``str()`` is "". Otherwise False.
    """
    if len(labels) == 0:
        return True

    node = current_value
    for key in labels:
        # A non-empty list stands in for its first element.
        if isinstance(node, list) and len(node) > 0:
            node = node[0]
        if not isinstance(node, dict) or key not in node:
            return True
        node = node[key]

    if node is None:
        return True
    if isinstance(node, list):
        # Only the first element of the list is inspected.
        return len(node) == 0 or node[0] == ""
    return str(node) == ""
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
# Filter products that do not meet inclusion policy
|
|
|
|
#
|
|
|
|
def filter_product(p: dict) -> bool:
    """Return True when product ``p`` must be filtered out.

    Every product needs a non-empty title (titles -> none) and a non-empty
    'firstPublishDate'. Depending on 'product_type' further fields are
    required:

    * "literature": an abstract (abstracts -> none) and a contributor
      identifier (contributions -> person -> local_identifier);
    * "research data" / "other": a contributor identifier.

    Other product types only need the common fields. Emptiness is decided
    by ``isEmpty``.
    """
    contributor_id = ["contributions", "person", "local_identifier"]

    common_paths = (["titles", "none"], ["firstPublishDate"])
    if any(isEmpty(p, path) for path in common_paths):
        return True

    # 'product_type' is read only once the common checks have passed.
    product_type = p['product_type']
    if product_type == "literature":
        required_paths = (["abstracts", "none"], contributor_id)
    elif product_type in ["research data", "other"]:
        required_paths = (contributor_id,)
    else:
        required_paths = ()

    return any(isEmpty(p, path) for path in required_paths)
|
|
|
|
|
|
|
|
|
|
|
|
# Per-entity filter table: each key is an entity name and each value is a
# predicate over one record; filter_product returns True for products that
# do not meet the inclusion policy.
# NOTE(review): how entities without an entry are handled is decided by the
# caller, which is outside this view.
filter_entities = {
    "products": filter_product
}
|
2024-06-27 18:24:24 +02:00
|
|
|
|
|
|
|
|