"""Per-entity transform and filter functions, looked up by entity name.

``transform_entities`` maps an entity name to the function that prepares a
record of that kind; ``filter_entities`` maps an entity name to a predicate
that returns True for records that must be dropped.
"""
from typing import Any, Dict, List, Optional

# Raw access-right code -> display label.
_ACCESS_RIGHT_LABELS = {
    'open': 'Open Access',
    'closed': 'Closed',
    'embargo': 'Embargo',
    'restricted': 'Restricted',
}

# Two-character FOS code prefix -> top-level domain name.
_FOS_DOMAINS = {
    '01': 'Natural Sciences',
    '02': 'Engineering & Technology',
    '03': 'Medical & Health Sciences',
    '04': 'Agricultural Sciences',
    '05': 'Social Sciences',
    '06': 'Humanities',
}

# Label path that must be non-empty for products that require an author.
_CONTRIBUTOR_ID_PATH = ["contributions", "person", "local_identifier"]


def map_access_right(ar: str) -> str:
    """Return the display label for an access-right code.

    Unknown codes, and values that are not strings (e.g. ``None``), map to
    the empty string.
    """
    if not isinstance(ar, str):
        return ''
    return _ACCESS_RIGHT_LABELS.get(ar, '')


def trasform_graph_entity(p: dict) -> dict:
    """Copy ``local_identifier`` into ``_id`` (in place) and return ``p``.

    Raises:
        KeyError: if ``p`` has no ``local_identifier``.
    """
    p['_id'] = p['local_identifier']
    return p


def trasform_catalog_entity(p: dict) -> dict:
    """Copy ``id`` into ``_id`` (in place) and return ``p``.

    Raises:
        KeyError: if ``p`` has no ``id``.
    """
    p['_id'] = p['id']
    return p


def map_fos_topic_to_domain(fos: str) -> Optional[str]:
    """Return the domain name for a FOS value, based on its first two characters.

    Returns ``None`` when the prefix is not one of ``01``..``06`` or when
    ``fos`` is not a string.
    """
    if not isinstance(fos, str):
        return None
    return _FOS_DOMAINS.get(fos[:2])


def trasform_interoperability(p: dict) -> dict:
    """Prepare an interoperability record (in place) and return it.

    * sets ``_id`` from ``id``;
    * wraps an existing ``domain`` value as ``{"domain": value}``;
    * moves the original ``license`` value to ``licenseDetails`` and replaces
      ``license`` with its ``identifier`` (empty string when there is none).

    A missing or ``None`` ``license`` is left untouched instead of raising.
    """
    p = trasform_catalog_entity(p)
    if 'domain' in p:
        p['domain'] = {"domain": p['domain']}

    license_details = p.get('license')
    if license_details is not None:
        p['licenseDetails'] = license_details
        if isinstance(license_details, dict):
            p['license'] = license_details.get('identifier', '')
        else:
            # Not a mapping, so there is no identifier to extract.
            p['license'] = ''
    return p


def trasform_product(p: dict) -> dict:
    """Prepare a product record (in place) and return it.

    Adds the following keys:

    * ``_id``: copied from ``local_identifier``;
    * ``accessRights``: distinct non-empty labels of the manifestations'
      ``access_right`` codes;
    * ``keyword``: distinct values of topics whose scheme is ``keyword``;
    * ``domain``: distinct ``{"domain": name}`` entries derived from topics
      whose scheme is ``FOS``;
    * ``firstPublishDate``: smallest ``value`` among manifestation dates of
      type ``publishing``, or ``None`` when there is none.

    Missing or ``None`` ``manifestations`` / ``topics`` / ``dates`` are
    treated as empty. Distinct values keep their first-seen order.
    """
    p = trasform_graph_entity(p)

    manifestations = p.get('manifestations') or []
    # Unwrap each entry's inner "topic" mapping; tolerate entries without one.
    topics = [(entry.get('topic') or {}) for entry in (p.get('topics') or [])]

    labels = (map_access_right(m.get('access_right')) for m in manifestations)
    p['accessRights'] = list(dict.fromkeys(lbl for lbl in labels if lbl != ''))

    p['keyword'] = list(dict.fromkeys(
        t.get('value') for t in topics if t.get('scheme') == 'keyword'))

    domains = (map_fos_topic_to_domain(t.get('value'))
               for t in topics if t.get('scheme') == 'FOS')
    p['domain'] = [{"domain": name}
                   for name in dict.fromkeys(d for d in domains if d is not None)]

    # Dates without a value are skipped so that they can neither be selected
    # nor break the comparison with string values.
    publishing_dates = (
        d.get('value')
        for m in manifestations
        for d in (m.get('dates') or [])
        if d.get('type') == 'publishing' and d.get('value') is not None)
    p['firstPublishDate'] = min(publishing_dates, default=None)

    return p


transform_entities = {
    # SKG-IF graph entities
    "datasource": trasform_graph_entity,
    "grants": trasform_graph_entity,
    "organizations": trasform_graph_entity,
    "persons": trasform_graph_entity,
    "products": trasform_product,
    "topics": trasform_graph_entity,
    "venues": trasform_graph_entity,
    # EOSC catalog entities
    "interoperability": trasform_interoperability,
    "services": trasform_catalog_entity,
    "training": trasform_catalog_entity,
}


def isEmpty(current_value: Dict[str, Any], labels: List[str]) -> bool:
    """Tell whether the value reached by following ``labels`` is empty.

    Starting from ``current_value``, each label is looked up in turn; a
    non-empty list met on the way is replaced by its first element before
    the lookup.

    Returns True when ``labels`` is empty, when the path cannot be followed,
    or when the final value is ``None``, an empty list, a list whose first
    element is ``""``, or a value whose ``str()`` is ``""``.
    """
    if not labels:
        return True

    for label in labels:
        if isinstance(current_value, list) and current_value:
            current_value = current_value[0]
        if not isinstance(current_value, dict) or label not in current_value:
            return True
        current_value = current_value[label]

    if current_value is None:
        return True
    if isinstance(current_value, list):
        # Only the first element of a list is inspected.
        return not current_value or current_value[0] == ""
    return str(current_value) == ""


#
# Filter products that do not meet inclusion policy
#
def filter_product(p: dict) -> bool:
    """Return True when the product must be excluded.

    Every product needs a non-empty ``titles.none`` and ``firstPublishDate``
    (the key written by ``trasform_product``). In addition:

    * ``literature`` needs ``abstracts.none`` and a contributor
      ``person.local_identifier``;
    * ``research data`` and ``other`` need a contributor
      ``person.local_identifier``.

    Products with any other (or no) ``product_type`` only go through the
    common checks.
    """
    if isEmpty(p, ["titles", "none"]) or isEmpty(p, ["firstPublishDate"]):
        return True

    product_type = p.get('product_type')
    if product_type == "literature":
        return (isEmpty(p, ["abstracts", "none"])
                or isEmpty(p, _CONTRIBUTOR_ID_PATH))
    if product_type in ("research data", "other"):
        return isEmpty(p, _CONTRIBUTOR_ID_PATH)
    return False


filter_entities = {
    "products": filter_product
}