From 8461dc62cc19f18ad0acc702673a9665749d43b3 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Fri, 5 Apr 2024 19:09:36 +0200 Subject: [PATCH] initial stage --- airflow/dags/EOSC_entity_trasform.py | 35 ++++++++++++++-------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/airflow/dags/EOSC_entity_trasform.py b/airflow/dags/EOSC_entity_trasform.py index fbe1a8d..cb3ef0b 100644 --- a/airflow/dags/EOSC_entity_trasform.py +++ b/airflow/dags/EOSC_entity_trasform.py @@ -25,33 +25,33 @@ def trasform_catalog_entity(p: dict) -> dict: def map_fos_topic_to_domain(fos: str): - res = dict() if fos.startswith('01'): - res['domain'] = 'Natural Sciences' + return 'Natural Sciences' elif fos.startswith('02'): - res['domain'] = 'Engineering & Technology' + return 'Engineering & Technology' elif fos.startswith('03'): - res['domain'] = 'Medical & Health Sciences' + return 'Medical & Health Sciences' elif fos.startswith('04'): - res['domain'] = 'Agricultural Sciences' + return 'Agricultural Sciences' elif fos.startswith('05'): - res['domain'] = 'Social Sciences' + return 'Social Sciences' elif fos.startswith('06'): - res['domain'] = 'Humanities' + return 'Humanities' - return res + return None def trasform_product(p: dict) -> dict: p = trasform_graph_entity(p) - p['accessRights'] = list( - filter(lambda ar: ar != '', map(lambda m: map_access_right(m.get('access_right')), p.get('manifestations')))) - p['keyword'] = list( - map(lambda topic: topic.get('value'), - filter(lambda topic: topic.get('topic').get('scheme') == 'keyword', p.get('topics')))) - p['domain'] = list(filter(lambda fos: fos is not None, - map(lambda topic: map_fos_topic_to_domain(topic.get('topic').get('value')), - filter(lambda topic: topic.get('topic').get('scheme') == 'FOS', p.get('topics'))))) - + p['accessRights'] = list(set( + filter(lambda ar: ar != '', map(lambda m: map_access_right(m.get('access_right')), p.get('manifestations'))))) + p['keyword'] = list(set( + map(lambda topic: topic.get('topic').get('value'), + filter(lambda topic: topic.get('topic').get('scheme') == 'keyword', p.get('topics'))))) + p['domain'] = list( + map(lambda fos: {"domain": fos}, + set(filter(lambda fos: fos is not None, + map(lambda topic: map_fos_topic_to_domain(topic.get('topic').get('value')), + filter(lambda topic: topic.get('topic').get('scheme') == 'FOS', p.get('topics'))))))) p['firstPublishDate'] = next( iter( sorted( @@ -59,7 +59,6 @@ def trasform_product(p: dict) -> dict: filter(lambda date: date.get('type') == 'publishing', [date for m in (p.get('manifestations') or []) for date in (m.get('dates') or [])])))), None) - return p