From e1fc30f4a4415d579aed548a61cf51eb6230f2f8 Mon Sep 17 00:00:00 2001
From: Miriam Baglioni
Date: Mon, 29 Jun 2020 18:12:18 +0200
Subject: [PATCH] Update 'ScholexplorerPropagation.py'

---
 ScholexplorerPropagation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ScholexplorerPropagation.py b/ScholexplorerPropagation.py
index 9f82e8c..8484650 100644
--- a/ScholexplorerPropagation.py
+++ b/ScholexplorerPropagation.py
@@ -148,9 +148,9 @@ def hasDescription(x):
     return False
 
 
-load_datasets = sc.textFile('/user/sandro.labruzzo/scholix/graph/dataset').map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
-load_publications = sc.textFile('/user/sandro.labruzzo/scholix/graph/publication').map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
-relations_rdd = spark.read.parquet('/user/sandro.labruzzo/scholix/graph/relation').rdd.filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
+load_datasets = sc.textFile().map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
+load_publications = sc.textFile().map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
+relations_rdd = spark.read.parquet().rdd.filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
 
 #relations from publication to dataset in the graph subset
 pubs_relation = relations_rdd.filter(lambda x: x['source'][:2] == '50' and x['target'][:2] == '60' and x['relType'].lower() in paper_dataset_propagation)
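
Note: the patch removes the hard-coded HDFS paths but leaves sc.textFile() and spark.read.parquet() without arguments, which would fail at runtime because both calls require a path. Below is a minimal sketch of one way the three paths could be supplied externally instead of being hard-coded; the command-line handling, variable names, and Spark session setup are illustrative assumptions, not part of the patch itself.

# Sketch only: one possible way to parameterize the input paths removed by the patch.
# The argument handling and names here are assumptions for illustration.
import json
import sys

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName('ScholexplorerPropagation').getOrCreate()
sc = spark.sparkContext

# Hypothetical: take the dataset, publication, and relation paths from the command line.
dataset_path, publication_path, relation_path = sys.argv[1:4]


def not_deleted(x):
    # Keep records that are not marked as deleted by inference (same filter as in the patch).
    return x['dataInfo'] is None or not x['dataInfo']['deletedbyinference']


load_datasets = sc.textFile(dataset_path).map(json.loads).filter(not_deleted)
load_publications = sc.textFile(publication_path).map(json.loads).filter(not_deleted)
relations_rdd = spark.read.parquet(relation_path).rdd.filter(not_deleted)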