diff --git a/ScholexplorerPropagation.py b/ScholexplorerPropagation.py index 9f82e8c..8484650 100644 --- a/ScholexplorerPropagation.py +++ b/ScholexplorerPropagation.py @@ -148,9 +148,9 @@ def hasDescription(x): return False -load_datasets = sc.textFile('/user/sandro.labruzzo/scholix/graph/dataset').map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference']) -load_publications = sc.textFile('/user/sandro.labruzzo/scholix/graph/publication').map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference']) -relations_rdd = spark.read.parquet('/user/sandro.labruzzo/scholix/graph/relation').rdd.filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference']) +load_datasets = sc.textFile().map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference']) +load_publications = sc.textFile().map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference']) +relations_rdd = spark.read.parquet().rdd.filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference']) #relations from publication to dataset in the graph subset pubs_relation = relations_rdd.filter(lambda x: x['source'][:2] == '50' and x['target'][:2] == '60' and x['relType'].lower() in paper_dataset_propagation)