Aggiornare 'ScholexplorerPropagation.py'

This commit is contained in:
Miriam Baglioni 2020-06-29 18:12:18 +02:00
parent 6bc22d7e39
commit e1fc30f4a4
1 changed file with 3 additions and 3 deletions

View File

@@ -148,9 +148,9 @@ def hasDescription(x):
return False
# NOTE(review): this span appears to be a diff hunk rendered without +/- markers.
# The first three statements (hard-coded absolute input paths) are presumably the
# lines the commit removed, and the three that follow (with <..._PATH>
# placeholders) presumably replace them -- confirm against the commit.
#
# Each loader keeps a record only when its 'dataInfo' entry is None or its
# 'dataInfo'['deletedbyinference'] value is falsy. The two text inputs are parsed
# line by line with json.loads; the relations are read from parquet and filtered
# through the DataFrame's .rdd.
load_datasets = sc.textFile('/user/sandro.labruzzo/scholix/graph/dataset').map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
load_publications = sc.textFile('/user/sandro.labruzzo/scholix/graph/publication').map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
relations_rdd = spark.read.parquet('/user/sandro.labruzzo/scholix/graph/relation').rdd.filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
# Same three loaders with the input locations written as placeholders.
# <INPUT_DATASET_PATH>, <INPUT_PUBLICATION_PATH> and <INPUT_RELATION_PATH> are not
# valid Python expressions; they must be substituted with real path strings
# before this script can run.
load_datasets = sc.textFile(<INPUT_DATASET_PATH>).map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
load_publications = sc.textFile(<INPUT_PUBLICATION_PATH>).map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
relations_rdd = spark.read.parquet(<INPUT_RELATION_PATH>).rdd.filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])
# Relations from publication to dataset in the graph subset: keep a relation when
# its 'source' starts with '50', its 'target' starts with '60' (presumably the
# publication and dataset id prefixes -- verify), and its lower-cased 'relType'
# is contained in paper_dataset_propagation (defined outside this view).
pubs_relation = relations_rdd.filter(lambda x: x['source'][:2] == '50' and x['target'][:2] == '60' and x['relType'].lower() in paper_dataset_propagation)