Correct filtering for MAG records

This commit is contained in:
Ilias Kanellos 2023-05-12 12:55:43 +03:00
parent 5ddbb4ad10
commit 1788ac2d4d
1 changed file with 1 addition and 1 deletion

View File

@@ -137,7 +137,7 @@ cites_df = spark.read.json(graph_folder + "/relation")\
.where(
(F.array_contains(F.col('collected_lower'), "opencitations"))
| (F.array_contains(F.col('collected_lower'), "crossref"))
- | (F.array_contains(F.col('collected_lower'), "mag"))
+ | (F.array_contains(F.col('collected_lower'), "microsoft academic graph"))
).drop('collected_lower')
# print ("Cited df has: " + str(cites_df.count()) + " entries")