Correct filtering for MAG records
This commit is contained in:
parent
5ddbb4ad10
commit
1788ac2d4d
|
@@ -137,7 +137,7 @@ cites_df = spark.read.json(graph_folder + "/relation")\
     .where(
         (F.array_contains(F.col('collected_lower'), "opencitations"))
         | (F.array_contains(F.col('collected_lower'), "crossref"))
-        | (F.array_contains(F.col('collected_lower'), "mag"))
+        | (F.array_contains(F.col('collected_lower'), "microsoft academic graph"))
     ).drop('collected_lower')
 # print ("Cited df has: " + str(cites_df.count()) + " entries")
 
|
|
Loading…
Reference in New Issue