forked from D-Net/dnet-hadoop
Correct filtering for MAG records
This commit is contained in:
parent
5ddbb4ad10
commit
1788ac2d4d
|
@@ -137,7 +137,7 @@ cites_df = spark.read.json(graph_folder + "/relation")\
|
||||||
.where(
|
.where(
|
||||||
(F.array_contains(F.col('collected_lower'), "opencitations"))
|
(F.array_contains(F.col('collected_lower'), "opencitations"))
|
||||||
| (F.array_contains(F.col('collected_lower'), "crossref"))
|
| (F.array_contains(F.col('collected_lower'), "crossref"))
|
||||||
| (F.array_contains(F.col('collected_lower'), "mag"))
|
| (F.array_contains(F.col('collected_lower'), "microsoft academic graph"))
|
||||||
).drop('collected_lower')
|
).drop('collected_lower')
|
||||||
# print ("Cited df has: " + str(cites_df.count()) + " entries")
|
# print ("Cited df has: " + str(cites_df.count()) + " entries")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue