forked from D-Net/dnet-hadoop
[DOIBOOST Process] fix filter to keep only results with a non-null id
This commit is contained in:
parent
a7cf277d98
commit
5fd0e610bf
|
@ -146,7 +146,7 @@ object SparkProcessMAG {
|
|||
.save(s"$workingPath/mag_publication")
|
||||
|
||||
spark.read.load(s"$workingPath/mag_publication").as[Publication]
|
||||
.filter(p => p.getId == null)
|
||||
.filter(p => p.getId != null)
|
||||
.groupByKey(p => p.getId)
|
||||
.reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b))
|
||||
.map(_._2)
|
||||
|
|
Loading…
Reference in New Issue