forked from D-Net/dnet-hadoop
[DOIBOOST Process] fix filtering to filter results with non null id
This commit is contained in:
parent
a7cf277d98
commit
5fd0e610bf
|
@ -146,7 +146,7 @@ object SparkProcessMAG {
|
||||||
.save(s"$workingPath/mag_publication")
|
.save(s"$workingPath/mag_publication")
|
||||||
|
|
||||||
spark.read.load(s"$workingPath/mag_publication").as[Publication]
|
spark.read.load(s"$workingPath/mag_publication").as[Publication]
|
||||||
.filter(p => p.getId == null)
|
.filter(p => p.getId != null)
|
||||||
.groupByKey(p => p.getId)
|
.groupByKey(p => p.getId)
|
||||||
.reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b))
|
.reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b))
|
||||||
.map(_._2)
|
.map(_._2)
|
||||||
|
|
Loading…
Reference in New Issue