[DOIBOOST Process] fix the filter so that it keeps only results with a non-null id

This commit is contained in:
Miriam Baglioni 2021-11-25 12:10:45 +01:00
parent a7cf277d98
commit 5fd0e610bf
1 changed file with 1 addition and 1 deletion

View File

@@ -146,7 +146,7 @@ object SparkProcessMAG {
.save(s"$workingPath/mag_publication") .save(s"$workingPath/mag_publication")
spark.read.load(s"$workingPath/mag_publication").as[Publication] spark.read.load(s"$workingPath/mag_publication").as[Publication]
.filter(p => p.getId == null) .filter(p => p.getId != null)
.groupByKey(p => p.getId) .groupByKey(p => p.getId)
.reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b))
.map(_._2) .map(_._2)