From 0d1ec1913f40827b5e3fbda2575082072c9123ba Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 18:42:25 +0200 Subject: [PATCH] added fix to avoid duplication of results --- .../PrepareResultCommunitySet.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 5d0b75a8e..750d333e5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -95,20 +95,20 @@ public class PrepareResultCommunitySet { result_organizationset .map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class)) .filter(Objects::nonNull) - .toJavaRDD() - .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) - .reduceByKey((a, b) -> { - ArrayList cl = a.getCommunityList(); - b.getCommunityList().stream().forEach(s -> { - if (!cl.contains(s)) { - cl.add(s); - } - }); - a.setCommunityList(cl); - return a; - }) - .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) - .saveAsTextFile(outputPath, GzipCodec.class); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList cl = a.getCommunityList(); + b.getCommunityList().stream().forEach(s -> { + if (!cl.contains(s)) { + cl.add(s); + } + }); + a.setCommunityList(cl); + return a; + }) + .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) + .saveAsTextFile(outputPath, GzipCodec.class); // .write() // .mode(SaveMode.Overwrite) // .option("compression", "gzip")