From 5ec8c49ad5144d056512ac25ea3a8444e71ca4f1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 15 May 2020 12:49:58 +0200 Subject: [PATCH] removed serialization points --- .../dhp/blacklist/SparkRemoveBlacklistedRelationJob.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index 92289ec2d..91bcb9d1c 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -79,8 +79,6 @@ public class SparkRemoveBlacklistedRelationJob { Dataset inputRelation = readRelations(spark, inputPath); Dataset mergesRelation = readRelations(spark, mergesPath); - log.info("InputRelationCount: {}", inputRelation.count()); - Dataset dedupSource = blackListed .joinWith( mergesRelation, blackListed.col("source").equalTo(mergesRelation.col("target")), @@ -103,11 +101,6 @@ public class SparkRemoveBlacklistedRelationJob { return c._1(); }, Encoders.bean(Relation.class)); - dedupBL - .write() - .mode(SaveMode.Overwrite) - .json(blacklistPath + "/deduped"); - inputRelation .joinWith( dedupBL, (inputRelation