From c0750fb17c1950193646c3501ab8303c1634a454 Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Thu, 18 Nov 2021 17:11:31 +0100
Subject: [PATCH] avoid unnecessary count operations over large spark datasets

---
 .../eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java
index bf0b7f687..9cc003bf6 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java
@@ -63,7 +63,9 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction {
 			.toJavaRDD()
 			.filter(x -> !isOpenorgs(x));
 
-		log.info("Number of non-Openorgs relations collected: {}", simRels.count());
+		if (log.isDebugEnabled()) {
+			log.debug("Number of non-Openorgs relations collected: {}", simRels.count());
+		}
 
 		spark
 			.createDataset(simRels.rdd(), Encoders.bean(Relation.class))
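
For reference, the pattern the hunk applies (guarding a Spark count() action behind a debug-level check) looks roughly like this in isolation. This is a minimal sketch assuming an slf4j Logger and a JavaRDD; the names CountLoggingSketch and logSizeIfDebug are illustrative and not taken from the dnet-hadoop codebase.

    import org.apache.spark.api.java.JavaRDD;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    class CountLoggingSketch {

        private static final Logger log = LoggerFactory.getLogger(CountLoggingSketch.class);

        // count() is a Spark action: it forces a full job over the dataset.
        // Guarding it with isDebugEnabled() means runs at INFO level and above
        // skip that job entirely, instead of paying for it on every execution.
        static void logSizeIfDebug(JavaRDD<?> rdd) {
            if (log.isDebugEnabled()) {
                log.debug("Number of records: {}", rdd.count());
            }
        }
    }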