1
0
Fork 0

Avoid unnecessary count operations over large Spark datasets

This commit is contained in:
Claudio Atzori 2021-11-18 17:11:31 +01:00
parent bb5dca7979
commit c0750fb17c
1 changed file with 3 additions and 1 deletion

View File

@@ -63,7 +63,9 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction {
     .toJavaRDD()
     .filter(x -> !isOpenorgs(x));
 
-  log.info("Number of non-Openorgs relations collected: {}", simRels.count());
+  if (log.isDebugEnabled()) {
+    log.debug("Number of non-Openorgs relations collected: {}", simRels.count());
+  }
 
   spark
     .createDataset(simRels.rdd(), Encoders.bean(Relation.class))