Avoid unnecessary count operations over large Spark datasets
This commit is contained in:
parent
bb5dca7979
commit
c0750fb17c
|
@ -63,7 +63,9 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction {
|
|||
.toJavaRDD()
|
||||
.filter(x -> !isOpenorgs(x));
|
||||
|
||||
log.info("Number of non-Openorgs relations collected: {}", simRels.count());
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Number of non-Openorgs relations collected: {}", simRels.count());
|
||||
}
|
||||
|
||||
spark
|
||||
.createDataset(simRels.rdd(), Encoders.bean(Relation.class))
|
||||
|
|
Loading…
Reference in New Issue