forked from D-Net/dnet-hadoop
avoid unnecessary count operations over large Spark datasets
This commit is contained in:
parent
bb5dca7979
commit
c0750fb17c
|
@ -63,7 +63,9 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction {
|
||||||
.toJavaRDD()
|
.toJavaRDD()
|
||||||
.filter(x -> !isOpenorgs(x));
|
.filter(x -> !isOpenorgs(x));
|
||||||
|
|
||||||
log.info("Number of non-Openorgs relations collected: {}", simRels.count());
|
if (log.isDebugEnabled()) {
|
||||||
|
log.debug("Number of non-Openorgs relations collected: {}", simRels.count());
|
||||||
|
}
|
||||||
|
|
||||||
spark
|
spark
|
||||||
.createDataset(simRels.rdd(), Encoders.bean(Relation.class))
|
.createDataset(simRels.rdd(), Encoders.bean(Relation.class))
|
||||||
|
|
Loading…
Reference in New Issue