forked from D-Net/dnet-hadoop
Added repartition(1) before writing so that each community product is written to a single output file
This commit is contained in:
parent
e983d02c1c
commit
004bf225cb
|
@@ -92,6 +92,7 @@ public class SparkDumpRISISCatalogue implements Serializable {
|
||||||
value -> execMap(value, communityName),
|
value -> execMap(value, communityName),
|
||||||
Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class))
|
Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
|
.repartition(1)
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
|
Loading…
Reference in New Issue