Added repartition(1) before writing so that a single output file is produced for each community product

This commit is contained in:
Miriam Baglioni 2020-06-22 17:38:02 +02:00
parent e983d02c1c
commit 004bf225cb
1 changed file with 1 addition and 0 deletions

View File

@@ -92,6 +92,7 @@ public class SparkDumpRISISCatalogue implements Serializable {
value -> execMap(value, communityName),
Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class))
.filter(Objects::nonNull)
.repartition(1)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")