From 9fa125b16fc207bd10cdda91e66027bfafc4b2be Mon Sep 17 00:00:00 2001
From: Giambattista Bloisi
Date: Mon, 16 Oct 2023 10:48:38 +0200
Subject: [PATCH] use saveParquet method to ensure mergerel output is compressed

---
 .../java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
index 2f551b244..39c265b41 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
@@ -155,8 +155,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
                     (FlatMapFunction) cc -> ccToMergeRel(cc, dedupConf),
                     Encoders.bean(Relation.class));
 
-            mergeRels.write().mode(SaveMode.Overwrite).parquet(mergeRelPath);
-
+            saveParquet(mergeRels, mergeRelPath, SaveMode.Overwrite);
         }
     }
 