code refactor

2021-02-01 14:58:06 +01:00 · 2021-02-01 14:58:06 +01:00 · bead34d11a
parent 6ff234d81b
commit bead34d11a
1 changed files with 23 additions and 15 deletions
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
@ -157,8 +157,9 @@ public class GenerateNativeStoreSparkJob {
 				final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
-				Dataset<MetadataRecord> mdstore = spark.createDataset(nativeStore.rdd(), encoder);
+				final Dataset<MetadataRecord> mdstore = spark.createDataset(nativeStore.rdd(), encoder);
 				final String targetPath = currentVersion.getHdfsPath() + DATASET_NAME;
 				if (readMdStoreVersion != null) {
 					// INCREMENTAL MODE
@ -168,28 +169,35 @@ public class GenerateNativeStoreSparkJob {
 						.as(encoder);
 					TypedColumn<MetadataRecord, MetadataRecord> aggregator = new MDStoreAggregator().toColumn();
-					mdstore = currentMdStoreVersion
+					saveDataset(
 						currentMdStoreVersion
 							.union(mdstore)
 							.groupByKey(
 								(MapFunction<MetadataRecord, String>) MetadataRecord::getId,
 								Encoders.STRING())
 							.agg(aggregator)
-						.map((MapFunction<Tuple2<String, MetadataRecord>, MetadataRecord>) Tuple2::_2, encoder);
+							.map((MapFunction<Tuple2<String, MetadataRecord>, MetadataRecord>) Tuple2::_2, encoder),
 						targetPath);
 				} else {
 					saveDataset(mdstore, targetPath);
 				}
 				mdstore
 					.write()
 					.mode(SaveMode.Overwrite)
 					.format("parquet")
 					.save(currentVersion.getHdfsPath() + DATASET_NAME);
 				mdstore = spark.read().load(currentVersion.getHdfsPath() + DATASET_NAME).as(encoder);
-				final Long total = mdstore.count();
+				final Long total = spark.read().load(targetPath).count();
 				AggregationUtility.writeTotalSizeOnHDFS(spark, total, currentVersion.getHdfsPath() + "/size");
 			});
 	}
 	private static void saveDataset(final Dataset<MetadataRecord> currentMdStore, final String targetPath) {
 		currentMdStore
 			.write()
 			.mode(SaveMode.Overwrite)
 			.format("parquet")
 			.save(targetPath);
 	}
 	public static MetadataRecord parseRecord(
 		final String input,
 		final String xpath,