From dba34505de8d3d8704529c66afe4acdcac8944fc Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 19 Jul 2023 14:24:52 +0200 Subject: [PATCH] Fix SparkStatsTest bug where parquet tables were incorrectly read as text files leading to unpredictable count() values --- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index b33b627e75..07e9934449 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -145,34 +145,34 @@ public class SparkStatsTest implements Serializable { long orgs_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/organization_blockstats") .count(); long pubs_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/publication_blockstats") .count(); long sw_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/software_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/software_blockstats") .count(); long ds_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_blockstats") .count(); long orp_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .count(); - assertEquals(480, orgs_blocks); - assertEquals(295, pubs_blocks); - assertEquals(122, sw_blocks); - assertEquals(191, ds_blocks); - assertEquals(178, orp_blocks); + assertEquals(414, orgs_blocks); + assertEquals(187, pubs_blocks); + assertEquals(128, sw_blocks); + assertEquals(192, ds_blocks); + assertEquals(194, orp_blocks); } @AfterAll