Fix SparkStatsTest bug where parquet tables were incorrectly read as text files, leading to unpredictable count() values

This commit is contained in:
Giambattista Bloisi 2023-07-19 14:24:52 +02:00
parent e47ed1fdb2
commit dba34505de
1 changed file with 10 additions and 10 deletions

View File

@ -145,34 +145,34 @@ public class SparkStatsTest implements Serializable {
long orgs_blocks = spark long orgs_blocks = spark
.read() .read()
.textFile(testOutputBasePath + "/" + testActionSetId + "/organization_blockstats") .load(testOutputBasePath + "/" + testActionSetId + "/organization_blockstats")
.count(); .count();
long pubs_blocks = spark long pubs_blocks = spark
.read() .read()
.textFile(testOutputBasePath + "/" + testActionSetId + "/publication_blockstats") .load(testOutputBasePath + "/" + testActionSetId + "/publication_blockstats")
.count(); .count();
long sw_blocks = spark long sw_blocks = spark
.read() .read()
.textFile(testOutputBasePath + "/" + testActionSetId + "/software_blockstats") .load(testOutputBasePath + "/" + testActionSetId + "/software_blockstats")
.count(); .count();
long ds_blocks = spark long ds_blocks = spark
.read() .read()
.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_blockstats") .load(testOutputBasePath + "/" + testActionSetId + "/dataset_blockstats")
.count(); .count();
long orp_blocks = spark long orp_blocks = spark
.read() .read()
.textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats")
.count(); .count();
assertEquals(480, orgs_blocks); assertEquals(414, orgs_blocks);
assertEquals(295, pubs_blocks); assertEquals(187, pubs_blocks);
assertEquals(122, sw_blocks); assertEquals(128, sw_blocks);
assertEquals(191, ds_blocks); assertEquals(192, ds_blocks);
assertEquals(178, orp_blocks); assertEquals(194, orp_blocks);
} }
@AfterAll @AfterAll