- Fix the "is not NULL" checks inside "spark.filter()"

- Make sure that "outputPath" always ends with a "/", regardless of the given input.
- Fix a parameter description.
This commit is contained in:
Lampros Smyrnaios 2023-12-20 15:15:56 +02:00
parent 22fa60c3dd
commit 17282ea8fc
2 changed files with 5 additions and 4 deletions

View File

@ -100,10 +100,11 @@ public class ContinuousValidator {
parquet_file_path = args[1];
guidelines = args[2];
outputPath = args[3];
if (!outputPath.endsWith("/"))
outputPath += "/";
}
if (!outputPath.endsWith("/"))
outputPath += "/";
logger
.info(
"Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \""
@ -149,7 +150,7 @@ public class ContinuousValidator {
spark
.read()
.parquet(finalParquet_file_path)
.filter("encoding = 'XML' and id != NULL and body != null")
.filter("encoding = 'XML' and id is not NULL and body is not NULL")
.map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class))
.write()
.option("compression", "gzip")

View File

@ -13,7 +13,7 @@
<property>
<name>outputPath</name>
<value>.</value>
<description>the (float) version of the OpenAIRE Guidelines to validate the records against</description>
<description>the path of the output-directory where the result-json-files will be stored</description>
</property>
<property>
<name>sparkDriverMemory</name>