Continuous Validation Workflow #388

Open
lsmyrnaios wants to merge 18 commits from lsmyrnaios/dnet-hadoop:continuous_validation2 into beta
2 changed files with 5 additions and 4 deletions
Showing only changes of commit 17282ea8fc - Show all commits

View File

@@ -100,10 +100,11 @@ public class ContinuousValidator {
parquet_file_path = args[1];
guidelines = args[2];
outputPath = args[3];
-if (!outputPath.endsWith("/"))
-outputPath += "/";
}
+if (!outputPath.endsWith("/"))
+outputPath += "/";
logger
.info(
"Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \""
@@ -149,7 +150,7 @@ public class ContinuousValidator {
spark
.read()
.parquet(finalParquet_file_path)
-.filter("encoding = 'XML' and id != NULL and body != null")
+.filter("encoding = 'XML' and id is not NULL and body is not NULL")
.map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class))
.write()
.option("compression", "gzip")

View File

@@ -13,7 +13,7 @@
<property>
<name>outputPath</name>
<value>.</value>
-<description>the (float) version of the OpenAIRE Guidelines to validate the records against</description>
+<description>the path of the output-directory where the result-json-files will be stored</description>
</property>
<property>
<name>sparkDriverMemory</name>