Continuous Validation Workflow #388
|
@ -100,10 +100,11 @@ public class ContinuousValidator {
|
||||||
parquet_file_path = args[1];
|
parquet_file_path = args[1];
|
||||||
guidelines = args[2];
|
guidelines = args[2];
|
||||||
outputPath = args[3];
|
outputPath = args[3];
|
||||||
if (!outputPath.endsWith("/"))
|
|
||||||
outputPath += "/";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!outputPath.endsWith("/"))
|
||||||
|
outputPath += "/";
|
||||||
|
|
||||||
logger
|
logger
|
||||||
.info(
|
.info(
|
||||||
"Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \""
|
"Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \""
|
||||||
|
@ -149,7 +150,7 @@ public class ContinuousValidator {
|
||||||
spark
|
spark
|
||||||
.read()
|
.read()
|
||||||
.parquet(finalParquet_file_path)
|
.parquet(finalParquet_file_path)
|
||||||
.filter("encoding = 'XML' and id != NULL and body != null")
|
.filter("encoding = 'XML' and id is not NULL and body is not NULL")
|
||||||
.map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class))
|
.map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class))
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
<property>
|
<property>
|
||||||
<name>outputPath</name>
|
<name>outputPath</name>
|
||||||
<value>.</value>
|
<value>.</value>
|
||||||
<description>the (float) version of the OpenAIRE Guidelines to validate the records against</description>
|
<description>the path of the output-directory where the result-json-files will be stored</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
|
|
Loading…
Reference in New Issue