Continuous Validation Workflow #388

Open
lsmyrnaios wants to merge 18 commits from lsmyrnaios/dnet-hadoop:continuous_validation2 into beta
2 changed files with 5 additions and 4 deletions
Showing only changes of commit 17282ea8fc - Show all commits

View File

@@ -100,9 +100,10 @@ public class ContinuousValidator {
parquet_file_path = args[1]; parquet_file_path = args[1];
guidelines = args[2]; guidelines = args[2];
outputPath = args[3]; outputPath = args[3];
}
if (!outputPath.endsWith("/")) if (!outputPath.endsWith("/"))
outputPath += "/"; outputPath += "/";
}
logger logger
.info( .info(
@@ -149,7 +150,7 @@ public class ContinuousValidator {
spark spark
.read() .read()
.parquet(finalParquet_file_path) .parquet(finalParquet_file_path)
.filter("encoding = 'XML' and id != NULL and body != null") .filter("encoding = 'XML' and id is not NULL and body is not NULL")
.map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class)) .map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class))
.write() .write()
.option("compression", "gzip") .option("compression", "gzip")

View File

@@ -13,7 +13,7 @@
<property> <property>
<name>outputPath</name> <name>outputPath</name>
<value>.</value> <value>.</value>
<description>the (float) version of the OpenAIRE Guidelines to validate the records against</description> <description>the path of the output-directory where the result-json-files will be stored</description>
</property> </property>
<property> <property>
<name>sparkDriverMemory</name> <name>sparkDriverMemory</name>