diff --git a/dhp-workflows/dhp-continuous-validation/src/main/java/eu/dnetlib/dhp/continuous_validator/ContinuousValidator.java b/dhp-workflows/dhp-continuous-validation/src/main/java/eu/dnetlib/dhp/continuous_validator/ContinuousValidator.java index f1b02bc62..72e10bdb8 100644 --- a/dhp-workflows/dhp-continuous-validation/src/main/java/eu/dnetlib/dhp/continuous_validator/ContinuousValidator.java +++ b/dhp-workflows/dhp-continuous-validation/src/main/java/eu/dnetlib/dhp/continuous_validator/ContinuousValidator.java @@ -100,10 +100,11 @@ public class ContinuousValidator { parquet_file_path = args[1]; guidelines = args[2]; outputPath = args[3]; - if (!outputPath.endsWith("/")) - outputPath += "/"; } + if (!outputPath.endsWith("/")) + outputPath += "/"; + logger .info( "Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \"" @@ -149,7 +150,7 @@ public class ContinuousValidator { spark .read() .parquet(finalParquet_file_path) - .filter("encoding = 'XML' and id != NULL and body != null") + .filter("encoding = 'XML' and id is not NULL and body is not NULL") .map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class)) .write() .option("compression", "gzip") diff --git a/dhp-workflows/dhp-continuous-validation/src/main/resources/eu/dnetlib/dhp/continuous_validator/oozie_app/workflow.xml b/dhp-workflows/dhp-continuous-validation/src/main/resources/eu/dnetlib/dhp/continuous_validator/oozie_app/workflow.xml index 7a99c76dc..137c5c8cc 100644 --- a/dhp-workflows/dhp-continuous-validation/src/main/resources/eu/dnetlib/dhp/continuous_validator/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-continuous-validation/src/main/resources/eu/dnetlib/dhp/continuous_validator/oozie_app/workflow.xml @@ -13,7 +13,7 @@ outputPath . - the (float) version of the OpenAIRE Guidelines to validate the records against + the path of the output-directory where the result-json-files will be stored sparkDriverMemory