Initial commit of the "dhp-continuous-validation" module.

This commit is contained in:
Lampros Smyrnaios 2023-12-15 15:53:31 +02:00
parent 84d54643cf
commit 9e6a03e4e2
20 changed files with 1697 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
# Continuous Validation
[...]

View File

@@ -0,0 +1,38 @@
# This script installs and runs the project.
DEFAULT_PROFILE='' # It's the empty profile.
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
# For error-handling, we cannot use "set -e", since it is problematic: https://mywiki.wooledge.org/BashFAQ/105
# So we have our own function, for use when a single command fails.
handle_error () {
echo -e "\n\n$1\n\n"; exit $2
}
# Change the working directory to the script's directory, when running from another location.
cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1
if [[ $# -eq 0 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <sparkMaster> <justRun: 0 | 1>"; exit 2
fi
sparkMaster=""
justRun=0
if [[ $# -eq 1 ]]; then
sparkMaster=$1
elif [[ $# -eq 2 ]]; then
sparkMaster=$1
justRun=$2
elif [[ $# -gt 2 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <sparkMaster> <justRun: 0 | 1>"; exit 3
fi
if [[ $justRun -eq 0 ]]; then
mvn clean install ${CHOSEN_MAVEN_PROFILE}
fi
# Run the ContinuousValidator.
test_parquet_file="./src/test/resources/part-00589-733117df-3822-4fce-bded-17289cc5959a-c000.snappy.parquet"
java -jar ./target/dhp-continuous-validation-1.0.0-SNAPSHOT.jar ${sparkMaster} ${test_parquet_file} 1
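
For example, running `installAndRun.sh local[*] 0` builds the module with the default Maven profile and then runs the produced jar against the bundled test parquet file; passing `1` as the second argument skips the build step.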

View File

@@ -0,0 +1,380 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>dhp-continuous-validation</artifactId>
<!-- The "version" is inherited from the parent module. -->
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jackson.version>2.14.3</jackson.version> <!-- The Scala/Spark stack is not compatible with Jackson versions higher than 2.14.x -->
</properties>
<dependencies>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>uoa-validator-engine2</artifactId>
<version>0.9.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${dhp.spark.version}</version>
<!--<scope>provided</scope>-->
<!--
<scope>compile</scope>
-->
<exclusions>
<!-- This is an older version which causes problems. We have to add the latest version independently. -->
<!-- This exists only in Hadoop 3.0.0+ -->
<!--<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-api</artifactId>
</exclusion>-->
<exclusion>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${dhp.spark.version}</version>
<!--<scope>provided</scope>-->
<!--<scope>compile</scope>-->
<exclusions>
<!-- This exclusion is a must for scala 2.11 and spark 2.4.0.cloudera2 -->
<exclusion>
<groupId>com.twitter</groupId>
<artifactId>parquet-format</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.parquet/parquet-avro -->
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<version>1.13.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<!--<version>${dhp.hadoop.version}</version>-->
<!--<scope>compile</scope>-->
<exclusions>
<exclusion>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-reload4j</artifactId>
</exclusion>
<exclusion>
<groupId>ch.qos.reload4j</groupId>
<artifactId>reload4j</artifactId>
</exclusion>
<!-- Vulnerable dependencies: -->
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
</exclusion>
<!-- This dependency is required for the program to run without errors, even though it is discontinued. -->
<!--<exclusion>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
</exclusion>-->
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/io.dropwizard.metrics/metrics-core -->
<dependency>
<groupId>io.dropwizard.metrics</groupId>
<artifactId>metrics-core</artifactId>
<version>4.2.22</version>
<!-- <scope>compile</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
<!-- <scope>compile</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
<!-- <scope>compile</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>${jackson.version}</version>
<!-- <scope>compile</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-app -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-app</artifactId>
<version>${dhp.hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<!-- Vulnerable dependencies: -->
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
<!--<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${dhp.hadoop.version}</version>
&lt;!&ndash; <scope>compile</scope>&ndash;&gt;
<exclusions>
<exclusion>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-reload4j</artifactId>
</exclusion>
<exclusion>
<groupId>ch.qos.reload4j</groupId>
<artifactId>reload4j</artifactId>
</exclusion>
&lt;!&ndash; Vulnerable dependencies: &ndash;&gt;
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>-->
<!-- Add back some updated version of the needed dependencies. -->
<!-- This should be enabled only when using Hadoop 3.0.0+ -->
<!--<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-api</artifactId>
<version>${dhp.hadoop.version}</version>
</dependency>-->
<dependency> <!-- Newer versions (>= 0.18.x) are not compatible with Java 8. -->
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>0.17.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
<version>6.5.1</version>
</dependency>
<!-- Other dependencies. -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.10.1</version>
</dependency>
<!-- logback versions 1.4.x require Java 11 -->
<!-- logback versions 1.3.x run on Java 8, but if this project is added as a dependency in a Spring Boot (2.7.x) app, Spring Boot throws an error, since it does not yet support logback 1.3.x -->
<!-- https://mvnrepository.com/artifact/ch.qos.logback/logback-core -->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-core</artifactId>
<version>1.2.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.36</version>
</dependency>
<!-- https://mvnrepository.com/artifact/ch.qos.logback/logback-classic -->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-engine -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<version>5.10.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
<!-- <scope>compile</scope>-->
</dependency>
</dependencies>
<!--<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.5.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<shadedArtifactAttached>false</shadedArtifactAttached>
<transformers>
&lt;!&ndash; add Main-Class to manifest file &ndash;&gt;
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>eu.dnetlib.dhp.continuous_validator.ContinuousValidator</mainClass>
</transformer>
</transformers>
<filters>
&lt;!&ndash; filter manifest signature files when creating uber-jar &ndash;&gt;
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/INDEX.LIST</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
<exclude>LICENSE*</exclude>
</excludes>
</filter>
</filters>
<createDependencyReducedPom>false</createDependencyReducedPom>
&lt;!&ndash; The "minimize" config breaks things at runtime. &ndash;&gt;
&lt;!&ndash;<minimizeJar>true</minimizeJar>&ndash;&gt;
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.2</version>
<configuration>
&lt;!&ndash;<excludes>
<exclude>some test to exclude here</exclude>
</excludes>&ndash;&gt;
</configuration>
</plugin>
</plugins>
</build>-->
<repositories>
<repository>
<id>libs</id>
<url>file:///${project.basedir}/libs</url>
</repository>
</repositories>
</project>

View File

@@ -0,0 +1,9 @@
# This script deploys and runs the oozie workflow.
DEFAULT_PROFILE='' # It's the empty profile.
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
mvn clean package ${CHOSEN_MAVEN_PROFILE} -Poozie-package,deploy,run \
-Dworkflow.source.dir=eu/dnetlib/dhp/continuous_validator

View File

@@ -0,0 +1,211 @@
package eu.dnetlib.dhp.continuous_validator;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.BufferedWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.openaire.AbstractOpenAireProfile;
import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV4Profile;
import eu.dnetlib.validator2.validation.utils.TestUtils;
import scala.Option;
public class ContinuousValidator {
public static final String TEST_FILES_V4_DIR = TestUtils.TEST_FILES_BASE_DIR + "openaireguidelinesV4/";
public static final String RESULTS_FILE = "results.json";
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidator.class);
private static final String parametersFile = "input_continuous_validator_parameters.json";
private static final boolean SHOULD_GET_ARGUMENTS_FROM_FILE = true; // The command-line alternative throws an error for now.
public static void main(String[] args) {
ArgumentApplicationParser parser = null;
String sparkMaster = null;
Boolean isSparkSessionManaged = false;
String parquet_file_path = null;
String guidelines = null;
String outputPath = null;
if (SHOULD_GET_ARGUMENTS_FROM_FILE) {
try {
String jsonConfiguration = IOUtils
.toString(
Objects
.requireNonNull(
ContinuousValidator.class
.getResourceAsStream("/eu/dnetlib/dhp/continuous_validator/" + parametersFile)),
StandardCharsets.UTF_8);
parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
String isSparkSessionManagedStr = parser.get("isSparkSessionManaged");
if (isSparkSessionManagedStr == null) {
logger
.error(
"The \"isSParkSessionManagedStr\" was not retrieved from the parameters file: "
+ parametersFile);
return;
}
// This "is needed to implement a unit test in which the spark session is created in the context of the
// unit test itself rather than inside the spark application"
isSparkSessionManaged = Optional
.of(isSparkSessionManagedStr)
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
// TODO - If the above is true, then the Spark-session defined in the unit-test should be used.
} catch (Exception e) {
logger.error("Error when parsing the parameters!", e);
return;
}
parquet_file_path = parser.get("parquet_file_path");
if (parquet_file_path == null) {
logger.error("The \"parquet_file_path\" was not retrieved from the parameters file: " + parametersFile);
return;
}
guidelines = parser.get("guidelines");
if (guidelines == null) {
logger.error("The \"guidelines\" was not retrieved from the parameters file: " + parametersFile);
return;
}
outputPath = parser.get("outputPath");
if (outputPath == null) {
logger.error("The \"outputPath\" was not retrieved from the parameters file: " + parametersFile);
return;
}
sparkMaster = "local[*]";
} else {
if (args.length != 4) {
String errorMsg = "Wrong number of arguments given! Please run the app like so: java -jar target/dhp-continuous-validation-1.0.0-SNAPSHOT.jar <sparkMaster> <parquetFileFullPath> <guidelines> <outputPath>";
System.err.println(errorMsg);
logger.error(errorMsg);
System.exit(1);
}
sparkMaster = args[0];
logger.info("Will use this Spark master: \"" + sparkMaster + "\".");
parquet_file_path = args[1];
guidelines = args[2];
outputPath = args[3];
if (!outputPath.endsWith("/"))
outputPath += "/";
}
logger
.info(
"Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \""
+ guidelines + "\"" + " and will output the results in: " + outputPath + RESULTS_FILE);
// TODO - USE THE "runWithSparkSession" METHOD TO RUN THE SPARK CODE INSIDE!!
AbstractOpenAireProfile profile = new LiteratureGuidelinesV4Profile();
SparkConf conf = new SparkConf();
conf.setAppName(ContinuousValidator.class.getSimpleName());
String finalParquet_file_path = parquet_file_path;
String finalOutputPath = outputPath;
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
Dataset<Row> parquetFileDF = spark.read().parquet(finalParquet_file_path);
parquetFileDF.show(5);
// Filter the results based on the XML-encoding and non-null id and body.
parquetFileDF = parquetFileDF
.filter(
parquetFileDF
.col("encoding")
.eqNullSafe("XML")
.and(parquetFileDF.col("id").isNotNull())
.and(parquetFileDF.col("body").isNotNull()));
// Use a new instance of Document Builder in each worker, as it is not thread-safe.
MapFunction<Row, XMLApplicationProfile.ValidationResult> validateMapFunction = row -> profile
.validate(
row.getAs("id").toString(),
TestUtils
.getDocumentBuilder()
.parse(IOUtils.toInputStream(row.getAs("body").toString(), StandardCharsets.UTF_8)));
Dataset<XMLApplicationProfile.ValidationResult> validationResultsDataset = parquetFileDF
.map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class));
logger.info("Showing a few validation-results.. just for checking");
validationResultsDataset.show(5);
// Write the results to json file immediately, without converting them to a list.
validationResultsDataset
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(finalOutputPath + RESULTS_FILE); // The filename should be the name of the input-file or the
// input-directory.
if (logger.isDebugEnabled()) {
List<XMLApplicationProfile.ValidationResult> validationResultsList = validationResultsDataset
.javaRDD()
.collect();
if (validationResultsList.isEmpty()) {
logger.error("The \"validationResultsList\" was empty!");
return;
}
validationResultsList.forEach(vr -> logger.debug(vr.id() + " | score:" + vr.score()));
for (XMLApplicationProfile.ValidationResult result : validationResultsList)
logger.debug(result.toString());
}
// TODO - REMOVE THIS WHEN THE WRITE FROM ABOVE IS OK
/*
* try (BufferedWriter writer = Files .newBufferedWriter(Paths.get(outputPath + RESULTS_FILE),
* StandardCharsets.UTF_8)) { writer.write(new Gson().toJson(validationResultsList)); } catch (Exception e)
* { logger.error("Error when writing the \"validationResultsList\" as json into the results-file: " +
* outputPath + RESULTS_FILE); return; }
*/
Option<String> uiWebUrl = spark.sparkContext().uiWebUrl();
if (uiWebUrl.isDefined()) {
logger
.info(
"Waiting 60 seconds, before shutdown, for the user to check the jobs' status at: "
+ uiWebUrl.get());
try {
Thread.sleep(60_000);
} catch (InterruptedException ignored) {
}
} else
logger.info("The \"uiWebUrl\" is not defined, in order to check the jobs' status. Shutting down..");
});
}
}
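
As an illustration, here is a minimal, standalone sketch of the per-record validation step that the map-function above performs, outside of Spark. The class name, record id and XML body are hypothetical placeholders; the profile and helper classes are the ones already used above.

import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;

import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV4Profile;
import eu.dnetlib.validator2.validation.utils.TestUtils;

public class SingleRecordValidationSketch {

	public static void main(String[] args) throws Exception {
		// Hypothetical id and XML body, standing in for the "id" and "body" columns of the parquet file.
		String id = "oai:example.org:record/1";
		String xmlBody = "<resource xmlns=\"http://namespace.openaire.eu/schema/oaire/\"/>";

		LiteratureGuidelinesV4Profile profile = new LiteratureGuidelinesV4Profile();
		// Use a new DocumentBuilder per thread, as it is not thread-safe.
		XMLApplicationProfile.ValidationResult result = profile
			.validate(id, TestUtils.getDocumentBuilder().parse(IOUtils.toInputStream(xmlBody, StandardCharsets.UTF_8)));
		System.out.println(result.id() + " | score: " + result.score());
	}
}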

View File

@@ -0,0 +1,101 @@
package eu.dnetlib.dhp.continuous_validator.utils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.io.InputFile;
import org.slf4j.LoggerFactory;
public class ParquetUtils {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ParquetUtils.class);
private static final Configuration parquetConfig = new Configuration();
public static List<GenericRecord> getParquetRecords(String fullFilePath) {
InputFile inputFile;
try { // TODO - Verify that this will create any directories which do not exist in the provided path. Currently
// we create the directories beforehand.
inputFile = HadoopInputFile.fromPath(new Path(fullFilePath), parquetConfig);
// logger.trace("Created the parquet " + outputFile); // DEBUG!
} catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown.
// "Throwable" catches EVERYTHING!
logger.error("", e);
return null;
}
List<GenericRecord> records = new ArrayList<>();
GenericRecord record;
try (ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord> builder(inputFile).build()) {
while ((record = reader.read()) != null) {
records.add(record);
}
} catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown.
// "Throwable" catches EVERYTHING!
logger.error("Problem when creating the \"ParquetWriter\" object or when writing the records with it!", e);
// At some point, I got an "NoSuchMethodError", because of a problem in the AvroSchema file:
// (java.lang.NoSuchMethodError: org.apache.avro.Schema.getLogicalType()Lorg/apache/avro/LogicalType;).
// The error was with the schema: {"name": "date", "type" : ["null", {"type" : "long", "logicalType" :
// "timestamp-millis"}]},
return null;
}
return records; // It may be empty.
}
public static Map<String, String> getIdXmlMapFromParquetFile(String parquetFileFullPath) {
List<GenericRecord> recordList = ParquetUtils.getParquetRecords(parquetFileFullPath);
if (recordList == null)
return null; // The error is already logged.
else if (recordList.isEmpty()) {
logger.error("The parquet-file \"" + parquetFileFullPath + "\" had no records inside!");
return null;
}
Map<String, String> idXmlMap = new HashMap<>();
for (GenericRecord record : recordList) {
if (logger.isTraceEnabled())
logger.trace(record.toString());
Object id = record.get("id");
if (id == null)
continue;
String idStr = id.toString();
Object encoding = record.get("encoding");
if (encoding == null) {
logger.warn("Record with id = \"" + idStr + "\" does not provide the encoding for its body!");
continue;
}
String encodingStr = encoding.toString();
if (!encodingStr.equals("XML")) {
logger.warn("Record with id = \"" + idStr + "\" does not have XML encoding for its body!");
continue;
}
Object body = record.get("body");
if (body == null) {
logger.warn("Record with id = \"" + idStr + "\" does not have a body!");
continue;
}
String bodyStr = body.toString();
idXmlMap.put(idStr, bodyStr);
// logger.debug(idStr + " | " + idXmlMap.get(idStr));
}
return idXmlMap;
}
}
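
A minimal usage sketch for this helper follows; the class name and the parquet-file path are hypothetical, and the file is expected to contain the "id", "encoding" and "body" columns handled above.

import java.util.Map;

import eu.dnetlib.dhp.continuous_validator.utils.ParquetUtils;

public class ParquetUtilsUsageSketch {

	public static void main(String[] args) {
		// Hypothetical parquet file; any errors are already logged by the helper.
		Map<String, String> idXmlMap = ParquetUtils.getIdXmlMapFromParquetFile("./records.snappy.parquet");
		if (idXmlMap == null || idXmlMap.isEmpty())
			return;
		idXmlMap.forEach((id, xml) -> System.out.println(id + " -> " + xml.length() + " characters of XML"));
	}
}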

View File

@@ -0,0 +1,22 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>sparkExecutorMemoryOverhead</name>
<value>1G</value>
</property>
</configuration>

View File

@@ -0,0 +1,111 @@
<workflow-app name="Validate metadata records against OpenAIRE Guidelines" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>parquet_file_path</name>
<value>./src/test/resources/part-00589-733117df-3822-4fce-bded-17289cc5959a-c000.snappy.parquet</value>
<description>the full path of the parquet file</description>
</property>
<property>
<name>openaire_guidelines</name>
<value>4.0</value>
<description>the version of the OpenAIRE Guidelines to validate the records against</description>
</property>
<property>
<name>outputPath</name>
<value>.</value>
<description>the path of the output directory where the result json-files will be stored</description>
</property>
<property>
<name>sparkDriverMemory</name>
<value>4096M</value>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<value>2048m</value>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<value>4</value>
<description>number of cores used by single executor</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<!--<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>-->
<property>
<name>spark2EventLogDir</name>
<value>/spark2/logs</value>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<!--<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>-->
</configuration>
</global>
<start to="ValidateMetadataRecords"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ValidateMetadataRecords">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Validate multiple records against OpenAIRE Guidelines</name>
<class>eu.dnetlib.dhp.continuous_validator.ContinuousValidator</class>
<jar>dhp-continuous-validation-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
<!-- &#45;&#45;conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
&#45;&#45;conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}-->
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<!-- Arguments passed to the "main" method of the class defined above. -->
<arg>--parquet_file_path</arg><arg>${parquet_file_path}</arg>
<arg>--openaire_guidelines</arg><arg>${openaire_guidelines}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@@ -0,0 +1,26 @@
[
{
"paramName": "issm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "when true will stop SparkSession after job execution",
"paramRequired": false
},
{
"paramName": "prq_file",
"paramLongName": "parquet_file_path",
"paramDescription": "the full path for the parquet file to be processed",
"paramRequired": true
},
{
"paramName": "guidelines",
"paramLongName": "openaire_guidelines",
"paramDescription": "the version of the OpenAIRE Guidelines to validate the records against",
"paramRequired": true
},
{
"paramName": "o",
"paramLongName": "outputPath",
"paramDescription": "the path of the output-directory where the result-json-files will be stored",
"paramRequired": true
}
]
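
These parameter definitions are consumed by the ArgumentApplicationParser, exactly as in the ContinuousValidator class above. A minimal sketch (the class name is hypothetical; it assumes this json file is available on the classpath at the path used by ContinuousValidator):

import java.nio.charset.StandardCharsets;
import java.util.Objects;

import org.apache.commons.io.IOUtils;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class ParameterParsingSketch {

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				Objects
					.requireNonNull(
						ParameterParsingSketch.class
							.getResourceAsStream(
								"/eu/dnetlib/dhp/continuous_validator/input_continuous_validator_parameters.json")),
				StandardCharsets.UTF_8);
		ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		// Expects e.g.: --parquet_file_path <path> --openaire_guidelines 4.0 --outputPath <dir>
		parser.parseArgument(args);
		System.out.println(parser.get("parquet_file_path"));
	}
}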

View File

@@ -0,0 +1,43 @@
<configuration debug="false">
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/> <!-- Default delay: 0ms (zero) -->
<!-- <Appenders>-->
<appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/ContinuousValidator.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
<fileNamePattern>logs/ContinuousValidator.%i.log.zip</fileNamePattern>
<minIndex>1</minIndex>
<maxIndex>10</maxIndex>
</rollingPolicy>
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
<maxFileSize>100MB</maxFileSize> <!-- Lower the size to check the "RollingFileAppender" during tests. -->
</triggeringPolicy>
<encoder>
<charset>UTF-8</charset>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
</encoder>
</appender>
<appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<charset>UTF-8</charset>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
</encoder>
</appender>
<!-- </Appenders>-->
<root level="debug">
<appender-ref ref="Console" />
</root>
<logger name="eu.dnetlib.dhp.continuous_validator" level="trace"/>
<logger name="eu.dnetlib.validator2" level="error"/>
<logger name="org.sparkproject" level="info"/>
<logger name="org.apache.spark" level="info"/>
<logger name="org.apache.hadoop" level="info"/>
<logger name="org.apache.parquet" level="info"/>
</configuration>

View File

@@ -0,0 +1,33 @@
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.continuous_validator.utils.ParquetUtils;
import eu.dnetlib.validator2.validation.utils.TestUtils;
public class ReadParquetDataTest {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ReadParquetDataTest.class);
private static final String parquetFileFullPath = TestUtils.TEST_FILES_BASE_DIR
+ "part-00589-733117df-3822-4fce-bded-17289cc5959a-c000.snappy.parquet";
public static void main(String[] args) {
new ReadParquetDataTest().testParquetRead();
}
@Test
public void testParquetRead() {
Map<String, String> idXmlMap = ParquetUtils.getIdXmlMapFromParquetFile(parquetFileFullPath);
if (idXmlMap == null) {
logger.error("Could not create the \"idXmlMap\" from parquet-file: " + parquetFileFullPath);
System.exit(99);
} else if (idXmlMap.isEmpty())
logger.warn("The generated \"idXmlMap\" was empty, for parquet-file: " + parquetFileFullPath);
else
logger.info("The \"idXmlMap\" was successfully generated, for parquet-file: " + parquetFileFullPath);
}
}

View File

@@ -0,0 +1,34 @@
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.List;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.continuous_validator.ContinuousValidator;
public class ReadResultsTest {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ReadResultsTest.class);
public static void main(String[] args) {
try {
List standardValidationResultList = new Gson()
.fromJson(new BufferedReader(new FileReader(ContinuousValidator.RESULTS_FILE)), List.class);
if (standardValidationResultList == null)
logger.error("Could not map the json to a \"List\" object.");
else if (standardValidationResultList.isEmpty())
logger.warn("The \"standardValidationResultList\" is empty!");
else
logger.info(standardValidationResultList.toString());
} catch (FileNotFoundException fnfe) {
logger.error("The results-file \"" + ContinuousValidator.RESULTS_FILE + "\" does not exist!");
} catch (Exception e) {
logger.error("Error when reading the json-results-file \"" + ContinuousValidator.RESULTS_FILE + "\"", e);
}
}
}

View File

@@ -0,0 +1,126 @@
import static eu.dnetlib.dhp.continuous_validator.ContinuousValidator.TEST_FILES_V4_DIR;
import java.io.BufferedWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import com.google.gson.JsonIOException;
import eu.dnetlib.dhp.continuous_validator.ContinuousValidator;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV4Profile;
import eu.dnetlib.validator2.validation.utils.TestUtils;
import scala.Option;
public class ValidateTestFiles {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ValidateTestFiles.class);
public static final String RESULTS_FILE = "results.json";
public static void main(String[] args) {
if (args.length != 3) {
String errorMsg = "Wrong number of arguments given! PLease run the app like so: java -jar build/libs/continuous-validator-1.0.0-SNAPSHOT.jar <sparkMaster> <parquetFileFullPath> <guidelines>";
System.err.println(errorMsg);
logger.error(errorMsg);
System.exit(1);
}
String sparkMaster = args[0];
logger.info("Will use this Spark master: \"" + sparkMaster + "\".");
String parquetFileFullPath = args[1];
String guidelines = args[2];
logger
.info(
"Will validate the contents of parquetFile: \"" + parquetFileFullPath + "\", against guidelines: \""
+ guidelines + "\".");
SparkConf sparkConf = new SparkConf();
sparkConf.setAppName("Continuous-Validator");
sparkConf.setMaster(sparkMaster); // Run on the Spark Cluster.
sparkConf.set("spark.driver.memory", "4096M");
sparkConf
.set("spark.executor.instances", "4") // 4 executors
.set("spark.executor.cores", "1"); // 1 core per executor
sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
sparkConf.set("spark.rdd.compress", "true");
String appVersion = "1.0.0-SNAPSHOT";
/*
* try { Class<?> klass = Class.forName("eu.dnetlib.continuous_validator.BuildConfig"); appVersion = (String)
* klass.getDeclaredField("version").get(null); if ( logger.isTraceEnabled() )
* logger.trace("The app's version is: " + appVersion); } catch (Exception e) {
* logger.error("Error when acquiring the \"appVersion\"!", e); System.exit(1); }
*/
sparkConf.setJars(new String[] {
"build/libs/continuous-validator-" + appVersion + "-all.jar"
}); // This is the "fat-Jar".
sparkConf.validateSettings();
logger.debug("Spark custom configurations: " + sparkConf.getAll().toString());
LiteratureGuidelinesV4Profile profile = new LiteratureGuidelinesV4Profile();
try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
JavaPairRDD<String, String> jprdd = sc.wholeTextFiles(TEST_FILES_V4_DIR);
logger.info("Showing the validation-results..");
// Use a new instance of Document Builder in each worker, as it is not thread-safe.
// The "x._1" is the filename and the "x._2" in the content of the file.
List<XMLApplicationProfile.ValidationResult> validationResultsList = jprdd
.map(
x -> profile
.validate(
x._1,
TestUtils.getDocumentBuilder().parse(IOUtils.toInputStream(x._2, StandardCharsets.UTF_8))))
.collect();
if (validationResultsList.isEmpty()) {
logger.error("The \"validationResultsList\" was empty!");
return;
}
if (logger.isDebugEnabled())
validationResultsList.forEach(vr -> logger.debug(vr.id() + " | score:" + vr.score()));
logger.debug(validationResultsList.toString());
try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(RESULTS_FILE), StandardCharsets.UTF_8)) {
writer.write(new Gson().toJson(validationResultsList));
} catch (Exception e) {
logger
.error(
"Error when writing the \"validationResultsList\" as json into the results-file: "
+ RESULTS_FILE);
return;
}
Option<String> uiWebUrl = sc.sc().uiWebUrl();
if (uiWebUrl.isDefined()) {
logger
.info(
"Waiting 60 seconds, before shutdown, for the user to check the jobs' status at: "
+ uiWebUrl.get());
Thread.sleep(60_000);
} else
logger.info("The \"uiWebUrl\" is not defined, in order to check the jobs' status. Shutting down..");
} catch (JsonIOException jie) {
logger.error("Error when writing the validation results to the json file: " + jie.getMessage());
} catch (Exception e) {
logger.error("Error validating directory: " + TEST_FILES_V4_DIR, e);
}
}
}

View File

@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns="http://namespace.openaire.eu/schema/oaire/" xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<datacite:titles>
<datacite:title xml:lang="eng">Journal bearings subjected to dynamic loads: the analytical mobility method</datacite:title>
</datacite:titles>
<datacite:creators>
<datacite:creator>
<datacite:creatorName>Flores, Paulo</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>Claro, José Carlos Pimenta</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>Ambrósio, Jorge</datacite:creatorName>
</datacite:creator>
</datacite:creators>
<datacite:contributors>
<datacite:contributor contributorType="HostingInstitution">
<datacite:contributorName>Universidade do Minho</datacite:contributorName>
<datacite:nameIdentifier nameIdentifierScheme="e-mail" schemeURI="mailto:repositorium@sdum.uminho.pt">repositorium@sdum.uminho.pt</datacite:nameIdentifier>
</datacite:contributor>
</datacite:contributors>
<datacite:relatedIdentifiers/>
<datacite:dates>
<datacite:date dateType="Accepted">2005-06-16</datacite:date>
<datacite:date dateType="Issued">2005-06-16</datacite:date>
</datacite:dates>
<dc:language>eng</dc:language>
<oaire:resourceType uri="http://purl.org/coar/resource_type/c_5794" resourceTypeGeneral="literature">conference paper</oaire:resourceType>
<dc:description xml:lang="por">The main purpose of this work is to use the analytical mobility method to analyze journal bearings subjected to dynamic loads, with the intent to include it in a general computational program that has been developed for the dynamic analysis of general mechanical systems. A simple journal bearing subjected to a dynamic load is chosen as a demonstrative example, in order to provide the necessary results for a comprehensive discussion of the methodology presented.</dc:description>
<dc:description xml:lang="por">Fundação para a Ciência e a Tecnologia (FCT)</dc:description>
<dc:description xml:lang="por">Fundo Comunitário Europeu FEDER under project POCTI/EME/2001/38281, entitled Dynamic of Mechanical Systems with joint Clearances and Imperfections</dc:description>
<dc:format>application/pdf</dc:format>
<datacite:identifier identifierType="Handle">https://hdl.handle.net/1822/18042</datacite:identifier>
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_16ec">restricted access</datacite:rights>
<datacite:subjects>
<datacite:subject>Dynamic bearings</datacite:subject>
<datacite:subject>Hydrodynamic lubrication</datacite:subject>
</datacite:subjects>
<datacite:sizes>
<datacite:size>294443 bytes</datacite:size>
</datacite:sizes>
<oaire:file accessRightsURI="http://purl.org/coar/access_right/c_16ec" mimeType="application/pdf" objectType="fulltext">https://repositorium.sdum.uminho.pt/bitstream/1822/18042/1/CI-2005_02.pdf</oaire:file>
<oaire:citationTitle>IberTrib - 3º Congresso Ibérico de Tribologia</oaire:citationTitle>
<oaire:citationStartPage>1</oaire:citationStartPage>
<oaire:citationEndPage>15</oaire:citationEndPage>
<oaire:citationConferencePlace>Guimarães, Portugal</oaire:citationConferencePlace>
</resource>

View File

@@ -0,0 +1,59 @@
<?xml version='1.0' encoding='UTF-8'?><record xmlns="http://www.openarchives.org/OAI/2.0/">
<header>
<identifier>oai:mediarep.org:doc/2534</identifier>
<datestamp>2020-03-27T11:35:21Z</datestamp>
<setSpec setName="Zeitschrift für Medienwissenschaft">com_doc_568</setSpec>
<setSpec setName="Zeitschriften">com_doc_179</setSpec>
<setSpec>col_doc_1963</setSpec>
<setSpec setName="OpenAIRE">openaire</setSpec>
</header>
<metadata>
<resource xmlns="http://namespace.openaire.eu/schema/oaire/"
xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<datacite:title>Die autogerechte Stadt (Rückführung)</datacite:title>
<datacite:creators>
<datacite:creator>
<datacite:creatorName>Schmitt, Arne</datacite:creatorName>
</datacite:creator>
</datacite:creators>
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="URL">https://mediarep.org/handle/doc/2534</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="ISSN" relationType="IsPartOf">1869-1722</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<dc:language>deu</dc:language>
<dc:publisher>diaphanes</dc:publisher>
<datacite:date dateType="Issued">2015</datacite:date>
<resourceType resourceTypeGeneral="literature" uri="http://purl.org/coar/resource_type/c_6501">
journal article
</resourceType>
<dc:description>Bildstrecke ausgewählter Abbildungen aus Hans Bernhard Reichows Buch DIE AUTOGERECHTE STADT.</dc:description>
<dc:format>application/pdf</dc:format>
<datacite:identifier identifierType="DOI">http://dx.doi.org/10.25969/mediarep/1436</datacite:identifier>
<datacite:rights uri="http://purl.org/coar/access_right/c_abf2">
open access
</datacite:rights>
<datacite:subjects>
<datacite:subject>Infrastruktur</datacite:subject>
<datacite:subject>Urbanität</datacite:subject>
<datacite:subject>Mobilität</datacite:subject>
<datacite:subject subjectScheme="DDC">300</datacite:subject>
</datacite:subjects>
<version uri="http://purl.org/coar/version/c_970fb48d4fbd8a85">
VoR
</version>
<file accessRightsURI="http://purl.org/coar/access_right/c_abf2" mimeType="application/pdf" objectType="&#xa; fulltext&#xa; ">https://mediarep.org/bitstream/doc/2534/1/ZfM_12_104-133_Schmitt_Die_autogerechte_Stadt.pdf</file>
<citationTitle>Zeitschrift für Medienwissenschaft</citationTitle>
<citationVolume>7</citationVolume>
<citationIssue>1</citationIssue>
<citationStartPage>104</citationStartPage>
<citationEndPage>113</citationEndPage>
<dcterms:audience>Researchers</dcterms:audience>
<dcterms:audience>Students</dcterms:audience>
</resource>
</metadata>
</record>

View File

@@ -0,0 +1,251 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--This document supports all available OpenAIRE Guidelines for Literature Repositories v4-->
<resource
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns="http://namespace.openaire.eu/schema/oaire/"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<!--1. Title (M)-->
<datacite:titles>
<datacite:title xml:lang="en-US">
National Institute for Environmental Studies and Center
for Climate System Research Japan
</datacite:title>
<datacite:title xml:lang="en-US" titleType="Subtitle">A survey</datacite:title>
</datacite:titles>
<!--2. Creator (M)-->
<datacite:creators>
<datacite:creator>
<datacite:creatorName nameType="Personal">Evans, R.J.</datacite:creatorName>
<datacite:givenName>Robert</datacite:givenName>
<datacite:familyName>Evans</datacite:familyName>
<datacite:affiliation>Institute of Science and Technology</datacite:affiliation>
<datacite:nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org">
1234-1234-1234-1234
</datacite:nameIdentifier>
</datacite:creator>
</datacite:creators>
<!--3. Contributor (MA)-->
<datacite:contributors>
<datacite:contributor contributorType="Other">
<datacite:contributorName nameType="Organizational">i-5uvLfGD2R.y8fXv442y8o.D6v1</datacite:contributorName>
<datacite:givenName>givenName3</datacite:givenName>
<datacite:familyName>familyName3</datacite:familyName>
<datacite:nameIdentifier nameIdentifierScheme="hK9c28uVD" schemeURI="http://fkPymUoN/">MuDRo.ymY7qG1Or.9Ny</datacite:nameIdentifier>
<datacite:nameIdentifier nameIdentifierScheme="k_Q7K8AbQiwLlreHzyU" schemeURI="http://EaTpassQ/">a1Sp</datacite:nameIdentifier>
<datacite:affiliation>affiliation5</datacite:affiliation>
<datacite:affiliation>affiliation6</datacite:affiliation>
</datacite:contributor>
<datacite:contributor contributorType="Editor">
<datacite:contributorName nameType="Organizational">ZxHQr4TGMeqqEOTbMcNd_EMFvIf</datacite:contributorName>
<datacite:givenName>givenName4</datacite:givenName>
<datacite:familyName>familyName4</datacite:familyName>
<datacite:nameIdentifier nameIdentifierScheme="PWhc-.4-Ock6m2WlzXL4km" schemeURI="http://KTYQUXaS/">nJNWaCv</datacite:nameIdentifier>
<datacite:nameIdentifier nameIdentifierScheme="fZBhNb-U2t" schemeURI="http://BJDgOJde/">RobhZUU3Obaqq-3UlrgxKNe-c0</datacite:nameIdentifier>
<datacite:affiliation>affiliation7</datacite:affiliation>
<datacite:affiliation>affiliation8</datacite:affiliation>
</datacite:contributor>
</datacite:contributors>
<!--4. Funding Reference (MA)-->
<oaire:fundingReferences>
<oaire:fundingReference>
<oaire:funderName>pZT7SjviMp4yodvIG</oaire:funderName>
<oaire:funderIdentifier funderIdentifierType="ISNI">ff238yCNbhv5k5Y8AmbpyzYt</oaire:funderIdentifier>
<oaire:fundingStream>svI_6A</oaire:fundingStream>
<oaire:awardNumber awardURI="http://TNrUwkxq/">E-</oaire:awardNumber>
<oaire:awardTitle>award</oaire:awardTitle>
</oaire:fundingReference>
<oaire:fundingReference>
<oaire:funderName>funderName</oaire:funderName>
<oaire:funderIdentifier funderIdentifierType="Crossref Funder">QwqkD4y</oaire:funderIdentifier>
<oaire:fundingStream>yyADU1SFmiS-meqQ6</oaire:fundingStream>
<oaire:awardNumber awardURI="http://oKladiXr/">JyORkpwzt</oaire:awardNumber>
<oaire:awardTitle>gyg2F9EKT7Gp.mSBU8.Drqf</oaire:awardTitle>
</oaire:fundingReference>
</oaire:fundingReferences>
<!--5. Alternate Identifier (R)-->
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="DOI">10.1002/chem.201701589</datacite:alternateIdentifier>
<datacite:alternateIdentifier alternateIdentifierType="PMID">PMC5574022</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<!--6. Related Identifier (R)-->
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="ISSN" relationType="HasMetadata" relatedMetadataScheme="sdfhr" schemeURI="http://fsdfg.cm" schemeType="XSD" resourceTypeGeneral="DataPaper">
0947-6539
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="EISSN" relationType="IsPartOf" resourceTypeGeneral="Model">1521-3765</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<datacite:dates>
<!--7. Embargo Period Date (MA)-->
<datacite:date dateType="Accepted">2011-12-01</datacite:date>
<datacite:date dateType="Available">2012-12-31</datacite:date>
<!--10. Publication Date (M)-->
<datacite:date dateType="Issued">2011</datacite:date>
</datacite:dates>
<!--8. Language (MA)-->
<dc:language>eng</dc:language>
<dc:language>deu</dc:language>
<dc:language>nld</dc:language>
<!--9. Publisher (MA)-->
<dc:publisher>
Loughborough University. Department of Computer Science
</dc:publisher>
<dc:publisher>John Wiley &amp; Sons, Inc. (US)</dc:publisher>
<!--11. Resource Type (M)-->
<oaire:resourceType resourceTypeGeneral="literature" uri="http://purl.org/coar/resource_type/c_93fc">report</oaire:resourceType>
<!--12. Description (MA)-->
<dc:description xml:lang="en-US">
Foreword [by] Hazel Anderson; Introduction; The scientific heresy:
transformation of a society; Consciousness as causal reality [etc]
</dc:description>
<dc:description xml:lang="en-US">
A number of problems in quantum state and system identification are
addressed.
</dc:description>
<!--13. Format (R)-->
<dc:format>video/quicktime</dc:format>
<dc:format>application/pdf</dc:format>
<dc:format>application/xml</dc:format>
<!--14. Resource Identifier (M)-->
<datacite:identifier identifierType="URN">http://urn.kb.se/resolve?urn=urn:nbn:se:uu:diva-160648</datacite:identifier>
<!--15. Access Rights (M)-->
<datacite:rights uri="http://purl.org/coar/access_right/c_f1cf">embargoed access</datacite:rights>
<!--16. Source (R)-->
<dc:source>Ecology Letters (1461023X) vol.4 (2001)</dc:source>
<dc:source>Ecology Letters (1461025X) vol.5 (2002)</dc:source>
<!--17. Subject (MA)-->
<datacite:subjects>
<datacite:subject subjectScheme="fAOT3oCMLqANRdqCniD" schemeURI="http://bsOyHkIY/" valueURI="http://ZiIiSVAz/" xml:lang="it-IT">ft12Zy</datacite:subject>
<datacite:subject subjectScheme="b-FUtIiLRu6RsuIcde3KKhzz.9" schemeURI="http://ttXSOOlB/" valueURI="http://GixLBNfF/"
xml:lang="el-GR">Bf6AElMCkh.mqutOmETp0</datacite:subject>
</datacite:subjects>
<!--18. License Condition (R)-->
<oaire:licenseCondition startDate="2019-02-01" uri="http://creativecommons.org/licenses/by-nc/4.0/">Creative Commons Attribution-NonCommercial</oaire:licenseCondition>
<!--19. Coverage (R)-->
<dc:coverage>2000-2010</dc:coverage>
<dc:coverage>
scheme=historic; content=Ming Dynasty
</dc:coverage>
<!--20. Size (O)-->
<datacite:sizes>
<datacite:size>15 pages</datacite:size>
<datacite:size>6 MB</datacite:size>
</datacite:sizes>
<!--21. Geo Location (O)-->
<datacite:geoLocations>
<datacite:geoLocation>
<datacite:geoLocationPlace>Atlantic Ocean</datacite:geoLocationPlace>
<datacite:geoLocationPoint>
<datacite:pointLongitude>31.233</datacite:pointLongitude>
<datacite:pointLatitude>-67.302</datacite:pointLatitude>
</datacite:geoLocationPoint>
<datacite:geoLocationBox>
<datacite:westBoundLongitude>-71.032</datacite:westBoundLongitude>
<datacite:eastBoundLongitude>-68.211</datacite:eastBoundLongitude>
<datacite:southBoundLatitude>41.090</datacite:southBoundLatitude>
<datacite:northBoundLatitude>42.893</datacite:northBoundLatitude>
</datacite:geoLocationBox>
<datacite:geoLocationPolygon>
<datacite:polygonPoint>
<datacite:pointLongitude>88.557</datacite:pointLongitude>
<datacite:pointLatitude>-0.604</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>-143.373</datacite:pointLongitude>
<datacite:pointLatitude>88.832</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>87.3</datacite:pointLongitude>
<datacite:pointLatitude>-36.556</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>129.128</datacite:pointLongitude>
<datacite:pointLatitude>4.616</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:inPolygonPoint>
<datacite:pointLongitude>-17.547</datacite:pointLongitude>
<datacite:pointLatitude>-47.629</datacite:pointLatitude>
</datacite:inPolygonPoint>
</datacite:geoLocationPolygon>
<datacite:geoLocationPolygon>
<datacite:polygonPoint>
<datacite:pointLongitude>78.121</datacite:pointLongitude>
<datacite:pointLatitude>19.341</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>-118.035</datacite:pointLongitude>
<datacite:pointLatitude>53.647</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>-49.07</datacite:pointLongitude>
<datacite:pointLatitude>-45.561</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>132.484</datacite:pointLongitude>
<datacite:pointLatitude>-41.146</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:inPolygonPoint>
<datacite:pointLongitude>179.293</datacite:pointLongitude>
<datacite:pointLatitude>15.364</datacite:pointLatitude>
</datacite:inPolygonPoint>
</datacite:geoLocationPolygon>
</datacite:geoLocation>
</datacite:geoLocations>
<!--22. Resource Version (R)-->
<oaire:version uri="http://purl.org/coar/version/c_be7fb7dd8ff6fe43">NA</oaire:version>
<!--23. File Location (MA)-->
<oaire:file accessRightsURI="http://purl.org/coar/access_right/c_abf2" mimeType="application/pdf" objectType="fulltext">http://link-to-the-fulltext.org</oaire:file>
<oaire:file accessRightsURI="http://purl.org/coar/access_right/c_abf2" mimeType="application/pdf" objectType="fulltext">http://europepmc.org/articles/PMC5574022?pdf=render</oaire:file>
<!--24. Citation Title (R)-->
<oaire:citationTitle>some Journal Title</oaire:citationTitle>
<!--25. Citation Volume (R)-->
<oaire:citationVolume>10</oaire:citationVolume>
<!--26. Citation Issue (R)-->
<oaire:citationIssue>1</oaire:citationIssue>
<!--27. Citation Start Page (R)-->
<oaire:citationStartPage>100</oaire:citationStartPage>
<!--28. Citation End Page (R)-->
<oaire:citationEndPage>105</oaire:citationEndPage>
<!--29. Citation Edition (R)-->
<oaire:citationEdition>2</oaire:citationEdition>
<!--30. Citation Conference Place (R)-->
<oaire:citationConferencePlace>Berlin</oaire:citationConferencePlace>
<!--31. Citation Conference Date (R)-->
<oaire:citationConferenceDate>2013-10-22 - 2013-10-23</oaire:citationConferenceDate>
<!--32. Audience (O)-->
<dcterms:audience>Researchers</dcterms:audience>
<dcterms:audience>Students</dcterms:audience>
</resource>

View File

@@ -0,0 +1,197 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--This document supports all available OpenAIRE Guidelines for Literature Repositories v4-->
<resource
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns="http://namespace.openaire.eu/schema/oaire/"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<!--1. Title (M)-->
<datacite:titles>
<datacite:notitle xml:lang="en-US">
National Institute for Environmental Studies and Center
for Climate System Research Japan
</datacite:notitle>
</datacite:titles>
<!--2. Creator (M)-->
<datacite:creators>
<datacite:creator>
<!-- <datacite:creatorName nameType="Personal">Evans, R.J.</datacite:creatorName>-->
<!-- <datacite:givenName>Robert</datacite:givenName>-->
<!-- <datacite:familyName>Evans</datacite:familyName>-->
<!-- <datacite:affiliation>Institute of Science and Technology</datacite:affiliation>-->
<!-- <datacite:nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org">-->
<!-- 1234-1234-1234-1234-->
<!-- </datacite:nameIdentifier>-->
</datacite:creator>
</datacite:creators>
<!--3. Contributor (MA)-->
<datacite:contributors>
<datacite:contributor>
<!-- <datacite:contributorName nameType="Organizational">i-5uvLfGD2R.y8fXv442y8o.D6v1</datacite:contributorName>-->
<!-- <datacite:givenName>givenName3</datacite:givenName>-->
<!-- <datacite:familyName>familyName3</datacite:familyName>-->
<datacite:nameIdentifier>MuDRo.ymY7qG1Or.9Ny</datacite:nameIdentifier>
<datacite:nameIdentifier nameIdentifierScheme="k_Q7K8AbQiwLlreHzyU" schemeURI="http://EaTpassQ/">a1Sp</datacite:nameIdentifier>
<datacite:affiliation>affiliation5</datacite:affiliation>
<datacite:affiliation>affiliation6</datacite:affiliation>
</datacite:contributor>
</datacite:contributors>
<!--4. Funding Reference (MA)-->
<oaire:fundingReferences>
<oaire:fundingReference>
<!-- <oaire:funderName>pZT7SjviMp4yodvIG</oaire:funderName> 1 error for missing mandatory element-->
<oaire:funderIdentifier funderIdentifierType="invalid">ff238yCNbhv5k5Y8AmbpyzYt</oaire:funderIdentifier><!--1 warning for incorrect value of funderIdentifierType-->
<oaire:fundingStream>svI_6A</oaire:fundingStream>
<oaire:awardNumber>E-</oaire:awardNumber><!--1 warning for missing awardURI attribute-->
<!--1 warning for missing oaire:awardTitle element-->
</oaire:fundingReference>
<oaire:fundingReference>
<oaire:funderName>funderName</oaire:funderName>
<oaire:funderIdentifier funderIdentifierType="Crossref Funder">QwqkD4y</oaire:funderIdentifier>
<oaire:fundingStream>yyADU1SFmiS-meqQ6</oaire:fundingStream>
<oaire:awardNumber awardURI="http://oKladiXr/">JyORkpwzt</oaire:awardNumber>
<oaire:awardTitle>gyg2F9EKT7Gp.mSBU8.Drqf</oaire:awardTitle>
</oaire:fundingReference>
</oaire:fundingReferences>
<!--5. Alternate Identifier (R)-->
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="invalid">10.1002/chem.201701589</datacite:alternateIdentifier><!--1 error for incorrect value of alternateIdentifierType-->
<datacite:alternateIdentifier alternateIdentifierType="PMID">PMC5574022</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<!--6. Related Identifier (R)-->
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="ISSN" relationType="HasMetadata" relatedMetadataScheme="sdfhr" schemeURI="http://fsdfg.cm" schemeType="XSD" resourceTypeGeneral="DataPaper">
0947-6539
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="invalid" relationType="invalid" resourceTypeGeneral="Model">1521-3765</datacite:relatedIdentifier><!--2 errors for incorrect value of relationType and relatedIdentifierType-->
</datacite:relatedIdentifiers>
<datacite:dates>
<!--7. Embargo Period Date (MA)-->
<datacite:nodate dateType="Accepted">2011-12-01</datacite:nodate>
<datacite:nodate dateType="Available">2012-12-01</datacite:nodate>
<!--10. Publication Date (M)-->
<datacite:date dateType="invalid">2011</datacite:date>
</datacite:dates>
<!--8. Language (MA)-->
<dc:language>eng</dc:language>
<dc:language>deu</dc:language>
<dc:language>invalidTag</dc:language><!--1 error for not allowed value-->
<!--9. Publisher (MA)-->
<dc:nopublisher>
Loughborough University. Department of Computer Science
</dc:nopublisher>
<dc:nopublisher>John Wiley &amp; Sons, Inc. (US)</dc:nopublisher>
<!--11. Resource Type (M)-->
<oaire:resourceType resourceTypeGeneral="invalid" uri="http://invalid">report</oaire:resourceType><!--2 errors for invalid mandatory attributes-->
<!--12. Description (MA)-->
<dc:description xml:lang="invalidTag"><!--1 warning for not allowed value of optional attribute-->
Foreword [by] Hazel Anderson; Introduction; The scientific heresy:
transformation of a society; Consciousness as causal reality [etc]
</dc:description>
<dc:description xml:lang="en-US">
A number of problems in quantum state and system identification are
addressed.
</dc:description>
<!--13. Format (R)-->
<dc:format>video/quicktime</dc:format>
<dc:format>application/pdf</dc:format>
<dc:format>application/invalid</dc:format><!--Should return 1 warning for not allowed value?-->
<!--14 . Resource Identifier (M)-->
<datacite:identifier identifierType="invalid">http://urn.kb.se/resolve?urn=urn:nbn:se:uu:diva-160648</datacite:identifier><!--1 error for invalid mandatory attribute-->
<!--15. Access Rights (M)-->
<datacite:rights uri="http://purl.org/coar/access_right/c_f1cf">invalid</datacite:rights><!--1 error for not allowed value-->
<!--16. Source (R)-->
<dc:nosource>Ecology Letters (1461023X) vol.4 (2001)</dc:nosource>
<dc:nosource>Ecology Letters (1461025X) vol.5 (2002)</dc:nosource>
<!--17. Subject (MA)-->
<datacite:subjects>
<datacite:subject>ft12Zy</datacite:subject><!--3 warnings for missing attributes-->
<datacite:subject subjectScheme="b-FUtIiLRu6RsuIcde3KKhzz.9" schemeURI="http://ttXSOOlB/" valueURI="http://GixLBNfF/"
xml:lang="el-GR">Bf6AElMCkh.mqutOmETp0</datacite:subject>
</datacite:subjects>
<!--18. License Condition (R)-->
<oaire:licenseCondition startDate="invalid date">Creative Commons Attribution-NonCommercial</oaire:licenseCondition><!--2 errors for invalid attributes-->
<!--19. Coverage (R)-->
<dc:nocoverage>2000-2010</dc:nocoverage>
<dc:nocoverage>
scheme=historic; content=Ming Dynasty
</dc:nocoverage>
<!--20. Size (O)-->
<datacite:sizes>
<!-- <datacite:size>15 pages</datacite:size>-->
<!-- <datacite:size>6 MB</datacite:size>-->
</datacite:sizes>
<!--21. Geo Location (O)-->
<datacite:geoLocations>
<datacite:geoLocation>
<datacite:geoLocationPlace>Atlantic Ocean</datacite:geoLocationPlace>
<datacite:geoLocationPoint>
<datacite:pointLongitude>31.233</datacite:pointLongitude>
<datacite:nopointLatitude>-67.302</datacite:nopointLatitude><!--1 error-->
</datacite:geoLocationPoint>
<datacite:geoLocationBox>
<datacite:westBoundLongitude>-71.032</datacite:westBoundLongitude>
<datacite:eastBoundLongitude>-68.211</datacite:eastBoundLongitude>
<datacite:nosouthBoundLatitude>41.090</datacite:nosouthBoundLatitude><!--1 error-->
<datacite:northBoundLatitude>42.893</datacite:northBoundLatitude>
</datacite:geoLocationBox>
</datacite:geoLocation>
</datacite:geoLocations>
<!--22. Resource Version (R)-->
<oaire:version>irrelevant</oaire:version><!--1 warning for missing MA attribute-->
<!--23. File Location (MA)-->
<oaire:file accessRightsURI="http://invalid" mimeType="invalid/type" objectType="invalid">http://link-to-the-fulltext.org</oaire:file><!--3 warnings for incorrect attributes-->
<oaire:file accessRightsURI="http://purl.org/coar/access_right/c_abf2" mimeType="application/pdf" objectType="fulltext">http://europepmc.org/articles/PMC5574022?pdf=render</oaire:file>
<!--24. Citation Title (R)-->
<oaire:nocitationTitle>some Journal Title</oaire:nocitationTitle>
<!--25. Citation Volume (R)-->
<oaire:nocitationVolume>10</oaire:nocitationVolume>
<!--26. Citation Issue (R)-->
<oaire:nocitationIssue>1</oaire:nocitationIssue>
<!--27. Citation Start Page (R)-->
<oaire:nocitationStartPage>100</oaire:nocitationStartPage>
<!--28. Citation End Page (R)-->
<oaire:nocitationEndPage>105</oaire:nocitationEndPage>
<!--29. Citation Edition (R)-->
<oaire:nocitationEdition>2</oaire:nocitationEdition>
<!--30. Citation Conference Place (R)-->
<oaire:nocitationConferencePlace>Berlin</oaire:nocitationConferencePlace>
<!--31. Citation Conference Date (R)-->
<oaire:citationConferenceDate>2013-02-29</oaire:citationConferenceDate><!--1 warning for not allowed value-->
<!--32. Audience (O)-->
<dcterms:noaudience>Researchers</dcterms:noaudience>
<dcterms:noaudience>Students</dcterms:noaudience>
</resource>

View File

@@ -39,6 +39,7 @@
<module>dhp-doiboost</module>
<module>dhp-impact-indicators</module>
<module>dhp-swh</module>
<module>dhp-continuous-validation</module>
</modules>
<pluginRepositories>