Continuous Validation Workflow #388
|
@ -18,10 +18,14 @@
|
||||||
<maven.compiler.source>8</maven.compiler.source>
|
<maven.compiler.source>8</maven.compiler.source>
|
||||||
<maven.compiler.target>8</maven.compiler.target>
|
<maven.compiler.target>8</maven.compiler.target>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<jackson.version>2.14.3</jackson.version> <!-- Scala doesn't want a higher version than 2.14.x -->
|
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib</groupId>
|
<groupId>eu.dnetlib</groupId>
|
||||||
<artifactId>uoa-validator-engine2</artifactId>
|
<artifactId>uoa-validator-engine2</artifactId>
|
||||||
|
@ -33,10 +37,7 @@
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||||
<version>${dhp.spark.version}</version>
|
<version>${dhp.spark.version}</version>
|
||||||
<!--<scope>provided</scope>-->
|
<scope>provided</scope>
|
||||||
<!--
|
|
||||||
<scope>compile</scope>
|
|
||||||
-->
|
|
||||||
|
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<!-- This is an older version which causes problems. We have to add the latest version independently. -->
|
<!-- This is an older version which causes problems. We have to add the latest version independently. -->
|
||||||
|
@ -70,8 +71,8 @@
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||||
<version>${dhp.spark.version}</version>
|
<version>${dhp.spark.version}</version>
|
||||||
<!--<scope>provided</scope>-->
|
<scope>provided</scope>
|
||||||
<!--<scope>compile</scope>-->
|
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<!-- This exclusion is a must for scala 2.11 and spark 2.4.0.cloudera2 -->
|
<!-- This exclusion is a must for scala 2.11 and spark 2.4.0.cloudera2 -->
|
||||||
<exclusion>
|
<exclusion>
|
||||||
|
@ -94,8 +95,8 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
<!--<version>${dhp.hadoop.version}</version>-->
|
<version>${dhp.hadoop.version}</version>
|
||||||
<!--<scope>compile</scope>-->
|
<scope>provided</scope> <!-- This is required here, when setting the "version" above, even if that version is the same as the one used in the main pom, where the dependency includes the "provided" scope. -->
|
||||||
|
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<exclusion>
|
<exclusion>
|
||||||
|
@ -144,40 +145,12 @@
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/io.dropwizard.metrics/metrics-core -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.dropwizard.metrics</groupId>
|
|
||||||
<artifactId>metrics-core</artifactId>
|
|
||||||
<version>4.2.22</version>
|
|
||||||
<!-- <scope>compile</scope>-->
|
|
||||||
</dependency>
|
|
||||||
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-databind</artifactId>
|
|
||||||
<version>${jackson.version}</version>
|
|
||||||
<!-- <scope>compile</scope>-->
|
|
||||||
</dependency>
|
|
||||||
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-core</artifactId>
|
|
||||||
<version>${jackson.version}</version>
|
|
||||||
<!-- <scope>compile</scope>-->
|
|
||||||
</dependency>
|
|
||||||
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-annotations</artifactId>
|
|
||||||
<version>${jackson.version}</version>
|
|
||||||
<!-- <scope>compile</scope>-->
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-app -->
|
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-app -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-mapreduce-client-app</artifactId>
|
<artifactId>hadoop-mapreduce-client-app</artifactId>
|
||||||
<version>${dhp.hadoop.version}</version>
|
<version>${dhp.hadoop.version}</version>
|
||||||
|
<scope>provided</scope>
|
||||||
|
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<exclusion>
|
<exclusion>
|
||||||
|
@ -206,56 +179,13 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
|
|
||||||
<!--<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
|
||||||
<version>${dhp.hadoop.version}</version>
|
|
||||||
|
|
||||||
<!– <scope>compile</scope>–>
|
|
||||||
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-reload4j</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>ch.qos.reload4j</groupId>
|
|
||||||
<artifactId>reload4j</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
|
|
||||||
<!– Vulnerable dependencies: –>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>com.google.protobuf</groupId>
|
|
||||||
<artifactId>protobuf-java</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>io.netty</groupId>
|
|
||||||
<artifactId>netty</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>-->
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Add back some updated version of the needed dependencies. -->
|
<!-- Add back some updated version of the needed dependencies. -->
|
||||||
<!-- This should be enabled only when using Hadoop 3.0.0+ -->
|
<!-- This should be enabled only when using Hadoop 3.0.0+ -->
|
||||||
<!--<dependency>
|
<!--<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-client-api</artifactId>
|
<artifactId>hadoop-client-api</artifactId>
|
||||||
<version>${dhp.hadoop.version}</version>
|
<version>${dhp.hadoop.version}</version>
|
||||||
|
<scope>provided</scope>
|
||||||
</dependency>-->
|
</dependency>-->
|
||||||
<dependency> <!-- Newer versions (>=0.18.X) are not compatible with JAVA 8. -->
|
<dependency> <!-- Newer versions (>=0.18.X) are not compatible with JAVA 8. -->
|
||||||
<groupId>org.apache.thrift</groupId>
|
<groupId>org.apache.thrift</groupId>
|
||||||
|
@ -273,7 +203,7 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.code.gson</groupId>
|
<groupId>com.google.code.gson</groupId>
|
||||||
<artifactId>gson</artifactId>
|
<artifactId>gson</artifactId>
|
||||||
<version>2.10.1</version>
|
<version>${google.gson.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- logback versions 1.4.X require Java-11 -->
|
<!-- logback versions 1.4.X require Java-11 -->
|
||||||
|
@ -308,68 +238,8 @@
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
|
||||||
<artifactId>dhp-common</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
<!-- <scope>compile</scope>-->
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<!--<build>
|
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-shade-plugin</artifactId>
|
|
||||||
<version>3.5.1</version>
|
|
||||||
<executions>
|
|
||||||
<execution>
|
|
||||||
<phase>package</phase>
|
|
||||||
<goals>
|
|
||||||
<goal>shade</goal>
|
|
||||||
</goals>
|
|
||||||
<configuration>
|
|
||||||
<shadedArtifactAttached>false</shadedArtifactAttached>
|
|
||||||
<transformers>
|
|
||||||
<!– add Main-Class to manifest file –>
|
|
||||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
|
||||||
<mainClass>eu.dnetlib.dhp.continuous_validator.ContinuousValidator</mainClass>
|
|
||||||
</transformer>
|
|
||||||
</transformers>
|
|
||||||
<filters>
|
|
||||||
<!– filter manifest signature files when creating uber-jar –>
|
|
||||||
<filter>
|
|
||||||
<artifact>*:*</artifact>
|
|
||||||
<excludes>
|
|
||||||
<exclude>META-INF/INDEX.LIST</exclude>
|
|
||||||
<exclude>META-INF/*.SF</exclude>
|
|
||||||
<exclude>META-INF/*.DSA</exclude>
|
|
||||||
<exclude>META-INF/*.RSA</exclude>
|
|
||||||
<exclude>LICENSE*</exclude>
|
|
||||||
</excludes>
|
|
||||||
</filter>
|
|
||||||
</filters>
|
|
||||||
<createDependencyReducedPom>false</createDependencyReducedPom>
|
|
||||||
<!– The "minimize" config breaks things at runtime. –>
|
|
||||||
<!–<minimizeJar>true</minimizeJar>–>
|
|
||||||
</configuration>
|
|
||||||
</execution>
|
|
||||||
</executions>
|
|
||||||
</plugin>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<version>3.2.2</version>
|
|
||||||
<configuration>
|
|
||||||
<!–<excludes>
|
|
||||||
<exclude>some test to exclude here</exclude>
|
|
||||||
</excludes>–>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>-->
|
|
||||||
|
|
||||||
<repositories>
|
<repositories>
|
||||||
<repository>
|
<repository>
|
||||||
<id>libs</id>
|
<id>libs</id>
|
||||||
|
|
|
@ -33,7 +33,7 @@ import scala.Option;
|
||||||
public class ContinuousValidator {
|
public class ContinuousValidator {
|
||||||
|
|
||||||
public static final String TEST_FILES_V4_DIR = TestUtils.TEST_FILES_BASE_DIR + "openaireguidelinesV4/";
|
public static final String TEST_FILES_V4_DIR = TestUtils.TEST_FILES_BASE_DIR + "openaireguidelinesV4/";
|
||||||
public static final String RESULTS_FILE = "results.json";
|
public static final String RESULTS_FILE_NAME = "results.json";
|
||||||
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidator.class);
|
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidator.class);
|
||||||
private static final String parametersFile = "input_continuous_validator_parameters.json";
|
private static final String parametersFile = "input_continuous_validator_parameters.json";
|
||||||
|
|
||||||
|
@ -60,32 +60,20 @@ public class ContinuousValidator {
|
||||||
|
|
||||||
parser = new ArgumentApplicationParser(jsonConfiguration);
|
parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
String isSParkSessionManagedStr = parser.get("isSparkSessionManaged");
|
|
||||||
if (isSParkSessionManagedStr == null) {
|
|
||||||
logger
|
|
||||||
.error(
|
|
||||||
"The \"isSParkSessionManagedStr\" was not retrieved from the parameters file: "
|
|
||||||
+ parametersFile);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This "is needed to implement a unit test in which the spark session is created in the context of the
|
|
||||||
// unit test itself rather than inside the spark application"
|
|
||||||
isSparkSessionManaged = Optional
|
|
||||||
.of(isSParkSessionManagedStr)
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.TRUE);
|
|
||||||
|
|
||||||
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
||||||
|
|
||||||
// TODO - If the above is true, then the Spark-session defined in the unit-test should be used.
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Error when parsing the parameters!", e);
|
logger.error("Error when parsing the parameters!", e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged")) // This param is not mandatory, so it may be null.
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
|
||||||
|
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
// This is needed to implement a unit test in which the spark session is created in the context of the
|
||||||
|
// unit test itself rather than inside the spark application.
|
||||||
|
|
||||||
parquet_file_path = parser.get("parquet_file_path");
|
parquet_file_path = parser.get("parquet_file_path");
|
||||||
if (parquet_file_path == null) {
|
if (parquet_file_path == null) {
|
||||||
logger.error("The \"parquet_file_path\" was not retrieved from the parameters file: " + parametersFile);
|
logger.error("The \"parquet_file_path\" was not retrieved from the parameters file: " + parametersFile);
|
||||||
|
@ -125,10 +113,8 @@ public class ContinuousValidator {
|
||||||
logger
|
logger
|
||||||
.info(
|
.info(
|
||||||
"Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \""
|
"Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \""
|
||||||
+ guidelines + "\"" + " and will output the results in: " + outputPath + RESULTS_FILE);
|
+ guidelines + "\"" + " and will output the results in: " + outputPath + RESULTS_FILE_NAME);
|
||||||
|
|
||||||
|
|
||||||
// TODO - USE THE "runWithSparkSession" METHOD TO RUN THE SPARK CODE INSIDE!!
|
|
||||||
AbstractOpenAireProfile profile = new LiteratureGuidelinesV4Profile();
|
AbstractOpenAireProfile profile = new LiteratureGuidelinesV4Profile();
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
|
@ -168,7 +154,7 @@ public class ContinuousValidator {
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.json(finalOutputPath + RESULTS_FILE); // The filename should be the name of the input-file or the
|
.json(finalOutputPath + RESULTS_FILE_NAME); // The filename should be the name of the input-file or the
|
||||||
// input-directory.
|
// input-directory.
|
||||||
|
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
|
|
|
@ -17,7 +17,7 @@ public class ReadResultsTest {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
List standardValidationResultList = new Gson()
|
List standardValidationResultList = new Gson()
|
||||||
.fromJson(new BufferedReader(new FileReader(ContinuousValidator.RESULTS_FILE)), List.class);
|
.fromJson(new BufferedReader(new FileReader(ContinuousValidator.RESULTS_FILE_NAME)), List.class);
|
||||||
if (standardValidationResultList == null)
|
if (standardValidationResultList == null)
|
||||||
logger.error("Could not map the json to a \"List\" object.");
|
logger.error("Could not map the json to a \"List\" object.");
|
||||||
else if (standardValidationResultList.isEmpty())
|
else if (standardValidationResultList.isEmpty())
|
||||||
|
@ -25,9 +25,9 @@ public class ReadResultsTest {
|
||||||
else
|
else
|
||||||
logger.info(standardValidationResultList.toString());
|
logger.info(standardValidationResultList.toString());
|
||||||
} catch (FileNotFoundException fnfe) {
|
} catch (FileNotFoundException fnfe) {
|
||||||
logger.error("The results-file \"" + ContinuousValidator.RESULTS_FILE + "\" does not exist!");
|
logger.error("The results-file \"" + ContinuousValidator.RESULTS_FILE_NAME + "\" does not exist!");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Error when reading the json-results-file \"" + ContinuousValidator.RESULTS_FILE + "\"", e);
|
logger.error("Error when reading the json-results-file \"" + ContinuousValidator.RESULTS_FILE_NAME + "\"", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue