From 8d0ed7d4141d051677e75dc038e879e63cf7d062 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Thu, 25 Jan 2024 18:23:14 +0200 Subject: [PATCH] Continuous-Validation updates: - Update the "uoa-validator-engine2" dependency. - Update the "installProject.sh" script to account for potential conflict with previous builds. - Add documentation. --- dhp-workflows/dhp-continuous-validation/README.md | 8 ++++++++ dhp-workflows/dhp-continuous-validation/installProject.sh | 8 ++++++++ .../dhp-continuous-validation/runOozieWorkflow.sh | 6 ++++-- pom.xml | 2 +- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-continuous-validation/README.md b/dhp-workflows/dhp-continuous-validation/README.md index 7dc65f8e8..7905a0e55 100644 --- a/dhp-workflows/dhp-continuous-validation/README.md +++ b/dhp-workflows/dhp-continuous-validation/README.md @@ -1,3 +1,11 @@ # Continuous Validation +This module is responsible for deploying an **Oozie Workflow** (on the desired cluster), which executes a **Spark** action.
+This action takes the HDFS path of a directory of Parquet files containing metadata records and applies the validation process to all of them in parallel. It then writes the results, in JSON format, to the given directory.
+The validation process is powered by the [**uoa-validator-engine2**](https://code-repo.d4science.org/MaDgIK/uoa-validator-engine2) software.
+ +### Install and run + +Run the **./installProject.sh** script and then the **./runOozieWorkflow.sh** script.
+ [...] \ No newline at end of file diff --git a/dhp-workflows/dhp-continuous-validation/installProject.sh b/dhp-workflows/dhp-continuous-validation/installProject.sh index 7f5f8f134..afd95578d 100755 --- a/dhp-workflows/dhp-continuous-validation/installProject.sh +++ b/dhp-workflows/dhp-continuous-validation/installProject.sh @@ -1,9 +1,17 @@ +# Install the whole "dnet-hadoop" project. + +# Delete this module's previous build-files in order to avoid any conflicts. +rm -rf target/ || + +# Go to the root directory of this project. cd ../../ +# Select the build profile. DEFAULT_PROFILE='' # It's the empty profile. NEWER_VERSIONS_PROFILE='-Pscala-2.12' CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE} +# Install the project. mvn clean install -U ${CHOSEN_MAVEN_PROFILE} -Dmaven.test.skip=true # We skip tests for all modules, since they take a large amount of time and some of them fail. diff --git a/dhp-workflows/dhp-continuous-validation/runOozieWorkflow.sh b/dhp-workflows/dhp-continuous-validation/runOozieWorkflow.sh index 665619b49..8ab67a39b 100755 --- a/dhp-workflows/dhp-continuous-validation/runOozieWorkflow.sh +++ b/dhp-workflows/dhp-continuous-validation/runOozieWorkflow.sh @@ -1,12 +1,14 @@ -# This script deploys and runs the oozie workflow. +# This script deploys and runs the Oozie workflow on the cluster defined in the "~/.dhp/application.properties" file. +# Select the build profile. DEFAULT_PROFILE='' # It's the empty profile. NEWER_VERSIONS_PROFILE='-Pscala-2.12' CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE} - +# Build and deploy this module. mvn clean package -U ${CHOSEN_MAVEN_PROFILE} -Poozie-package,deploy,run \ -Dworkflow.source.dir=eu/dnetlib/dhp/continuous_validator +# Show the Oozie-job-ID. 
echo -e "\n\nShowing the contents of \"extract-and-run-on-remote-host.log\":\n" cat ./target/extract-and-run-on-remote-host.log diff --git a/pom.xml b/pom.xml index c84ab8ff4..addc4cfec 100644 --- a/pom.xml +++ b/pom.xml @@ -207,7 +207,7 @@ eu.dnetlib uoa-validator-engine2 - 0.9.3 + 2.0.0-SNAPSHOT