From b71633fd7fc5bdb89c99a6dfb17d91a8aef1c4a6 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Fri, 15 Dec 2023 18:29:38 +0200 Subject: [PATCH] - Fix the location of the "input_continuous_validator_parameters.json" file. - Fix handing the "isSparkSessionManaged" parameter. - Add the "provided" scope for some dependencies. They do not inherit it from the main pom, since the "version" tag is declared, even though the value is the same as the one from the main pom. - Code polishing / cleanup. --- .../dhp-continuous-validation/pom.xml | 156 ++---------------- .../ContinuousValidator.java | 38 ++--- ...input_continuous_validator_parameters.json | 0 .../src/test/java/ReadResultsTest.java | 6 +- 4 files changed, 28 insertions(+), 172 deletions(-) rename dhp-workflows/dhp-continuous-validation/src/main/resources/{ => eu/dnetlib/dhp/continuous_validator}/input_continuous_validator_parameters.json (100%) diff --git a/dhp-workflows/dhp-continuous-validation/pom.xml b/dhp-workflows/dhp-continuous-validation/pom.xml index 585e273b2..85154848a 100644 --- a/dhp-workflows/dhp-continuous-validation/pom.xml +++ b/dhp-workflows/dhp-continuous-validation/pom.xml @@ -18,10 +18,14 @@ 8 8 UTF-8 - 2.14.3 + + eu.dnetlib.dhp + dhp-common + ${project.version} + eu.dnetlib uoa-validator-engine2 @@ -33,10 +37,7 @@ org.apache.spark spark-core_${scala.binary.version} ${dhp.spark.version} - - + provided @@ -70,8 +71,8 @@ org.apache.spark spark-sql_${scala.binary.version} ${dhp.spark.version} - - + provided + @@ -94,8 +95,8 @@ org.apache.hadoop hadoop-common - - + ${dhp.hadoop.version} + provided @@ -144,40 +145,12 @@ - - - io.dropwizard.metrics - metrics-core - 4.2.22 - - - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - - - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - - - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - - - org.apache.hadoop hadoop-mapreduce-client-app ${dhp.hadoop.version} + provided @@ -206,56 +179,13 @@ - - - - - org.apache.thrift @@ -273,7 +203,7 @@ com.google.code.gson gson - 2.10.1 + ${google.gson.version} @@ -308,68 +238,8 @@ test - - eu.dnetlib.dhp - dhp-common - ${project.version} - - - - - libs diff --git a/dhp-workflows/dhp-continuous-validation/src/main/java/eu/dnetlib/dhp/continuous_validator/ContinuousValidator.java b/dhp-workflows/dhp-continuous-validation/src/main/java/eu/dnetlib/dhp/continuous_validator/ContinuousValidator.java index 00493530a..c6299fd0e 100644 --- a/dhp-workflows/dhp-continuous-validation/src/main/java/eu/dnetlib/dhp/continuous_validator/ContinuousValidator.java +++ b/dhp-workflows/dhp-continuous-validation/src/main/java/eu/dnetlib/dhp/continuous_validator/ContinuousValidator.java @@ -33,7 +33,7 @@ import scala.Option; public class ContinuousValidator { public static final String TEST_FILES_V4_DIR = TestUtils.TEST_FILES_BASE_DIR + "openaireguidelinesV4/"; - public static final String RESULTS_FILE = "results.json"; + public static final String RESULTS_FILE_NAME = "results.json"; private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidator.class); private static final String parametersFile = "input_continuous_validator_parameters.json"; @@ -60,32 +60,20 @@ public class ContinuousValidator { parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); - - String isSParkSessionManagedStr = parser.get("isSparkSessionManaged"); - if (isSParkSessionManagedStr == null) { - logger - .error( - "The \"isSParkSessionManagedStr\" was not retrieved from 
the parameters file: " - + parametersFile); - return; - } - - // This "is needed to implement a unit test in which the spark session is created in the context of the - // unit test itself rather than inside the spark application" - isSparkSessionManaged = Optional - .of(isSParkSessionManagedStr) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - // TODO - If the above is tru,e then the Spark-session defined in the unit-test should be used.. - } catch (Exception e) { logger.error("Error when parsing the parameters!", e); return; } + isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) // This param is not mandatory, so it may be null. + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); + // This is needed to implement a unit test in which the spark session is created in the context of the + // unit test itself rather than inside the spark application" + parquet_file_path = parser.get("parquet_file_path"); if (parquet_file_path == null) { logger.error("The \"parquet_file_path\" was not retrieved from the parameters file: " + parametersFile); @@ -125,10 +113,8 @@ public class ContinuousValidator { logger .info( "Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \"" - + guidelines + "\"" + " and will output the results in: " + outputPath + RESULTS_FILE); + + guidelines + "\"" + " and will output the results in: " + outputPath + RESULTS_FILE_NAME); - - // TODO - USE THE "runWithSparkSession" METHOD TO RUN THE SPARK CODE INSIDE!! AbstractOpenAireProfile profile = new LiteratureGuidelinesV4Profile(); SparkConf conf = new SparkConf(); @@ -168,7 +154,7 @@ public class ContinuousValidator { .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) - .json(finalOutputPath + RESULTS_FILE); // The filename should be the name of the input-file or the + .json(finalOutputPath + RESULTS_FILE_NAME); // The filename should be the name of the input-file or the // input-directory. 
if (logger.isDebugEnabled()) { diff --git a/dhp-workflows/dhp-continuous-validation/src/main/resources/input_continuous_validator_parameters.json b/dhp-workflows/dhp-continuous-validation/src/main/resources/eu/dnetlib/dhp/continuous_validator/input_continuous_validator_parameters.json similarity index 100% rename from dhp-workflows/dhp-continuous-validation/src/main/resources/input_continuous_validator_parameters.json rename to dhp-workflows/dhp-continuous-validation/src/main/resources/eu/dnetlib/dhp/continuous_validator/input_continuous_validator_parameters.json diff --git a/dhp-workflows/dhp-continuous-validation/src/test/java/ReadResultsTest.java b/dhp-workflows/dhp-continuous-validation/src/test/java/ReadResultsTest.java index b1b0c6af4..1a381b08e 100644 --- a/dhp-workflows/dhp-continuous-validation/src/test/java/ReadResultsTest.java +++ b/dhp-workflows/dhp-continuous-validation/src/test/java/ReadResultsTest.java @@ -17,7 +17,7 @@ public class ReadResultsTest { try { List standardValidationResultList = new Gson() - .fromJson(new BufferedReader(new FileReader(ContinuousValidator.RESULTS_FILE)), List.class); + .fromJson(new BufferedReader(new FileReader(ContinuousValidator.RESULTS_FILE_NAME)), List.class); if (standardValidationResultList == null) logger.error("Could not map the json to a \"List\" object."); else if (standardValidationResultList.isEmpty()) @@ -25,9 +25,9 @@ public class ReadResultsTest { else logger.info(standardValidationResultList.toString()); } catch (FileNotFoundException fnfe) { - logger.error("The results-file \"" + ContinuousValidator.RESULTS_FILE + "\" does not exist!"); + logger.error("The results-file \"" + ContinuousValidator.RESULTS_FILE_NAME + "\" does not exist!"); } catch (Exception e) { - logger.error("Error when reading the json-results-file \"" + ContinuousValidator.RESULTS_FILE + "\"", e); + logger.error("Error when reading the json-results-file \"" + ContinuousValidator.RESULTS_FILE_NAME + "\"", e); } }
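
The core of the "isSparkSessionManaged" fix above is resolving an optional boolean argument that may be absent from the parameters file. A minimal, self-contained sketch of that Optional-based default (this is not the project's ArgumentApplicationParser; the parameter map below is a hypothetical stand-in) could look like this:

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

public class SparkSessionFlagSketch {

    // Resolve the flag from a possibly-missing String value, defaulting to TRUE,
    // mirroring the Optional.ofNullable(...).map(Boolean::valueOf).orElse(Boolean.TRUE)
    // chain introduced in ContinuousValidator above.
    static Boolean resolveIsSparkSessionManaged(String rawValue) {
        return Optional
            .ofNullable(rawValue)   // the parameter is not mandatory, so it may be null
            .map(Boolean::valueOf)  // parse "true" / "false" when a value is present
            .orElse(Boolean.TRUE);  // default: the application manages its own SparkSession
    }

    public static void main(String[] args) {
        // Hypothetical stand-in for the parsed parameters; in the real job they come from
        // input_continuous_validator_parameters.json via ArgumentApplicationParser.
        Map<String, String> params = new HashMap<>();
        params.put("parquet_file_path", "/tmp/input.parquet");
        // "isSparkSessionManaged" is intentionally omitted to exercise the default.

        Boolean isSparkSessionManaged = resolveIsSparkSessionManaged(params.get("isSparkSessionManaged"));
        System.out.println("isSparkSessionManaged: " + isSparkSessionManaged); // prints: true
    }
}

Defaulting to TRUE keeps the production behaviour (the job owns its SparkSession), while a unit test can pass "false" and supply a session created in the test context instead.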
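
For context, the job itself reads a parquet file and writes gzip-compressed JSON results, as seen in the write() call changed above. A rough standalone sketch of that read/write shape (assuming spark-sql on the classpath; the app name and paths are placeholders, and the actual guideline validation step is omitted) might be:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class ParquetToJsonSketch {

    public static void main(String[] args) {
        // Local session only for illustration; the real job may instead reuse a
        // session provided by its caller, depending on isSparkSessionManaged.
        SparkSession spark = SparkSession
            .builder()
            .appName("continuous-validation-sketch")
            .master("local[*]")
            .getOrCreate();

        // Read the input records (placeholder path).
        Dataset<Row> records = spark.read().parquet("/tmp/input.parquet");

        // ... per-record validation against the guideline profile would happen here ...

        // Write the results as gzip-compressed JSON, overwriting any previous run,
        // matching the option("compression", "gzip") / SaveMode.Overwrite usage above.
        records
            .write()
            .option("compression", "gzip")
            .mode(SaveMode.Overwrite)
            .json("/tmp/continuous_validation/results.json");

        spark.stop();
    }
}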