Code cleanup.

Remove the commented-out libthrift and woodstox-core dependencies from the module's pom.xml, drop unused imports from ContinuousValidator, and delete the SHOULD_GET_ARGUMENTS_FROM_FILE flag together with the fallback branch that read the Spark arguments from the command line: the arguments are now always parsed from the bundled parameters file.

Lampros Smyrnaios 2024-01-09 17:03:35 +02:00
parent 17282ea8fc
commit eaa070f1e6
2 changed files with 37 additions and 75 deletions


@@ -15,13 +15,13 @@
 	<!-- The "version" is inherited from the parent module. -->
 	<dependencies>
 		<dependency>
 			<groupId>eu.dnetlib.dhp</groupId>
 			<artifactId>dhp-common</artifactId>
 			<version>${project.version}</version>
 		</dependency>
 		<dependency>
 			<groupId>eu.dnetlib</groupId>
 			<artifactId>uoa-validator-engine2</artifactId>
@@ -32,33 +32,18 @@
 			<artifactId>spark-core_${scala.binary.version}</artifactId>
 		</dependency>
 		<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
 		<dependency>
 			<groupId>org.apache.spark</groupId>
 			<artifactId>spark-sql_${scala.binary.version}</artifactId>
 		</dependency>
-		<!--
-		<dependency>
-			<groupId>org.apache.thrift</groupId>
-			<artifactId>libthrift</artifactId>
-		</dependency>
-		<dependency>
-			<groupId>com.fasterxml.woodstox</groupId>
-			<artifactId>woodstox-core</artifactId>
-		</dependency>
-		-->
 		<!-- Other dependencies. -->
 		<dependency>
 			<groupId>com.google.code.gson</groupId>
 			<artifactId>gson</artifactId>
 		</dependency>
 	</dependencies>
 </project>


@@ -4,14 +4,12 @@ package eu.dnetlib.dhp.continuous_validator;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 
 import java.nio.charset.StandardCharsets;
-import java.util.List;
 import java.util.Objects;
 import java.util.Optional;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SaveMode;
@@ -21,7 +19,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.validator2.validation.XMLApplicationProfile;
 import eu.dnetlib.validator2.validation.guideline.openaire.*;
 import eu.dnetlib.validator2.validation.utils.TestUtils;
-import scala.Option;
 
 public class ContinuousValidator {
@@ -30,8 +27,6 @@ public class ContinuousValidator {
 	private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidator.class);
 	private static final String parametersFile = "input_continuous_validator_parameters.json";
 
-	private static final boolean SHOULD_GET_ARGUMENTS_FROM_FILE = true; // It throws an error for now..
-
 	public static void main(String[] args) {
 		ArgumentApplicationParser parser = null;
@@ -41,65 +36,48 @@ public class ContinuousValidator {
 		String guidelines = null;
 		String outputPath = null;
 
-		if (SHOULD_GET_ARGUMENTS_FROM_FILE) {
 		try {
 			String jsonConfiguration = IOUtils
 				.toString(
 					Objects
 						.requireNonNull(
 							ContinuousValidator.class
 								.getResourceAsStream("/eu/dnetlib/dhp/continuous_validator/" + parametersFile)),
 					StandardCharsets.UTF_8);
 
 			parser = new ArgumentApplicationParser(jsonConfiguration);
 			parser.parseArgument(args);
 		} catch (Exception e) {
 			logger.error("Error when parsing the parameters!", e);
 			return;
 		}
 
 		isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged")) // This param is not mandatory, so it may be null.
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 
 		logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 
 		// This is needed to implement a unit test in which the spark session is created in the context of the
 		// unit test itself rather than inside the spark application"
 
 		parquet_file_path = parser.get("parquet_file_path");
 		if (parquet_file_path == null) {
 			logger.error("The \"parquet_file_path\" was not retrieved from the parameters file: " + parametersFile);
 			return;
 		}
 
 		guidelines = parser.get("openaire_guidelines");
 		if (guidelines == null) {
 			logger
 				.error("The \"openaire_guidelines\" was not retrieved from the parameters file: " + parametersFile);
 			return;
 		}
 
 		outputPath = parser.get("outputPath");
 		if (outputPath == null) {
 			logger.error("The \"outputPath\" was not retrieved from the parameters file: " + parametersFile);
 			return;
 		}
-
-			sparkMaster = "local[*]";
-		} else {
-			if (args.length != 4) {
-				String errorMsg = "Wrong number of arguments given! Please run the app like so: java -jar target/dhp-continuous-validation-1.0.0-SNAPSHOT.jar <sparkMaster> <parquetFileFullPath> <guidelines> <outputPath>";
-				System.err.println(errorMsg);
-				logger.error(errorMsg);
-				System.exit(1);
-			}
-
-			sparkMaster = args[0];
-			logger.info("Will use this Spark master: \"" + sparkMaster + "\".");
-
-			parquet_file_path = args[1];
-			guidelines = args[2];
-			outputPath = args[3];
-		}
 
 		if (!outputPath.endsWith("/"))
@@ -156,7 +134,6 @@ public class ContinuousValidator {
 					.option("compression", "gzip")
 					.mode(SaveMode.Overwrite)
 					.json(finalOutputPath + RESULTS_FILE_NAME); // The filename should be the name of the input-file or the
 			});
 	}
 }
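
For reference, below is a minimal consolidated sketch of how the argument-parsing flow in ContinuousValidator.main() reads after this cleanup, stitched together from the new side of the hunks above; the local declarations and exact indentation shown here are assumptions made for readability, not part of the commit.

	ArgumentApplicationParser parser = null;
	Boolean isSparkSessionManaged;

	try {
		// The parameter descriptors are always loaded from the bundled JSON resource now;
		// the old command-line fallback (guarded by SHOULD_GET_ARGUMENTS_FROM_FILE) is gone.
		String jsonConfiguration = IOUtils
			.toString(
				Objects
					.requireNonNull(
						ContinuousValidator.class
							.getResourceAsStream("/eu/dnetlib/dhp/continuous_validator/" + parametersFile)),
				StandardCharsets.UTF_8);
		parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);
	} catch (Exception e) {
		logger.error("Error when parsing the parameters!", e);
		return;
	}

	// Optional parameter: defaults to true when absent.
	isSparkSessionManaged = Optional
		.ofNullable(parser.get("isSparkSessionManaged"))
		.map(Boolean::valueOf)
		.orElse(Boolean.TRUE);

	// Each required parameter is null-checked right after parsing, e.g.:
	String parquet_file_path = parser.get("parquet_file_path");
	if (parquet_file_path == null) {
		logger.error("The \"parquet_file_path\" was not retrieved from the parameters file: " + parametersFile);
		return;
	}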