Code cleanup.
This commit is contained in:
parent
17282ea8fc
commit
eaa070f1e6
|
@ -15,13 +15,13 @@
|
||||||
<!-- The "version" is inherited from the parent module. -->
|
<!-- The "version" is inherited from the parent module. -->
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-common</artifactId>
|
<artifactId>dhp-common</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib</groupId>
|
<groupId>eu.dnetlib</groupId>
|
||||||
<artifactId>uoa-validator-engine2</artifactId>
|
<artifactId>uoa-validator-engine2</artifactId>
|
||||||
|
@ -32,33 +32,18 @@
|
||||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
|
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
<!--
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.thrift</groupId>
|
|
||||||
<artifactId>libthrift</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.woodstox</groupId>
|
|
||||||
<artifactId>woodstox-core</artifactId>
|
|
||||||
</dependency>
|
|
||||||
-->
|
|
||||||
|
|
||||||
<!-- Other dependencies. -->
|
<!-- Other dependencies. -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.code.gson</groupId>
|
<groupId>com.google.code.gson</groupId>
|
||||||
<artifactId>gson</artifactId>
|
<artifactId>gson</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
|
@ -4,14 +4,12 @@ package eu.dnetlib.dhp.continuous_validator;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.Row;
|
import org.apache.spark.sql.Row;
|
||||||
import org.apache.spark.sql.SaveMode;
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
@ -21,7 +19,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
|
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
|
||||||
import eu.dnetlib.validator2.validation.guideline.openaire.*;
|
import eu.dnetlib.validator2.validation.guideline.openaire.*;
|
||||||
import eu.dnetlib.validator2.validation.utils.TestUtils;
|
import eu.dnetlib.validator2.validation.utils.TestUtils;
|
||||||
import scala.Option;
|
|
||||||
|
|
||||||
public class ContinuousValidator {
|
public class ContinuousValidator {
|
||||||
|
|
||||||
|
@ -30,8 +27,6 @@ public class ContinuousValidator {
|
||||||
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidator.class);
|
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidator.class);
|
||||||
private static final String parametersFile = "input_continuous_validator_parameters.json";
|
private static final String parametersFile = "input_continuous_validator_parameters.json";
|
||||||
|
|
||||||
private static final boolean SHOULD_GET_ARGUMENTS_FROM_FILE = true; // It throws an error for now..
|
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
|
||||||
ArgumentApplicationParser parser = null;
|
ArgumentApplicationParser parser = null;
|
||||||
|
@ -41,65 +36,48 @@ public class ContinuousValidator {
|
||||||
String guidelines = null;
|
String guidelines = null;
|
||||||
String outputPath = null;
|
String outputPath = null;
|
||||||
|
|
||||||
if (SHOULD_GET_ARGUMENTS_FROM_FILE) {
|
try {
|
||||||
try {
|
String jsonConfiguration = IOUtils
|
||||||
String jsonConfiguration = IOUtils
|
.toString(
|
||||||
.toString(
|
Objects
|
||||||
Objects
|
.requireNonNull(
|
||||||
.requireNonNull(
|
ContinuousValidator.class
|
||||||
ContinuousValidator.class
|
.getResourceAsStream("/eu/dnetlib/dhp/continuous_validator/" + parametersFile)),
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/continuous_validator/" + parametersFile)),
|
StandardCharsets.UTF_8);
|
||||||
StandardCharsets.UTF_8);
|
|
||||||
|
|
||||||
parser = new ArgumentApplicationParser(jsonConfiguration);
|
parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Error when parsing the parameters!", e);
|
logger.error("Error when parsing the parameters!", e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
isSparkSessionManaged = Optional
|
isSparkSessionManaged = Optional
|
||||||
.ofNullable(parser.get("isSparkSessionManaged")) // This param is not mandatory, so it may be null.
|
.ofNullable(parser.get("isSparkSessionManaged")) // This param is not mandatory, so it may be null.
|
||||||
.map(Boolean::valueOf)
|
.map(Boolean::valueOf)
|
||||||
.orElse(Boolean.TRUE);
|
.orElse(Boolean.TRUE);
|
||||||
|
|
||||||
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
// This is needed to implement a unit test in which the spark session is created in the context of the
|
// This is needed to implement a unit test in which the spark session is created in the context of the
|
||||||
// unit test itself rather than inside the spark application"
|
// unit test itself rather than inside the spark application"
|
||||||
|
|
||||||
parquet_file_path = parser.get("parquet_file_path");
|
parquet_file_path = parser.get("parquet_file_path");
|
||||||
if (parquet_file_path == null) {
|
if (parquet_file_path == null) {
|
||||||
logger.error("The \"parquet_file_path\" was not retrieved from the parameters file: " + parametersFile);
|
logger.error("The \"parquet_file_path\" was not retrieved from the parameters file: " + parametersFile);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
guidelines = parser.get("openaire_guidelines");
|
guidelines = parser.get("openaire_guidelines");
|
||||||
if (guidelines == null) {
|
if (guidelines == null) {
|
||||||
logger
|
logger
|
||||||
.error("The \"openaire_guidelines\" was not retrieved from the parameters file: " + parametersFile);
|
.error("The \"openaire_guidelines\" was not retrieved from the parameters file: " + parametersFile);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
outputPath = parser.get("outputPath");
|
outputPath = parser.get("outputPath");
|
||||||
if (outputPath == null) {
|
if (outputPath == null) {
|
||||||
logger.error("The \"outputPath\" was not retrieved from the parameters file: " + parametersFile);
|
logger.error("The \"outputPath\" was not retrieved from the parameters file: " + parametersFile);
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
sparkMaster = "local[*]";
|
|
||||||
} else {
|
|
||||||
if (args.length != 4) {
|
|
||||||
String errorMsg = "Wrong number of arguments given! Please run the app like so: java -jar target/dhp-continuous-validation-1.0.0-SNAPSHOT.jar <sparkMaster> <parquetFileFullPath> <guidelines> <outputPath>";
|
|
||||||
System.err.println(errorMsg);
|
|
||||||
logger.error(errorMsg);
|
|
||||||
System.exit(1);
|
|
||||||
}
|
|
||||||
sparkMaster = args[0];
|
|
||||||
logger.info("Will use this Spark master: \"" + sparkMaster + "\".");
|
|
||||||
|
|
||||||
parquet_file_path = args[1];
|
|
||||||
guidelines = args[2];
|
|
||||||
outputPath = args[3];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!outputPath.endsWith("/"))
|
if (!outputPath.endsWith("/"))
|
||||||
|
@ -156,7 +134,6 @@ public class ContinuousValidator {
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.json(finalOutputPath + RESULTS_FILE_NAME); // The filename should be the name of the input-file or the
|
.json(finalOutputPath + RESULTS_FILE_NAME); // The filename should be the name of the input-file or the
|
||||||
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue