Initial commit of the "dhp-continuous-validation" module.

This commit is contained in:
Lampros Smyrnaios 2023-12-15 15:53:31 +02:00
parent 84d54643cf
commit 9e6a03e4e2
20 changed files with 1697 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
# Continuous Validation
[...]

View File

@@ -0,0 +1,38 @@
# This script installs and runs the project.
DEFAULT_PROFILE='' # It's the empty profile.
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
# For error-handling, we cannot use "set -e", since it is problematic: https://mywiki.wooledge.org/BashFAQ/105
# So we have our own function, for use when a single command fails.
handle_error () {
echo -e "\n\n$1\n\n"; exit $2
}
# Change the working directory to the script's directory, when running from another location.
cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1
if [[ $# -eq 0 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <sparkMaster> <justRun: 0 | 1>"; exit 2
fi
sparkMaster=""
justRun=0
if [[ $# -eq 1 ]]; then
sparkMaster=$1
elif [[ $# -eq 2 ]]; then
sparkMaster=$1
justRun=$2
elif [[ $# -gt 2 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <sparkMaster> <justRun: 0 | 1>"; exit 3
fi
if [[ $justRun -eq 0 ]]; then
mvn clean install ${CHOSEN_MAVEN_PROFILE}
fi
# Run the ContinuousValidator.
test_parquet_file="./src/test/resources/part-00589-733117df-3822-4fce-bded-17289cc5959a-c000.snappy.parquet"
java -jar ./target/dhp-continuous-validation-1.0.0-SNAPSHOT.jar ${sparkMaster} ${test_parquet_file} 1
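
For example, running `installAndRun.sh local[*] 0` builds the module with the default Maven profile and then runs the produced jar against the bundled test parquet file; passing `1` as the second argument skips the build step.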

View File

@@ -0,0 +1,380 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>dhp-continuous-validation</artifactId>
<!-- The "version" is inherited from the parent module. -->
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jackson.version>2.14.3</jackson.version> <!-- The Scala/Spark stack is not compatible with Jackson versions higher than 2.14.x -->
</properties>
<dependencies>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>uoa-validator-engine2</artifactId>
<version>0.9.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${dhp.spark.version}</version>
<!--<scope>provided</scope>-->
<!--
<scope>compile</scope>
-->
<exclusions>
<!-- This is an older version which causes problems. We have to add the latest version independently. -->
<!-- This exists only in Hadoop 3.0.0+ -->
<!--<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-api</artifactId>
</exclusion>-->
<exclusion>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${dhp.spark.version}</version>
<!--<scope>provided</scope>-->
<!--<scope>compile</scope>-->
<exclusions>
<!-- This exclusion is a must for scala 2.11 and spark 2.4.0.cloudera2 -->
<exclusion>
<groupId>com.twitter</groupId>
<artifactId>parquet-format</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.parquet/parquet-avro -->
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<version>1.13.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<!--<version>${dhp.hadoop.version}</version>-->
<!--<scope>compile</scope>-->
<exclusions>
<exclusion>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-reload4j</artifactId>
</exclusion>
<exclusion>
<groupId>ch.qos.reload4j</groupId>
<artifactId>reload4j</artifactId>
</exclusion>
<!-- Vulnerable dependencies: -->
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
</exclusion>
<!-- This dependency is required for the program to run without errors, even though it is discontinued. -->
<!--<exclusion>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
</exclusion>-->
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/io.dropwizard.metrics/metrics-core -->
<dependency>
<groupId>io.dropwizard.metrics</groupId>
<artifactId>metrics-core</artifactId>
<version>4.2.22</version>
<!-- <scope>compile</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
<!-- <scope>compile</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
<!-- <scope>compile</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>${jackson.version}</version>
<!-- <scope>compile</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-app -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-app</artifactId>
<version>${dhp.hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<!-- Vulnerable dependencies: -->
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
<!--<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${dhp.hadoop.version}</version>
&lt;!&ndash; <scope>compile</scope>&ndash;&gt;
<exclusions>
<exclusion>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-reload4j</artifactId>
</exclusion>
<exclusion>
<groupId>ch.qos.reload4j</groupId>
<artifactId>reload4j</artifactId>
</exclusion>
&lt;!&ndash; Vulnerable dependencies: &ndash;&gt;
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>-->
<!-- Add back some updated version of the needed dependencies. -->
<!-- This should be enabled only when using Hadoop 3.0.0+ -->
<!--<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-api</artifactId>
<version>${dhp.hadoop.version}</version>
</dependency>-->
<dependency> <!-- Newer versions (>= 0.18.x) are not compatible with Java 8. -->
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>0.17.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
<version>6.5.1</version>
</dependency>
<!-- Other dependencies. -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.10.1</version>
</dependency>
<!-- logback versions 1.4.x require Java 11 -->
<!-- logback versions 1.3.x run on Java 8, but if this project is added as a dependency in a Spring Boot (2.7.x) app, Spring Boot throws an error, since it does not yet support logback 1.3.x -->
<!-- https://mvnrepository.com/artifact/ch.qos.logback/logback-core -->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-core</artifactId>
<version>1.2.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.36</version>
</dependency>
<!-- https://mvnrepository.com/artifact/ch.qos.logback/logback-classic -->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-engine -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<version>5.10.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
<!-- <scope>compile</scope>-->
</dependency>
</dependencies>
<!--<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.5.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<shadedArtifactAttached>false</shadedArtifactAttached>
<transformers>
&lt;!&ndash; add Main-Class to manifest file &ndash;&gt;
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>eu.dnetlib.dhp.continuous_validator.ContinuousValidator</mainClass>
</transformer>
</transformers>
<filters>
&lt;!&ndash; filter manifest signature files when creating uber-jar &ndash;&gt;
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/INDEX.LIST</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
<exclude>LICENSE*</exclude>
</excludes>
</filter>
</filters>
<createDependencyReducedPom>false</createDependencyReducedPom>
&lt;!&ndash; The "minimize" config breaks things at runtime. &ndash;&gt;
&lt;!&ndash;<minimizeJar>true</minimizeJar>&ndash;&gt;
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.2</version>
<configuration>
&lt;!&ndash;<excludes>
<exclude>some test to exclude here</exclude>
</excludes>&ndash;&gt;
</configuration>
</plugin>
</plugins>
</build>-->
<repositories>
<repository>
<id>libs</id>
<url>file:///${project.basedir}/libs</url>
</repository>
</repositories>
</project>

View File

@@ -0,0 +1,9 @@
# This script deploys and runs the oozie workflow.
DEFAULT_PROFILE='' # It's the empty profile.
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
mvn clean package ${CHOSEN_MAVEN_PROFILE} -Poozie-package,deploy,run \
-Dworkflow.source.dir=eu/dnetlib/dhp/continuous_validator

View File

@@ -0,0 +1,211 @@
package eu.dnetlib.dhp.continuous_validator;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.BufferedWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.openaire.AbstractOpenAireProfile;
import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV4Profile;
import eu.dnetlib.validator2.validation.utils.TestUtils;
import scala.Option;
public class ContinuousValidator {
public static final String TEST_FILES_V4_DIR = TestUtils.TEST_FILES_BASE_DIR + "openaireguidelinesV4/";
public static final String RESULTS_FILE = "results.json";
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidator.class);
private static final String parametersFile = "input_continuous_validator_parameters.json";
private static final boolean SHOULD_GET_ARGUMENTS_FROM_FILE = true; // The command-line alternative throws an error for now.
public static void main(String[] args) {
ArgumentApplicationParser parser = null;
String sparkMaster = null;
Boolean isSparkSessionManaged = false;
String parquet_file_path = null;
String guidelines = null;
String outputPath = null;
if (SHOULD_GET_ARGUMENTS_FROM_FILE) {
try {
String jsonConfiguration = IOUtils
.toString(
Objects
.requireNonNull(
ContinuousValidator.class
.getResourceAsStream("/eu/dnetlib/dhp/continuous_validator/" + parametersFile)),
StandardCharsets.UTF_8);
parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
String isSparkSessionManagedStr = parser.get("isSparkSessionManaged");
if (isSparkSessionManagedStr == null) {
logger
.error(
"The \"isSParkSessionManagedStr\" was not retrieved from the parameters file: "
+ parametersFile);
return;
}
// This "is needed to implement a unit test in which the spark session is created in the context of the
// unit test itself rather than inside the spark application"
isSparkSessionManaged = Optional
.of(isSparkSessionManagedStr)
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
// TODO - If the above is true, then the Spark-session defined in the unit-test should be used.
} catch (Exception e) {
logger.error("Error when parsing the parameters!", e);
return;
}
parquet_file_path = parser.get("parquet_file_path");
if (parquet_file_path == null) {
logger.error("The \"parquet_file_path\" was not retrieved from the parameters file: " + parametersFile);
return;
}
guidelines = parser.get("guidelines");
if (guidelines == null) {
logger.error("The \"guidelines\" was not retrieved from the parameters file: " + parametersFile);
return;
}
outputPath = parser.get("outputPath");
if (outputPath == null) {
logger.error("The \"outputPath\" was not retrieved from the parameters file: " + parametersFile);
return;
}
sparkMaster = "local[*]";
} else {
if (args.length != 4) {
String errorMsg = "Wrong number of arguments given! Please run the app like so: java -jar target/dhp-continuous-validation-1.0.0-SNAPSHOT.jar <sparkMaster> <parquetFileFullPath> <guidelines> <outputPath>";
System.err.println(errorMsg);
logger.error(errorMsg);
System.exit(1);
}
sparkMaster = args[0];
logger.info("Will use this Spark master: \"" + sparkMaster + "\".");
parquet_file_path = args[1];
guidelines = args[2];
outputPath = args[3];
if (!outputPath.endsWith("/"))
outputPath += "/";
}
logger
.info(
"Will validate the contents of parquetFile: \"" + parquet_file_path + "\", against guidelines: \""
+ guidelines + "\"" + " and will output the results in: " + outputPath + RESULTS_FILE);
// TODO - USE THE "runWithSparkSession" METHOD TO RUN THE SPARK CODE INSIDE!!
AbstractOpenAireProfile profile = new LiteratureGuidelinesV4Profile();
SparkConf conf = new SparkConf();
conf.setAppName(ContinuousValidator.class.getSimpleName());
String finalParquet_file_path = parquet_file_path;
String finalOutputPath = outputPath;
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
Dataset<Row> parquetFileDF = spark.read().parquet(finalParquet_file_path);
parquetFileDF.show(5);
// Filter the results based on the XML-encoding and non-null id and body.
parquetFileDF = parquetFileDF
.filter(
parquetFileDF
.col("encoding")
.eqNullSafe("XML")
.and(parquetFileDF.col("id").isNotNull())
.and(parquetFileDF.col("body").isNotNull()));
// Use a new instance of Document Builder in each worker, as it is not thread-safe.
MapFunction<Row, XMLApplicationProfile.ValidationResult> validateMapFunction = row -> profile
.validate(
row.getAs("id").toString(),
TestUtils
.getDocumentBuilder()
.parse(IOUtils.toInputStream(row.getAs("body").toString(), StandardCharsets.UTF_8)));
Dataset<XMLApplicationProfile.ValidationResult> validationResultsDataset = parquetFileDF
.map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class));
logger.info("Showing a few validation-results.. just for checking");
validationResultsDataset.show(5);
// Write the results to json file immediately, without converting them to a list.
validationResultsDataset
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(finalOutputPath + RESULTS_FILE); // The filename should be the name of the input-file or the
// input-directory.
if (logger.isDebugEnabled()) {
List<XMLApplicationProfile.ValidationResult> validationResultsList = validationResultsDataset
.javaRDD()
.collect();
if (validationResultsList.isEmpty()) {
logger.error("The \"validationResultsList\" was empty!");
return;
}
validationResultsList.forEach(vr -> logger.debug(vr.id() + " | score:" + vr.score()));
for (XMLApplicationProfile.ValidationResult result : validationResultsList)
logger.debug(result.toString());
}
// TODO - REMOVE THIS WHEN THE WRITE FROM ABOVE IS OK
/*
* try (BufferedWriter writer = Files .newBufferedWriter(Paths.get(outputPath + RESULTS_FILE),
* StandardCharsets.UTF_8)) { writer.write(new Gson().toJson(validationResultsList)); } catch (Exception e)
* { logger.error("Error when writing the \"validationResultsList\" as json into the results-file: " +
* outputPath + RESULTS_FILE); return; }
*/
Option<String> uiWebUrl = spark.sparkContext().uiWebUrl();
if (uiWebUrl.isDefined()) {
logger
.info(
"Waiting 60 seconds, before shutdown, for the user to check the jobs' status at: "
+ uiWebUrl.get());
try {
Thread.sleep(60_000);
} catch (InterruptedException ignored) {
}
} else
logger.info("The \"uiWebUrl\" is not defined, in order to check the jobs' status. Shutting down..");
});
}
}
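
As an illustration, here is a minimal, standalone sketch of the per-record validation step that the map-function above performs, outside of Spark. The class name, record id and XML body are hypothetical placeholders; the profile and helper classes are the ones already used above.

import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;

import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV4Profile;
import eu.dnetlib.validator2.validation.utils.TestUtils;

public class SingleRecordValidationSketch {

	public static void main(String[] args) throws Exception {
		// Hypothetical id and XML body, standing in for the "id" and "body" columns of the parquet file.
		String id = "oai:example.org:record/1";
		String xmlBody = "<resource xmlns=\"http://namespace.openaire.eu/schema/oaire/\"/>";

		LiteratureGuidelinesV4Profile profile = new LiteratureGuidelinesV4Profile();
		// Use a new DocumentBuilder per thread, as it is not thread-safe.
		XMLApplicationProfile.ValidationResult result = profile
			.validate(id, TestUtils.getDocumentBuilder().parse(IOUtils.toInputStream(xmlBody, StandardCharsets.UTF_8)));
		System.out.println(result.id() + " | score: " + result.score());
	}
}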

View File

@@ -0,0 +1,101 @@
package eu.dnetlib.dhp.continuous_validator.utils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.io.InputFile;
import org.slf4j.LoggerFactory;
public class ParquetUtils {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ParquetUtils.class);
private static final Configuration parquetConfig = new Configuration();
public static List<GenericRecord> getParquetRecords(String fullFilePath) {
InputFile inputFile;
try { // TODO - Verify that this will create any directories which do not exist in the provided path. Currently
// we create the directories beforehand.
inputFile = HadoopInputFile.fromPath(new Path(fullFilePath), parquetConfig);
// logger.trace("Created the parquet " + outputFile); // DEBUG!
} catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown.
// "Throwable" catches EVERYTHING!
logger.error("", e);
return null;
}
List<GenericRecord> records = new ArrayList<>();
GenericRecord record;
try (ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord> builder(inputFile).build()) {
while ((record = reader.read()) != null) {
records.add(record);
}
} catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown.
// "Throwable" catches EVERYTHING!
logger.error("Problem when creating the \"ParquetWriter\" object or when writing the records with it!", e);
// At some point, I got an "NoSuchMethodError", because of a problem in the AvroSchema file:
// (java.lang.NoSuchMethodError: org.apache.avro.Schema.getLogicalType()Lorg/apache/avro/LogicalType;).
// The error was with the schema: {"name": "date", "type" : ["null", {"type" : "long", "logicalType" :
// "timestamp-millis"}]},
return null;
}
return records; // It may be empty.
}
public static Map<String, String> getIdXmlMapFromParquetFile(String parquetFileFullPath) {
List<GenericRecord> recordList = ParquetUtils.getParquetRecords(parquetFileFullPath);
if (recordList == null)
return null; // The error is already logged.
else if (recordList.isEmpty()) {
logger.error("The parquet-file \"" + parquetFileFullPath + "\" had no records inside!");
return null;
}
Map<String, String> idXmlMap = new HashMap<>();
for (GenericRecord record : recordList) {
if (logger.isTraceEnabled())
logger.trace(record.toString());
Object id = record.get("id");
if (id == null)
continue;
String idStr = id.toString();
Object encoding = record.get("encoding");
if (encoding == null) {
logger.warn("Record with id = \"" + idStr + "\" does not provide the encoding for its body!");
continue;
}
String encodingStr = encoding.toString();
if (!encodingStr.equals("XML")) {
logger.warn("Record with id = \"" + idStr + "\" does not have XML encoding for its body!");
continue;
}
Object body = record.get("body");
if (body == null) {
logger.warn("Record with id = \"" + idStr + "\" does not have a body!");
continue;
}
String bodyStr = body.toString();
idXmlMap.put(idStr, bodyStr);
// logger.debug(idStr + " | " + idXmlMap.get(idStr));
}
return idXmlMap;
}
}
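
A minimal usage sketch for this helper follows; the class name and the parquet-file path are hypothetical, and the file is expected to contain the "id", "encoding" and "body" columns handled above.

import java.util.Map;

import eu.dnetlib.dhp.continuous_validator.utils.ParquetUtils;

public class ParquetUtilsUsageSketch {

	public static void main(String[] args) {
		// Hypothetical parquet file; any errors are already logged by the helper.
		Map<String, String> idXmlMap = ParquetUtils.getIdXmlMapFromParquetFile("./records.snappy.parquet");
		if (idXmlMap == null || idXmlMap.isEmpty())
			return;
		idXmlMap.forEach((id, xml) -> System.out.println(id + " -> " + xml.length() + " characters of XML"));
	}
}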

View File

@@ -0,0 +1,22 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>sparkExecutorMemoryOverhead</name>
<value>1G</value>
</property>
</configuration>

View File

@@ -0,0 +1,111 @@
<workflow-app name="Validate metadata records against OpenAIRE Guidelines" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>parquet_file_path</name>
<value>./src/test/resources/part-00589-733117df-3822-4fce-bded-17289cc5959a-c000.snappy.parquet</value>
<description>the full path of the parquet file</description>
</property>
<property>
<name>openaire_guidelines</name>
<value>4.0</value>
<description>the version of the OpenAIRE Guidelines to validate the records against</description>
</property>
<property>
<name>outputPath</name>
<value>.</value>
<description>the path of the output directory where the result json-files will be stored</description>
</property>
<property>
<name>sparkDriverMemory</name>
<value>4096M</value>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<value>2048m</value>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<value>4</value>
<description>number of cores used by single executor</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<!--<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>-->
<property>
<name>spark2EventLogDir</name>
<value>/spark2/logs</value>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<!--<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>-->
</configuration>
</global>
<start to="ValidateMetadataRecords"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ValidateMetadataRecords">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Validate multiple records against OpenAIRE Guidelines</name>
<class>eu.dnetlib.dhp.continuous_validator.ContinuousValidator</class>
<jar>dhp-continuous-validation-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
<!-- &#45;&#45;conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
&#45;&#45;conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}-->
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<!-- Arguments passed to the "main" method of the class defined above. -->
<arg>--parquet_file_path</arg><arg>${parquet_file_path}</arg>
<arg>--openaire_guidelines</arg><arg>${openaire_guidelines}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@@ -0,0 +1,26 @@
[
{
"paramName": "issm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "when true will stop SparkSession after job execution",
"paramRequired": false
},
{
"paramName": "prq_file",
"paramLongName": "parquet_file_path",
"paramDescription": "the full path for the parquet file to be processed",
"paramRequired": true
},
{
"paramName": "guidelines",
"paramLongName": "openaire_guidelines",
"paramDescription": "the version of the OpenAIRE Guidelines to validate the records against",
"paramRequired": true
},
{
"paramName": "o",
"paramLongName": "outputPath",
"paramDescription": "the path of the output-directory where the result-json-files will be stored",
"paramRequired": true
}
]
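
These parameter definitions are consumed by the ArgumentApplicationParser, exactly as in the ContinuousValidator class above. A minimal sketch (the class name is hypothetical; it assumes this json file is available on the classpath at the path used by ContinuousValidator):

import java.nio.charset.StandardCharsets;
import java.util.Objects;

import org.apache.commons.io.IOUtils;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class ParameterParsingSketch {

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				Objects
					.requireNonNull(
						ParameterParsingSketch.class
							.getResourceAsStream(
								"/eu/dnetlib/dhp/continuous_validator/input_continuous_validator_parameters.json")),
				StandardCharsets.UTF_8);
		ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		// Expects e.g.: --parquet_file_path <path> --openaire_guidelines 4.0 --outputPath <dir>
		parser.parseArgument(args);
		System.out.println(parser.get("parquet_file_path"));
	}
}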

View File

@@ -0,0 +1,43 @@
<configuration debug="false">
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/> <!-- Default delay: 0ms (zero) -->
<!-- <Appenders>-->
<appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/ContinuousValidator.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
<fileNamePattern>logs/ContinuousValidator.%i.log.zip</fileNamePattern>
<minIndex>1</minIndex>
<maxIndex>10</maxIndex>
</rollingPolicy>
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
<maxFileSize>100MB</maxFileSize> <!-- Lower the size to check the "RollingFileAppender" during tests. -->
</triggeringPolicy>
<encoder>
<charset>UTF-8</charset>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
</encoder>
</appender>
<appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<charset>UTF-8</charset>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
</encoder>
</appender>
<!-- </Appenders>-->
<root level="debug">
<appender-ref ref="Console" />
</root>
<logger name="eu.dnetlib.dhp.continuous_validator" level="trace"/>
<logger name="eu.dnetlib.validator2" level="error"/>
<logger name="org.sparkproject" level="info"/>
<logger name="org.apache.spark" level="info"/>
<logger name="org.apache.hadoop" level="info"/>
<logger name="org.apache.parquet" level="info"/>
</configuration>

View File

@@ -0,0 +1,33 @@
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.continuous_validator.utils.ParquetUtils;
import eu.dnetlib.validator2.validation.utils.TestUtils;
public class ReadParquetDataTest {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ReadParquetDataTest.class);
private static final String parquetFileFullPath = TestUtils.TEST_FILES_BASE_DIR
+ "part-00589-733117df-3822-4fce-bded-17289cc5959a-c000.snappy.parquet";
public static void main(String[] args) {
new ReadParquetDataTest().testParquetRead();
}
@Test
public void testParquetRead() {
Map<String, String> idXmlMap = ParquetUtils.getIdXmlMapFromParquetFile(parquetFileFullPath);
if (idXmlMap == null) {
logger.error("Could not create the \"idXmlMap\" from parquet-file: " + parquetFileFullPath);
System.exit(99);
} else if (idXmlMap.isEmpty())
logger.warn("The generated \"idXmlMap\" was empty, for parquet-file: " + parquetFileFullPath);
else
logger.info("The \"idXmlMap\" was successfully generated, for parquet-file: " + parquetFileFullPath);
}
}

View File

@@ -0,0 +1,34 @@
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.List;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.continuous_validator.ContinuousValidator;
public class ReadResultsTest {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ReadResultsTest.class);
public static void main(String[] args) {
try {
List standardValidationResultList = new Gson()
.fromJson(new BufferedReader(new FileReader(ContinuousValidator.RESULTS_FILE)), List.class);
if (standardValidationResultList == null)
logger.error("Could not map the json to a \"List\" object.");
else if (standardValidationResultList.isEmpty())
logger.warn("The \"standardValidationResultList\" is empty!");
else
logger.info(standardValidationResultList.toString());
} catch (FileNotFoundException fnfe) {
logger.error("The results-file \"" + ContinuousValidator.RESULTS_FILE + "\" does not exist!");
} catch (Exception e) {
logger.error("Error when reading the json-results-file \"" + ContinuousValidator.RESULTS_FILE + "\"", e);
}
}
}

View File

@@ -0,0 +1,126 @@
import static eu.dnetlib.dhp.continuous_validator.ContinuousValidator.TEST_FILES_V4_DIR;
import java.io.BufferedWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import com.google.gson.JsonIOException;
import eu.dnetlib.dhp.continuous_validator.ContinuousValidator;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV4Profile;
import eu.dnetlib.validator2.validation.utils.TestUtils;
import scala.Option;
public class ValidateTestFiles {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ValidateTestFiles.class);
public static final String RESULTS_FILE = "results.json";
public static void main(String[] args) {
if (args.length != 3) {
String errorMsg = "Wrong number of arguments given! PLease run the app like so: java -jar build/libs/continuous-validator-1.0.0-SNAPSHOT.jar <sparkMaster> <parquetFileFullPath> <guidelines>";
System.err.println(errorMsg);
logger.error(errorMsg);
System.exit(1);
}
String sparkMaster = args[0];
logger.info("Will use this Spark master: \"" + sparkMaster + "\".");
String parquetFileFullPath = args[1];
String guidelines = args[2];
logger
.info(
"Will validate the contents of parquetFile: \"" + parquetFileFullPath + "\", against guidelines: \""
+ guidelines + "\".");
SparkConf sparkConf = new SparkConf();
sparkConf.setAppName("Continuous-Validator");
sparkConf.setMaster(sparkMaster); // Run on the Spark Cluster.
sparkConf.set("spark.driver.memory", "4096M");
sparkConf
.set("spark.executor.instances", "4") // 4 executors
.set("spark.executor.cores", "1"); // 1 core per executor
sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
sparkConf.set("spark.rdd.compress", "true");
String appVersion = "1.0.0-SNAPSHOT";
/*
* try { Class<?> klass = Class.forName("eu.dnetlib.continuous_validator.BuildConfig"); appVersion = (String)
* klass.getDeclaredField("version").get(null); if ( logger.isTraceEnabled() )
* logger.trace("The app's version is: " + appVersion); } catch (Exception e) {
* logger.error("Error when acquiring the \"appVersion\"!", e); System.exit(1); }
*/
sparkConf.setJars(new String[] {
"build/libs/continuous-validator-" + appVersion + "-all.jar"
}); // This is the "fat-Jar".
sparkConf.validateSettings();
logger.debug("Spark custom configurations: " + sparkConf.getAll().toString());
LiteratureGuidelinesV4Profile profile = new LiteratureGuidelinesV4Profile();
try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
JavaPairRDD<String, String> jprdd = sc.wholeTextFiles(TEST_FILES_V4_DIR);
logger.info("Showing the validation-results..");
// Use a new instance of Document Builder in each worker, as it is not thread-safe.
// The "x._1" is the filename and the "x._2" in the content of the file.
List<XMLApplicationProfile.ValidationResult> validationResultsList = jprdd
.map(
x -> profile
.validate(
x._1,
TestUtils.getDocumentBuilder().parse(IOUtils.toInputStream(x._2, StandardCharsets.UTF_8))))
.collect();
if (validationResultsList.isEmpty()) {
logger.error("The \"validationResultsList\" was empty!");
return;
}
if (logger.isDebugEnabled())
validationResultsList.forEach(vr -> logger.debug(vr.id() + " | score:" + vr.score()));
logger.debug(validationResultsList.toString());
try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(RESULTS_FILE), StandardCharsets.UTF_8)) {
writer.write(new Gson().toJson(validationResultsList));
} catch (Exception e) {
logger
.error(
"Error when writing the \"validationResultsList\" as json into the results-file: "
+ RESULTS_FILE);
return;
}
Option<String> uiWebUrl = sc.sc().uiWebUrl();
if (uiWebUrl.isDefined()) {
logger
.info(
"Waiting 60 seconds, before shutdown, for the user to check the jobs' status at: "
+ uiWebUrl.get());
Thread.sleep(60_000);
} else
logger.info("The \"uiWebUrl\" is not defined, in order to check the jobs' status. Shutting down..");
} catch (JsonIOException jie) {
logger.error("Error when writing the validation results to the json file: " + jie.getMessage());
} catch (Exception e) {
logger.error("Error validating directory: " + TEST_FILES_V4_DIR, e);
}
}
}

View File

@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns="http://namespace.openaire.eu/schema/oaire/" xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<datacite:titles>
<datacite:title xml:lang="eng">Journal bearings subjected to dynamic loads: the analytical mobility method</datacite:title>
</datacite:titles>
<datacite:creators>
<datacite:creator>
<datacite:creatorName>Flores, Paulo</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>Claro, José Carlos Pimenta</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>Ambrósio, Jorge</datacite:creatorName>
</datacite:creator>
</datacite:creators>
<datacite:contributors>
<datacite:contributor contributorType="HostingInstitution">
<datacite:contributorName>Universidade do Minho</datacite:contributorName>
<datacite:nameIdentifier nameIdentifierScheme="e-mail" schemeURI="mailto:repositorium@sdum.uminho.pt">repositorium@sdum.uminho.pt</datacite:nameIdentifier>
</datacite:contributor>
</datacite:contributors>
<datacite:relatedIdentifiers/>
<datacite:dates>
<datacite:date dateType="Accepted">2005-06-16</datacite:date>
<datacite:date dateType="Issued">2005-06-16</datacite:date>
</datacite:dates>
<dc:language>eng</dc:language>
<oaire:resourceType uri="http://purl.org/coar/resource_type/c_5794" resourceTypeGeneral="literature">conference paper</oaire:resourceType>
<dc:description xml:lang="por">The main purpose of this work is to use the analytical mobility method to analyze journal bearings subjected to dynamic loads, with the intent to include it in a general computational program that has been developed for the dynamic analysis of general mechanical systems. A simple journal bearing subjected to a dynamic load is chosen as a demonstrative example, in order to provide the necessary results for a comprehensive discussion of the methodology presented.</dc:description>
<dc:description xml:lang="por">Fundação para a Ciência e a Tecnologia (FCT)</dc:description>
<dc:description xml:lang="por">Fundo Comunitário Europeu FEDER under project POCTI/EME/2001/38281, entitled Dynamic of Mechanical Systems with joint Clearances and Imperfections</dc:description>
<dc:format>application/pdf</dc:format>
<datacite:identifier identifierType="Handle">https://hdl.handle.net/1822/18042</datacite:identifier>
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_16ec">restricted access</datacite:rights>
<datacite:subjects>
<datacite:subject>Dynamic bearings</datacite:subject>
<datacite:subject>Hydrodynamic lubrication</datacite:subject>
</datacite:subjects>
<datacite:sizes>
<datacite:size>294443 bytes</datacite:size>
</datacite:sizes>
<oaire:file accessRightsURI="http://purl.org/coar/access_right/c_16ec" mimeType="application/pdf" objectType="fulltext">https://repositorium.sdum.uminho.pt/bitstream/1822/18042/1/CI-2005_02.pdf</oaire:file>
<oaire:citationTitle>IberTrib - 3º Congresso Ibérico de Tribologia</oaire:citationTitle>
<oaire:citationStartPage>1</oaire:citationStartPage>
<oaire:citationEndPage>15</oaire:citationEndPage>
<oaire:citationConferencePlace>Guimarães, Portugal</oaire:citationConferencePlace>
</resource>

View File

@@ -0,0 +1,59 @@
<?xml version='1.0' encoding='UTF-8'?><record xmlns="http://www.openarchives.org/OAI/2.0/">
<header>
<identifier>oai:mediarep.org:doc/2534</identifier>
<datestamp>2020-03-27T11:35:21Z</datestamp>
<setSpec setName="Zeitschrift für Medienwissenschaft">com_doc_568</setSpec>
<setSpec setName="Zeitschriften">com_doc_179</setSpec>
<setSpec>col_doc_1963</setSpec>
<setSpec setName="OpenAIRE">openaire</setSpec>
</header>
<metadata>
<resource xmlns="http://namespace.openaire.eu/schema/oaire/"
xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<datacite:title>Die autogerechte Stadt (Rückführung)</datacite:title>
<datacite:creators>
<datacite:creator>
<datacite:creatorName>Schmitt, Arne</datacite:creatorName>
</datacite:creator>
</datacite:creators>
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="URL">https://mediarep.org/handle/doc/2534</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="ISSN" relationType="IsPartOf">1869-1722</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<dc:language>deu</dc:language>
<dc:publisher>diaphanes</dc:publisher>
<datacite:date dateType="Issued">2015</datacite:date>
<resourceType resourceTypeGeneral="literature" uri="http://purl.org/coar/resource_type/c_6501">
journal article
</resourceType>
<dc:description>Bildstrecke ausgewählter Abbildungen aus Hans Bernhard Reichows Buch DIE AUTOGERECHTE STADT.</dc:description>
<dc:format>application/pdf</dc:format>
<datacite:identifier identifierType="DOI">http://dx.doi.org/10.25969/mediarep/1436</datacite:identifier>
<datacite:rights uri="http://purl.org/coar/access_right/c_abf2">
open access
</datacite:rights>
<datacite:subjects>
<datacite:subject>Infrastruktur</datacite:subject>
<datacite:subject>Urbanität</datacite:subject>
<datacite:subject>Mobilität</datacite:subject>
<datacite:subject subjectScheme="DDC">300</datacite:subject>
</datacite:subjects>
<version uri="http://purl.org/coar/version/c_970fb48d4fbd8a85">
VoR
</version>
<file accessRightsURI="http://purl.org/coar/access_right/c_abf2" mimeType="application/pdf" objectType="&#xa; fulltext&#xa; ">https://mediarep.org/bitstream/doc/2534/1/ZfM_12_104-133_Schmitt_Die_autogerechte_Stadt.pdf</file>
<citationTitle>Zeitschrift für Medienwissenschaft</citationTitle>
<citationVolume>7</citationVolume>
<citationIssue>1</citationIssue>
<citationStartPage>104</citationStartPage>
<citationEndPage>113</citationEndPage>
<dcterms:audience>Researchers</dcterms:audience>
<dcterms:audience>Students</dcterms:audience>
</resource>
</metadata>
</record>

View File

@@ -0,0 +1,251 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--This document supports all available OpenAIRE Guidelines for Literature Repositories v4-->
<resource
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns="http://namespace.openaire.eu/schema/oaire/"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<!--1. Title (M)-->
<datacite:titles>
<datacite:title xml:lang="en-US">
National Institute for Environmental Studies and Center
for Climate System Research Japan
</datacite:title>
<datacite:title xml:lang="en-US" titleType="Subtitle">A survey</datacite:title>
</datacite:titles>
<!--2. Creator (M)-->
<datacite:creators>
<datacite:creator>
<datacite:creatorName nameType="Personal">Evans, R.J.</datacite:creatorName>
<datacite:givenName>Robert</datacite:givenName>
<datacite:familyName>Evans</datacite:familyName>
<datacite:affiliation>Institute of Science and Technology</datacite:affiliation>
<datacite:nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org">
1234-1234-1234-1234
</datacite:nameIdentifier>
</datacite:creator>
</datacite:creators>
<!--3. Contributor (MA)-->
<datacite:contributors>
<datacite:contributor contributorType="Other">
<datacite:contributorName nameType="Organizational">i-5uvLfGD2R.y8fXv442y8o.D6v1</datacite:contributorName>
<datacite:givenName>givenName3</datacite:givenName>
<datacite:familyName>familyName3</datacite:familyName>
<datacite:nameIdentifier nameIdentifierScheme="hK9c28uVD" schemeURI="http://fkPymUoN/">MuDRo.ymY7qG1Or.9Ny</datacite:nameIdentifier>
<datacite:nameIdentifier nameIdentifierScheme="k_Q7K8AbQiwLlreHzyU" schemeURI="http://EaTpassQ/">a1Sp</datacite:nameIdentifier>
<datacite:affiliation>affiliation5</datacite:affiliation>
<datacite:affiliation>affiliation6</datacite:affiliation>
</datacite:contributor>
<datacite:contributor contributorType="Editor">
<datacite:contributorName nameType="Organizational">ZxHQr4TGMeqqEOTbMcNd_EMFvIf</datacite:contributorName>
<datacite:givenName>givenName4</datacite:givenName>
<datacite:familyName>familyName4</datacite:familyName>
<datacite:nameIdentifier nameIdentifierScheme="PWhc-.4-Ock6m2WlzXL4km" schemeURI="http://KTYQUXaS/">nJNWaCv</datacite:nameIdentifier>
<datacite:nameIdentifier nameIdentifierScheme="fZBhNb-U2t" schemeURI="http://BJDgOJde/">RobhZUU3Obaqq-3UlrgxKNe-c0</datacite:nameIdentifier>
<datacite:affiliation>affiliation7</datacite:affiliation>
<datacite:affiliation>affiliation8</datacite:affiliation>
</datacite:contributor>
</datacite:contributors>
<!--4. Funding Reference (MA)-->
<oaire:fundingReferences>
<oaire:fundingReference>
<oaire:funderName>pZT7SjviMp4yodvIG</oaire:funderName>
<oaire:funderIdentifier funderIdentifierType="ISNI">ff238yCNbhv5k5Y8AmbpyzYt</oaire:funderIdentifier>
<oaire:fundingStream>svI_6A</oaire:fundingStream>
<oaire:awardNumber awardURI="http://TNrUwkxq/">E-</oaire:awardNumber>
<oaire:awardTitle>award</oaire:awardTitle>
</oaire:fundingReference>
<oaire:fundingReference>
<oaire:funderName>funderName</oaire:funderName>
<oaire:funderIdentifier funderIdentifierType="Crossref Funder">QwqkD4y</oaire:funderIdentifier>
<oaire:fundingStream>yyADU1SFmiS-meqQ6</oaire:fundingStream>
<oaire:awardNumber awardURI="http://oKladiXr/">JyORkpwzt</oaire:awardNumber>
<oaire:awardTitle>gyg2F9EKT7Gp.mSBU8.Drqf</oaire:awardTitle>
</oaire:fundingReference>
</oaire:fundingReferences>
<!--5. Alternate Identifier (R)-->
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="DOI">10.1002/chem.201701589</datacite:alternateIdentifier>
<datacite:alternateIdentifier alternateIdentifierType="PMID">PMC5574022</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<!--6. Related Identifier (R)-->
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="ISSN" relationType="HasMetadata" relatedMetadataScheme="sdfhr" schemeURI="http://fsdfg.cm" schemeType="XSD" resourceTypeGeneral="DataPaper">
0947-6539
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="EISSN" relationType="IsPartOf" resourceTypeGeneral="Model">1521-3765</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<datacite:dates>
<!--7. Embargo Period Date (MA)-->
<datacite:date dateType="Accepted">2011-12-01</datacite:date>
<datacite:date dateType="Available">2012-12-31</datacite:date>
<!--10. Publication Date (M)-->
<datacite:date dateType="Issued">2011</datacite:date>
</datacite:dates>
<!--8. Language (MA)-->
<dc:language>eng</dc:language>
<dc:language>deu</dc:language>
<dc:language>nld</dc:language>
<!--9. Publisher (MA)-->
<dc:publisher>
Loughborough University. Department of Computer Science
</dc:publisher>
<dc:publisher>John Wiley &amp; Sons, Inc. (US)</dc:publisher>
<!--11. Resource Type (M)-->
<oaire:resourceType resourceTypeGeneral="literature" uri="http://purl.org/coar/resource_type/c_93fc">report</oaire:resourceType>
<!--12. Description (MA)-->
<dc:description xml:lang="en-US">
Foreword [by] Hazel Anderson; Introduction; The scientific heresy:
transformation of a society; Consciousness as causal reality [etc]
</dc:description>
<dc:description xml:lang="en-US">
A number of problems in quantum state and system identification are
addressed.
</dc:description>
<!--13. Format (R)-->
<dc:format>video/quicktime</dc:format>
<dc:format>application/pdf</dc:format>
<dc:format>application/xml</dc:format>
<!--14. Resource Identifier (M)-->
<datacite:identifier identifierType="URN">http://urn.kb.se/resolve?urn=urn:nbn:se:uu:diva-160648</datacite:identifier>
<!--15. Access Rights (M)-->
<datacite:rights uri="http://purl.org/coar/access_right/c_f1cf">embargoed access</datacite:rights>
<!--16. Source (R)-->
<dc:source>Ecology Letters (1461023X) vol.4 (2001)</dc:source>
<dc:source>Ecology Letters (1461025X) vol.5 (2002)</dc:source>
<!--17. Subject (MA)-->
<datacite:subjects>
<datacite:subject subjectScheme="fAOT3oCMLqANRdqCniD" schemeURI="http://bsOyHkIY/" valueURI="http://ZiIiSVAz/" xml:lang="it-IT">ft12Zy</datacite:subject>
<datacite:subject subjectScheme="b-FUtIiLRu6RsuIcde3KKhzz.9" schemeURI="http://ttXSOOlB/" valueURI="http://GixLBNfF/"
xml:lang="el-GR">Bf6AElMCkh.mqutOmETp0</datacite:subject>
</datacite:subjects>
<!--18. License Condition (R)-->
<oaire:licenseCondition startDate="2019-02-01" uri="http://creativecommons.org/licenses/by-nc/4.0/">Creative Commons Attribution-NonCommercial</oaire:licenseCondition>
<!--19. Coverage (R)-->
<dc:coverage>2000-2010</dc:coverage>
<dc:coverage>
scheme=historic; content=Ming Dynasty
</dc:coverage>
<!--20. Size (O)-->
<datacite:sizes>
<datacite:size>15 pages</datacite:size>
<datacite:size>6 MB</datacite:size>
</datacite:sizes>
<!--21. Geo Location (O)-->
<datacite:geoLocations>
<datacite:geoLocation>
<datacite:geoLocationPlace>Atlantic Ocean</datacite:geoLocationPlace>
<datacite:geoLocationPoint>
<datacite:pointLongitude>31.233</datacite:pointLongitude>
<datacite:pointLatitude>-67.302</datacite:pointLatitude>
</datacite:geoLocationPoint>
<datacite:geoLocationBox>
<datacite:westBoundLongitude>-71.032</datacite:westBoundLongitude>
<datacite:eastBoundLongitude>-68.211</datacite:eastBoundLongitude>
<datacite:southBoundLatitude>41.090</datacite:southBoundLatitude>
<datacite:northBoundLatitude>42.893</datacite:northBoundLatitude>
</datacite:geoLocationBox>
<datacite:geoLocationPolygon>
<datacite:polygonPoint>
<datacite:pointLongitude>88.557</datacite:pointLongitude>
<datacite:pointLatitude>-0.604</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>-143.373</datacite:pointLongitude>
<datacite:pointLatitude>88.832</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>87.3</datacite:pointLongitude>
<datacite:pointLatitude>-36.556</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>129.128</datacite:pointLongitude>
<datacite:pointLatitude>4.616</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:inPolygonPoint>
<datacite:pointLongitude>-17.547</datacite:pointLongitude>
<datacite:pointLatitude>-47.629</datacite:pointLatitude>
</datacite:inPolygonPoint>
</datacite:geoLocationPolygon>
<datacite:geoLocationPolygon>
<datacite:polygonPoint>
<datacite:pointLongitude>78.121</datacite:pointLongitude>
<datacite:pointLatitude>19.341</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>-118.035</datacite:pointLongitude>
<datacite:pointLatitude>53.647</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>-49.07</datacite:pointLongitude>
<datacite:pointLatitude>-45.561</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:polygonPoint>
<datacite:pointLongitude>132.484</datacite:pointLongitude>
<datacite:pointLatitude>-41.146</datacite:pointLatitude>
</datacite:polygonPoint>
<datacite:inPolygonPoint>
<datacite:pointLongitude>179.293</datacite:pointLongitude>
<datacite:pointLatitude>15.364</datacite:pointLatitude>
</datacite:inPolygonPoint>
</datacite:geoLocationPolygon>
</datacite:geoLocation>
</datacite:geoLocations>
<!--22. Resource Version (R)-->
<oaire:version uri="http://purl.org/coar/version/c_be7fb7dd8ff6fe43">NA</oaire:version>
<!--23. File Location (MA)-->
<oaire:file accessRightsURI="http://purl.org/coar/access_right/c_abf2" mimeType="application/pdf" objectType="fulltext">http://link-to-the-fulltext.org</oaire:file>
<oaire:file accessRightsURI="http://purl.org/coar/access_right/c_abf2" mimeType="application/pdf" objectType="fulltext">http://europepmc.org/articles/PMC5574022?pdf=render</oaire:file>
<!--24. Citation Title (R)-->
<oaire:citationTitle>some Journal Title</oaire:citationTitle>
<!--25. Citation Volume (R)-->
<oaire:citationVolume>10</oaire:citationVolume>
<!--26. Citation Issue (R)-->
<oaire:citationIssue>1</oaire:citationIssue>
<!--27. Citation Start Page (R)-->
<oaire:citationStartPage>100</oaire:citationStartPage>
<!--28. Citation End Page (R)-->
<oaire:citationEndPage>105</oaire:citationEndPage>
<!--29. Citation Edition (R)-->
<oaire:citationEdition>2</oaire:citationEdition>
<!--30. Citation Conference Place (R)-->
<oaire:citationConferencePlace>Berlin</oaire:citationConferencePlace>
<!--31. Citation Conference Date (R)-->
<oaire:citationConferenceDate>2013-10-22 - 2013-10-23</oaire:citationConferenceDate>
<!--32. Audience (O)-->
<dcterms:audience>Researchers</dcterms:audience>
<dcterms:audience>Students</dcterms:audience>
</resource>

View File

@@ -0,0 +1,197 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--This document supports all available OpenAIRE Guidelines for Literature Repositories v4-->
<resource
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns="http://namespace.openaire.eu/schema/oaire/"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<!--1. Title (M)-->
<datacite:titles>
<datacite:notitle xml:lang="en-US">
National Institute for Environmental Studies and Center
for Climate System Research Japan
</datacite:notitle>
</datacite:titles>
<!--2. Creator (M)-->
<datacite:creators>
<datacite:creator>
<!-- <datacite:creatorName nameType="Personal">Evans, R.J.</datacite:creatorName>-->
<!-- <datacite:givenName>Robert</datacite:givenName>-->
<!-- <datacite:familyName>Evans</datacite:familyName>-->
<!-- <datacite:affiliation>Institute of Science and Technology</datacite:affiliation>-->
<!-- <datacite:nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org">-->
<!-- 1234-1234-1234-1234-->
<!-- </datacite:nameIdentifier>-->
</datacite:creator>
</datacite:creators>
<!--3. Contributor (MA)-->
<datacite:contributors>
<datacite:contributor>
<!-- <datacite:contributorName nameType="Organizational">i-5uvLfGD2R.y8fXv442y8o.D6v1</datacite:contributorName>-->
<!-- <datacite:givenName>givenName3</datacite:givenName>-->
<!-- <datacite:familyName>familyName3</datacite:familyName>-->
<datacite:nameIdentifier>MuDRo.ymY7qG1Or.9Ny</datacite:nameIdentifier>
<datacite:nameIdentifier nameIdentifierScheme="k_Q7K8AbQiwLlreHzyU" schemeURI="http://EaTpassQ/">a1Sp</datacite:nameIdentifier>
<datacite:affiliation>affiliation5</datacite:affiliation>
<datacite:affiliation>affiliation6</datacite:affiliation>
</datacite:contributor>
</datacite:contributors>
<!--4. Funding Reference (MA)-->
<oaire:fundingReferences>
<oaire:fundingReference>
<!-- <oaire:funderName>pZT7SjviMp4yodvIG</oaire:funderName> 1 error for missing mandatory element-->
<oaire:funderIdentifier funderIdentifierType="invalid">ff238yCNbhv5k5Y8AmbpyzYt</oaire:funderIdentifier><!--1 warning for incorrect value of funderIdentifierType-->
<oaire:fundingStream>svI_6A</oaire:fundingStream>
<oaire:awardNumber>E-</oaire:awardNumber><!--1 warning for missing awardURI attribute-->
<!--1 warning for missing oaire:awardTitle element-->
</oaire:fundingReference>
<oaire:fundingReference>
<oaire:funderName>funderName</oaire:funderName>
<oaire:funderIdentifier funderIdentifierType="Crossref Funder">QwqkD4y</oaire:funderIdentifier>
<oaire:fundingStream>yyADU1SFmiS-meqQ6</oaire:fundingStream>
<oaire:awardNumber awardURI="http://oKladiXr/">JyORkpwzt</oaire:awardNumber>
<oaire:awardTitle>gyg2F9EKT7Gp.mSBU8.Drqf</oaire:awardTitle>
</oaire:fundingReference>
</oaire:fundingReferences>
<!--5. Alternate Identifier (R)-->
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="invalid">10.1002/chem.201701589</datacite:alternateIdentifier><!--1 error for incorrect value of alternateIdentifierType-->
<datacite:alternateIdentifier alternateIdentifierType="PMID">PMC5574022</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<!--6. Related Identifier (R)-->
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="ISSN" relationType="HasMetadata" relatedMetadataScheme="sdfhr" schemeURI="http://fsdfg.cm" schemeType="XSD" resourceTypeGeneral="DataPaper">
0947-6539
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="invalid" relationType="invalid" resourceTypeGeneral="Model">1521-3765</datacite:relatedIdentifier><!--2 errors for incorrect value of relationType and relatedIdentifierType-->
</datacite:relatedIdentifiers>
<datacite:dates>
<!--7. Embargo Period Date (MA)-->
<datacite:nodate dateType="Accepted">2011-12-01</datacite:nodate>
<datacite:nodate dateType="Available">2012-12-01</datacite:nodate>
<!--10. Publication Date (M)-->
<datacite:date dateType="invalid">2011</datacite:date>
</datacite:dates>
<!--8. Language (MA)-->
<dc:language>eng</dc:language>
<dc:language>deu</dc:language>
<dc:language>invalidTag</dc:language><!--1 error for not allowed value-->
<!--9. Publisher (MA)-->
<dc:nopublisher>
Loughborough University. Department of Computer Science
</dc:nopublisher>
<dc:nopublisher>John Wiley &amp; Sons, Inc. (US)</dc:nopublisher>
<!--11. Resource Type (M)-->
<oaire:resourceType resourceTypeGeneral="invalid" uri="http://invalid">report</oaire:resourceType><!--2 errors for invalid mandatory attributes-->
<!--12. Description (MA)-->
<dc:description xml:lang="invalidTag"><!--1 warning for not allowed value of optional attribute-->
Foreword [by] Hazel Anderson; Introduction; The scientific heresy:
transformation of a society; Consciousness as causal reality [etc]
</dc:description>
<dc:description xml:lang="en-US">
A number of problems in quantum state and system identification are
addressed.
</dc:description>
<!--13. Format (R)-->
<dc:format>video/quicktime</dc:format>
<dc:format>application/pdf</dc:format>
<dc:format>application/invalid</dc:format><!--Should return 1 warning for not allowed value?-->
<!--14 . Resource Identifier (M)-->
<datacite:identifier identifierType="invalid">http://urn.kb.se/resolve?urn=urn:nbn:se:uu:diva-160648</datacite:identifier><!--1 error for invalid mandatory attribute-->
<!--15. Access Rights (M)-->
<datacite:rights uri="http://purl.org/coar/access_right/c_f1cf">invalid</datacite:rights><!--1 error for not allowed value-->
<!--16. Source (R)-->
<dc:nosource>Ecology Letters (1461023X) vol.4 (2001)</dc:nosource>
<dc:nosource>Ecology Letters (1461025X) vol.5 (2002)</dc:nosource>
<!--17. Subject (MA)-->
<datacite:subjects>
<datacite:subject>ft12Zy</datacite:subject><!--3 warnings for missing attributes-->
<datacite:subject subjectScheme="b-FUtIiLRu6RsuIcde3KKhzz.9" schemeURI="http://ttXSOOlB/" valueURI="http://GixLBNfF/"
xml:lang="el-GR">Bf6AElMCkh.mqutOmETp0</datacite:subject>
</datacite:subjects>
<!--18. License Condition (R)-->
<oaire:licenseCondition startDate="invalid date">Creative Commons Attribution-NonCommercial</oaire:licenseCondition><!--2 errors for invalid attributes-->
<!--19. Coverage (R)-->
<dc:nocoverage>2000-2010</dc:nocoverage>
<dc:nocoverage>
scheme=historic; content=Ming Dynasty
</dc:nocoverage>
<!--20. Size (O)-->
<datacite:sizes>
<!-- <datacite:size>15 pages</datacite:size>-->
<!-- <datacite:size>6 MB</datacite:size>-->
</datacite:sizes>
<!--21. Geo Location (O)-->
<datacite:geoLocations>
<datacite:geoLocation>
<datacite:geoLocationPlace>Atlantic Ocean</datacite:geoLocationPlace>
<datacite:geoLocationPoint>
<datacite:pointLongitude>31.233</datacite:pointLongitude>
<datacite:nopointLatitude>-67.302</datacite:nopointLatitude><!--1 error-->
</datacite:geoLocationPoint>
<datacite:geoLocationBox>
<datacite:westBoundLongitude>-71.032</datacite:westBoundLongitude>
<datacite:eastBoundLongitude>-68.211</datacite:eastBoundLongitude>
<datacite:nosouthBoundLatitude>41.090</datacite:nosouthBoundLatitude><!--1 error-->
<datacite:northBoundLatitude>42.893</datacite:northBoundLatitude>
</datacite:geoLocationBox>
</datacite:geoLocation>
</datacite:geoLocations>
<!--22. Resource Version (R)-->
<oaire:version>irrelevant</oaire:version><!--1 warning for missing MA attribute-->
<!--23. File Location (MA)-->
<oaire:file accessRightsURI="http://invalid" mimeType="invalid/type" objectType="invalid">http://link-to-the-fulltext.org</oaire:file><!--3 warnings for incorrect attributes-->
<oaire:file accessRightsURI="http://purl.org/coar/access_right/c_abf2" mimeType="application/pdf" objectType="fulltext">http://europepmc.org/articles/PMC5574022?pdf=render</oaire:file>
<!--24. Citation Title (R)-->
<oaire:nocitationTitle>some Journal Title</oaire:nocitationTitle>
<!--25. Citation Volume (R)-->
<oaire:nocitationVolume>10</oaire:nocitationVolume>
<!--26. Citation Issue (R)-->
<oaire:nocitationIssue>1</oaire:nocitationIssue>
<!--27. Citation Start Page (R)-->
<oaire:nocitationStartPage>100</oaire:nocitationStartPage>
<!--28. Citation End Page (R)-->
<oaire:nocitationEndPage>105</oaire:nocitationEndPage>
<!--29. Citation Edition (R)-->
<oaire:nocitationEdition>2</oaire:nocitationEdition>
<!--30. Citation Conference Place (R)-->
<oaire:nocitationConferencePlace>Berlin</oaire:nocitationConferencePlace>
<!--31. Citation Conference Date (R)-->
<oaire:citationConferenceDate>2013-02-29</oaire:citationConferenceDate><!--1 warning for not allowed value-->
<!--32. Audience (O)-->
<dcterms:noaudience>Researchers</dcterms:noaudience>
<dcterms:noaudience>Students</dcterms:noaudience>
</resource>

View File

@@ -39,6 +39,7 @@
<module>dhp-doiboost</module>
<module>dhp-impact-indicators</module>
<module>dhp-swh</module>
<module>dhp-continuous-validation</module>
</modules>
<pluginRepositories>