Compare commits

...

21 Commits

Author SHA1 Message Date
Lampros Smyrnaios aaa7d3cf86 Merge remote-tracking branch 'main/beta' into continuous_validation2 2024-02-05 11:57:45 +02:00
Claudio Atzori 42f5506306 [orcid enrichment] fixed directory cleanup before distcp 2024-02-05 09:45:36 +02:00
Lampros Smyrnaios cbe7c6734a - Add documentation.
- Code polishing/cleanup.
2024-02-02 14:18:46 +02:00
Lampros Smyrnaios b5f4d37827 Merge branch 'beta' of https://code-repo.d4science.org/lsmyrnaios/dnet-hadoop into continuous_validation2 2024-01-31 13:01:42 +02:00
Alessia Bardi f2a08d8cc2 test for Italian records from IRS repositories 2024-01-30 19:20:14 +01:00
Miriam Baglioni a5995ab557 [orcid-enrichment] change the value of parameters. 2024-01-29 18:19:48 +01:00
Lampros Smyrnaios 8d0ed7d414 Continuous-Validation updates:
- Update the "uoa-validator-engine2" dependency.
- Update the "installProject.sh" script to account for potential conflict with previous builds.
- Add documentation.
2024-01-25 18:23:14 +02:00
Lampros Smyrnaios eee085c104 Force a check for updated releases and snapshots on remote repositories, when building the Continuous-Validation workflow. 2024-01-24 14:08:32 +02:00
Lampros Smyrnaios ff47a941f5 - Add the "installProject.sh" script.
- Show the Job-ID or potential deployment-error-logs, right after the deployment of the workflow.
- Code polishing.
2024-01-18 18:06:50 +02:00
Lampros Smyrnaios 00644ef487 - Fix the "NoSuchFieldError", caused by library-conflicts, by introducing the "oozie.libpath" property in "workflow.xml".
- Fix the value of the "outputPath" property, in "workflow.xml".
2024-01-18 15:46:27 +02:00
Lampros Smyrnaios 23ec57c670 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into continuous_validation2 2024-01-17 18:16:11 +02:00
Lampros Smyrnaios c17834dddf - Use "KryoSerializer" for Spark and register some "result" classes.
- Code polishing and cleanup.
2024-01-17 18:08:05 +02:00
Lampros Smyrnaios 32e02247bc Merge branch 'continuous_validation2' of https://code-repo.d4science.org/lsmyrnaios/dnet-hadoop into continuous_validation2 2024-01-09 17:05:07 +02:00
Lampros Smyrnaios eaa070f1e6 Code cleanup. 2024-01-09 17:03:35 +02:00
Claudio Atzori fc35b44e22 bumped version of uoa-validator-engine2 to 0.9.3 2024-01-09 15:56:56 +01:00
Lampros Smyrnaios 17282ea8fc - Fix the "is not NULL" checks inside "spark.filter()"
- Make sure the "outputPath" ends with a "/", in any case.
- Fix a parameter-description.
2023-12-20 15:15:56 +02:00
Claudio Atzori 22fa60c3dd removed lib biinary 2023-12-18 15:50:29 +01:00
Claudio Atzori 24173d7a0b continuous validation WIP 2023-12-18 15:46:36 +01:00
Lampros Smyrnaios a2feda6c07 - Fix acquiring the "openaire_guidelines" parameter.
- Use the right Guidelines-profile, depending on the "openaire_guidelines" version.
- Update log-levels.
- Optimize imports.
2023-12-18 10:57:40 +02:00
Lampros Smyrnaios b71633fd7f - Fix the location of the "input_continuous_validator_parameters.json" file.
- Fix handing the "isSparkSessionManaged" parameter.
- Add the "provided" scope for some dependencies. They do not inherit it from the main pom, since the "version" tag is declared, even though the value is the same as the one from the main pom.
- Code polishing / cleanup.
2023-12-15 18:29:38 +02:00
Lampros Smyrnaios 9e6a03e4e2 Initial commit of the "dhp-continuous-validation" module. 2023-12-15 15:53:31 +02:00
20 changed files with 794 additions and 29 deletions

View File

@@ -7,10 +7,10 @@ import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import eu.dnetlib.pace.model.Person;
import jdk.nashorn.internal.ir.annotations.Ignore;
public class UtilTest {
@@ -22,7 +22,7 @@ public class UtilTest {
}
@Test
@Ignore
@Disabled
public void paceResolverTest() {
PaceResolver paceResolver = new PaceResolver();
paceResolver.getComparator("keywordMatch", params);

View File

@@ -19,10 +19,7 @@ import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
@NoArgsConstructor
@AllArgsConstructor
@Getter
@Setter
public class BipProjectModel {
String projectId;
@@ -34,6 +31,17 @@ public class BipProjectModel {
String totalCitationCount;
public BipProjectModel() {
}
public BipProjectModel(String projectId, String numOfInfluentialResults, String numOfPopularResults, String totalImpulse, String totalCitationCount) {
this.projectId = projectId;
this.numOfInfluentialResults = numOfInfluentialResults;
this.numOfPopularResults = numOfPopularResults;
this.totalImpulse = totalImpulse;
this.totalCitationCount = totalCitationCount;
}
// each project bip measure has exactly one value, hence one key-value pair
private Measure createMeasure(String measureId, String measureValue) {
@@ -71,4 +79,43 @@ public class BipProjectModel {
createMeasure("totalCitationCount", totalCitationCount));
}
public String getProjectId() {
return projectId;
}
public void setProjectId(String projectId) {
this.projectId = projectId;
}
public String getNumOfInfluentialResults() {
return numOfInfluentialResults;
}
public void setNumOfInfluentialResults(String numOfInfluentialResults) {
this.numOfInfluentialResults = numOfInfluentialResults;
}
public String getNumOfPopularResults() {
return numOfPopularResults;
}
public void setNumOfPopularResults(String numOfPopularResults) {
this.numOfPopularResults = numOfPopularResults;
}
public String getTotalImpulse() {
return totalImpulse;
}
public void setTotalImpulse(String totalImpulse) {
this.totalImpulse = totalImpulse;
}
public String getTotalCitationCount() {
return totalCitationCount;
}
public void setTotalCitationCount(String totalCitationCount) {
this.totalCitationCount = totalCitationCount;
}
}

View File

@@ -0,0 +1,33 @@
# Continuous Validation
This module is responsible for deploying an **Oozie Workflow** (on the desired cluster), which executes a **Spark** action.<br>
This action takes the HDFS path of a directory of parquet files containing metadata records and applies the validation process to all of them, in parallel. It then outputs the results, in JSON format, to the given output directory.<br>
The validation process is powered by the "[**uoa-validator-engine2**](https://code-repo.d4science.org/MaDgIK/uoa-validator-engine2)" software,
which is included as a dependency inside the main "pom.xml" file.<br>
### Configure the workflow
Set the desired value for each of the parameters defined in the "/src/main/resources/eu/dnetlib/dhp/continuous_validator/oozie_app/workflow.xml" file.<br>
The most important parameters are the following:
- ***parquet_path***: the full path of the input parquet file, or of a directory with parquet files, to be processed
- ***openaire_guidelines***: the version of the OpenAIRE Guidelines to validate the records against; valid values: "4.0", "3.0", "2.0", "fair_data", "fair_literature_v4"
- ***output_path***: the output directory for the result JSON files. Be careful to use a base directory which is different from the one this module is running on, as that base directory will be deleted during a new deployment.
### Install the project and then deploy and run the workflow
Run the **./installProject.sh** script and then the **./runOozieWorkflow.sh** script.<br>
Use the "workflow-id" displayed by the "runOozieWorkflow.sh" script to check the running status and logs, in the remote machine, as follows:
- Check the status: `oozie job -oozie http://<cluster's domain and port>/oozie -info <Workflow-ID>`
- Copy the "Job-id" from the output of the above command (numbers with ONE underscore between them).
- Check the job's logs (not the app's logs!): `yarn logs -applicationId application_<Job-ID>`
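Once the workflow has finished, the results can be inspected directly from HDFS. Below is a minimal, hypothetical sketch; the path shown is the default ***output_path*** from "workflow.xml", so adjust it to your own configuration:

```bash
# Hypothetical output location; use the "output_path" configured in "workflow.xml".
OUTPUT_PATH="/user/<userName>/continuous_validation/output"

# List the result files (gzip-compressed JSON).
hdfs dfs -ls "${OUTPUT_PATH}"

# Peek at the first few validation results.
hdfs dfs -cat "${OUTPUT_PATH}"/*.gz | gunzip | head
```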
<br><br>
**Note**:<br>
If you encounter any "java.lang.NoSuchFieldError" issues in the logs, re-run using the following steps (a shell sketch follows the list):
- Delete some remote directories related to the workflow in your user's dir: /user/<userName>/
- ***.sparkStaging***
- ***oozie-oozi***
- Run the **./installProject.sh** script and then the **./runOozieWorkflow.sh** script.
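A minimal shell sketch of the recovery steps above (the user directory is a placeholder and `-skipTrash` is optional):

```bash
# Hypothetical cleanup of the workflow-related directories under the user's HDFS dir.
hdfs dfs -rm -r -skipTrash "/user/<userName>/.sparkStaging"
hdfs dfs -rm -r -skipTrash "/user/<userName>/oozie-oozi"

# Rebuild, redeploy and rerun the workflow.
./installProject.sh && ./runOozieWorkflow.sh
```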

View File

@@ -0,0 +1,18 @@
# Install the whole "dnet-hadoop" project.
# Delete this module's previous build-files in order to avoid any conflicts.
rm -rf target/ ||
# Go to the root directory of this project.
cd ../../
# Select the build profile.
DEFAULT_PROFILE='' # It's the empty profile.
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
# Install the project.
mvn clean install -U ${CHOSEN_MAVEN_PROFILE} -Dmaven.test.skip=true
# We skip the tests for all modules, since they take a long time and some of them fail.
# Any test added to this module will be executed in the "runOozieWorkflow.sh" script.

View File

@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>dhp-continuous-validation</artifactId>
<!-- The "version" is inherited from the parent module. -->
<dependencies>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>uoa-validator-engine2</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,14 @@
# This script deploys and runs the Oozie workflow on the cluster defined in the "~/.dhp/application.properties" file.
# Select the build profile.
DEFAULT_PROFILE='' # It's the empty profile.
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
# Build and deploy this module.
mvn clean package -U ${CHOSEN_MAVEN_PROFILE} -Poozie-package,deploy,run \
-Dworkflow.source.dir=eu/dnetlib/dhp/continuous_validation
# Show the Oozie-job-ID.
echo -e "\n\nShowing the contents of \"extract-and-run-on-remote-host.log\":\n"
cat ./target/extract-and-run-on-remote-host.log
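# Illustrative follow-up (not part of the original script): the workflow-id can also be captured
# from the log programmatically, assuming the standard Oozie ID format appears in it
# (e.g. 0000123-240205103045123-oozie-oozi-W).
WF_ID=$(grep -oE '[0-9]+-[0-9]+-oozie-[A-Za-z]+-W' ./target/extract-and-run-on-remote-host.log | tail -n 1)
echo -e "\nLatest Oozie workflow-id: ${WF_ID}\n"
# Query its status (replace the placeholder with your cluster's Oozie endpoint):
#   oozie job -oozie http://<cluster's domain and port>/oozie -info ${WF_ID}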

View File

@@ -0,0 +1,144 @@
package eu.dnetlib.dhp.continuous_validation;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.validator2.validation.StandardValidationResult;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.Guideline;
import eu.dnetlib.validator2.validation.guideline.StandardResult;
import eu.dnetlib.validator2.validation.guideline.openaire.*;
import eu.dnetlib.validator2.validation.utils.TestUtils;
public class ContinuousValidation {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidation.class);
private static final String parametersFile = "input_continuous_validation_parameters.json";
public static void main(String[] args) {
ArgumentApplicationParser parser = null;
Boolean isSparkSessionManaged = false;
String parquetPath = null;
String guidelines = null;
String outputPath = null;
try {
String jsonConfiguration = IOUtils
.toString(
Objects
.requireNonNull(
ContinuousValidation.class
.getResourceAsStream("/eu/dnetlib/dhp/continuous_validation/" + parametersFile)),
StandardCharsets.UTF_8);
parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
} catch (Exception e) {
logger.error("Error when parsing the parameters!", e);
return;
}
isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged")) // This param is not mandatory, so it may be null.
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
// This is needed to implement a unit test in which the spark session is created in the context of the
// unit test itself rather than inside the spark application.
// Set the parquet input, either a parquet-file or a directory with parquet files.
parquetPath = parser.get("parquet_path");
if (parquetPath == null) {
logger.error("The \"parquet_path\" was not retrieved from the parameters file: " + parametersFile);
return;
}
guidelines = parser.get("openaire_guidelines");
if (guidelines == null) {
logger.error("The \"openaire_guidelines\" was not retrieved from the parameters file: " + parametersFile);
return;
}
outputPath = parser.get("output_path");
if (outputPath == null) {
logger.error("The \"output_path\" was not retrieved from the parameters file: " + parametersFile);
return;
}
if (!outputPath.endsWith(File.separator))
outputPath += File.separator;
logger
.info(
"Will validate the contents of parquetFile: \"" + parquetPath + "\", against guidelines: \""
+ guidelines + "\"" + " and will output the results in the outputPath: " + outputPath);
AbstractOpenAireProfile profile;
switch (guidelines) {
case "4.0":
profile = new LiteratureGuidelinesV4Profile();
break;
case "3.0":
profile = new LiteratureGuidelinesV3Profile();
break;
case "2.0":
profile = new DataArchiveGuidelinesV2Profile();
break;
case "fair_data":
profile = new FAIR_Data_GuidelinesProfile();
break;
case "fair_literature_v4":
profile = new FAIR_Literature_GuidelinesV4Profile();
break;
default:
logger.error("Invalid OpenAIRE Guidelines were given: " + guidelines);
return;
}
SparkConf conf = new SparkConf();
conf.setAppName(ContinuousValidation.class.getSimpleName());
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(new Class[] {
XMLApplicationProfile.ValidationResult.class, Guideline.Result.class, StandardValidationResult.class,
StandardResult.class
});
String finalParquetPath = parquetPath;
String finalOutputPath = outputPath;
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
// Use a new instance of Document Builder in each worker, as it is not thread-safe.
MapFunction<Row, XMLApplicationProfile.ValidationResult> validateMapFunction = row -> profile
.validate(
row.getAs("id").toString(),
TestUtils
.getDocumentBuilder()
.parse(IOUtils.toInputStream(row.getAs("body").toString(), StandardCharsets.UTF_8)));
spark
.read()
.parquet(finalParquetPath)
.filter("encoding = 'XML' and id is not NULL and body is not NULL")
.map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(finalOutputPath); // The output files have names of the input files which were processed.
});
}
}

View File

@@ -0,0 +1,26 @@
[
{
"paramName": "issm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "when true will stop SparkSession after job execution",
"paramRequired": false
},
{
"paramName": "prq_file",
"paramLongName": "parquet_path",
"paramDescription": "the full path of a parquet-file or a directory with parquet files, to be processed",
"paramRequired": true
},
{
"paramName": "guidelines",
"paramLongName": "openaire_guidelines",
"paramDescription": "the version of the OpenAIRE Guidelines to validate the records against",
"paramRequired": true
},
{
"paramName": "o",
"paramLongName": "output_path",
"paramDescription": "the path of the output-directory where the result-json-files will be stored",
"paramRequired": true
}
]

View File

@@ -0,0 +1,22 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>sparkExecutorMemoryOverhead</name>
<value>1G</value>
</property>
</configuration>

View File

@@ -0,0 +1,115 @@
<workflow-app name="Validate metadata records against OpenAIRE Guidelines" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>parquet_path</name>
<value>/var/lib/dnet/mdstore_PROD/md-7763c517-538d-4aa7-83f8-6096b3ce0d96/md-7763c517-538d-4aa7-83f8-6096b3ce0d96-1702622132535/store</value>
<description>the full path of a parquet-file or a directory with parquet files, to be processed</description>
</property>
<property>
<name>openaire_guidelines</name>
<value>4.0</value>
<description>the version of the OpenAIRE Guidelines to validate the records against</description>
</property>
<property>
<name>output_path</name>
<value>/user/${dhp.hadoop.frontend.user.name}/continuous_validation/output</value>
<description>the path of the output-directory where the result-json-files will be stored</description>
</property>
<property>
<name>sparkDriverMemory</name>
<value>4096M</value>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<value>2048m</value>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<value>4</value>
<description>number of cores used by single executor</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<!--<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>-->
<property>
<name>spark2EventLogDir</name>
<value>spark2/logs</value>
<description>spark 2.* event log dir location</description>
</property>
<property>
<name>oozie.libpath</name>
<value>${nameNode}/user/oozie/share/lib</value>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="ValidateMetadataRecords"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ValidateMetadataRecords">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Validate multiple records against OpenAIRE Guidelines</name>
<class>eu.dnetlib.dhp.continuous_validation.ContinuousValidation</class>
<jar>dhp-continuous-validation-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<!-- Arguments passed to the "main" method of the class defined above. -->
<arg>--parquet_path</arg><arg>${parquet_path}</arg>
<arg>--openaire_guidelines</arg><arg>${openaire_guidelines}</arg>
<arg>--output_path</arg><arg>${output_path}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@@ -0,0 +1,43 @@
<configuration debug="false">
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/> <!-- Default delay: 0ms (zero) -->
<!-- <Appenders>-->
<appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/ContinuousValidation.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
<fileNamePattern>logs/ContinuousValidation.%i.log.zip</fileNamePattern>
<minIndex>1</minIndex>
<maxIndex>10</maxIndex>
</rollingPolicy>
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
<maxFileSize>100MB</maxFileSize> <!-- Lower the size to check the "RollingFileAppender" during tests. -->
</triggeringPolicy>
<encoder>
<charset>UTF-8</charset>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
</encoder>
</appender>
<appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<charset>UTF-8</charset>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
</encoder>
</appender>
<!-- </Appenders>-->
<root level="debug">
<appender-ref ref="Console" />
</root>
<logger name="eu.dnetlib.dhp.continuous_validation" level="debug"/>
<logger name="eu.dnetlib.validator2" level="error"/>
<logger name="org.sparkproject" level="info"/>
<logger name="org.apache.spark" level="info"/>
<logger name="org.apache.hadoop" level="info"/>
<logger name="org.apache.parquet" level="info"/>
</configuration>

View File

@@ -30,7 +30,6 @@ import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.schema.orcid.AuthorData;
import eu.dnetlib.doiboost.orcid.util.DownloadsReport;
import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector;
import jdk.nashorn.internal.ir.annotations.Ignore;
public class OrcidClientTest {
final int REQ_LIMIT = 24;
@@ -152,7 +151,7 @@ public class OrcidClientTest {
}
// @Test
@Ignore
@Disabled
private void testModifiedDate() throws ParseException {
testDate(toRetrieveDate);
testDate(toNotRetrieveDate);
@@ -332,7 +331,7 @@ public class OrcidClientTest {
}
@Test
@Ignore
@Disabled
void testUpdatedRecord() throws Exception {
final String base64CompressedRecord = IOUtils
.toString(getClass().getResourceAsStream("0000-0001-7281-6306.compressed.base64"));
@@ -341,7 +340,7 @@
}
@Test
@Ignore
@Disabled
void testUpdatedWork() throws Exception {
final String base64CompressedWork = "H4sIAAAAAAAAAM1XS2/jNhC+51cQOuxJsiXZSR03Vmq0G6Bo013E6R56oyXaZiOJWpKy4y783zvUg5Ksh5uiCJogisX5Zjj85sHx3f1rFKI94YKyeGE4I9tAJPZZQOPtwvj9+cGaGUhIHAc4ZDFZGEcijHvv6u7A+MtcPVCSSgsUQObYzuzaccBEguVuYYxt+LHgbwKP6a11M3WnY6UzrpB7KuiahlQeF0aSrkPqGwhcisWcxpLwGIcLYydlMh+PD4fDiHGfBvDcjmMxLhGlBglSH8vsIH0qGlLqBFRIGvvDWjWQ1iMJJ2CKBANqGlNqMbkj3IpxRPq1KkypFZFoDRHa0aRfq8JoNjhnfIAJJS6xPouiIQJyeYmGQzE+cO5cXqITcItBlKyASExD0a93jiwtvJDjYXDDAqBPHoH2wMmVWGNf8xyyaEBiSTeUDHHWBpd2Nmmc10yfbgHQrHCyIRxKjQwRUoFKPRwEnIgBnQJQVdGeQgJaCRN0OMnPkaUFVbD9WkpaIndQJowf+8EFoIpTErJjBFQOBavElFpfUxwC9ZcqvQErdQXhe+oPFF8BaObupYzVsYEOARzSoZBWmKqaBMHcV0Wf8oG0beIqD+Gdkz0lhyE3NajUW6fhQFSV9Nw/MCBYyofYa0EN7wrBz13eP+Y+J6obWgE8Pdd2JpYD94P77Ezmjj13b0bu5PqPu3EXumEnxEJaEVxSUIHammsra+53z44zt2/m1/bItaeVtQ6dhs3c4XytvW75IYUchMKvEHVUyqmnWBFAS0VJrqSvQde6vp251ux2NtFuKcVOi+oK9YY0M0Cn6o4J6WkvtEK2XJ1vfPGAZxSoK8lb+SxJBbLQx1CohOLndjJUywQWUFmqEi3G6Zaqf/7buOyYJd5IYpfmf0XipfP18pDR9cQCeEuJQI/Lx36bFbVnpBeL2UwmqQw7ApAvf4GeGGQdEbENgolui/wdpjHaYCmPCIPPAmGBIsxfoLUhyRCB0SeCakEBJRKBtfJ+UBbI15TG4PaGBAhWthx8DmFYtHZQujv1CWbLLdzmmUKmHEOWCe1/zdu78bn/+YH+hCOqOzcXfFwuP6OVT/P710crwqGXFrpNaM2GT3MXarw01i15TIi3pmtJXgtbTVGf3h6HKfF+wBAnPyTfdCChudlm5gZaoG//F9pPZsGQcqqbyZN5hBau5OoIJ3PPwjTKDuG4s5MZp2rMzF5PZoK34IT6PIFOPrk+mTiVO5aJH2C+JJRjE/06eoRfpJxa4VgyYaLlaJUv/EhCfATMU/76gEOfmehL/qbJNNHjaFna+CQYB8wvo9PpPFJ5MOrJ1Ix7USBZqBl7KRNOx1d3jex7SG6zuijqCMWRusBsncjZSrM2u82UJmqzpGhvUJN2t6caIM9QQgO9c0t40UROnWsJd2Rbs+nsxpna9u30ttNkjechmzHjEST+X5CkkuNY0GzQkzyFseAf7lSZuLwdh1xSXKvvQJ4g4abTYgPV7uMt3rskohlJmMa82kQkshtyBEIYqQ+YB8X3oRHg7iFKi/bZP+Ao+T6BJhIT/vNPi8ffZs+flk+r2v0WNroZiyWn6xRmadHqTJXsjLJczElAZX6TnJdoWTM1SI2gfutv3rjeBt5t06rVvNuWup29246tlvluO+u2/G92bK9DXheL6uFd/Q3EaRDZqBIAAA==";
final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork);
@@ -413,7 +412,7 @@
}
@Test
@Ignore
@Disabled
void testDownloadedAuthor() throws Exception {
final String base64CompressedWork = "H4sIAAAAAAAAAI2Yy26jMBSG932KiD0hIe1MiwiVZjGLkWbX2XRHsFOsgs3YJmnefszFFy4+mUhtVPz9P/gcH/vQ9PWrrjYXzAVh9Bjst7tgg2nBEKEfx+DP28/wOdgImVOUV4ziY3DDInjNHlKOC8ZRMnxtmlyWxyDaqU+ofg7h/uX7IYwfn+Ngo25ARUKoxJzm1TEopWySKLper1vGC4LU74+IikgTWoFRW+SyfyyfxCBag4iQhBawyoGMDjdqJrnECJAZRquYLDEPaV5jv8oyWlXj+qTiXZLGr7KMiQbnjAOR6IY1W7C6hgIwjGt6SKGfHsY13ajHYipLIcIyJ5Xw6+akdvjEtyt4wxEwM6+VGph5N2zYr2ENhQRhKsmZYChmS1j7nFs6VIBPOwImKhyfMVeFg6GAWEjrcoQ4FoBmBGwVXYhagGHDBIEX+ZzUDiqyn35VN6rJUpUJ4zc/PAI2T03FbrUKJZQszWjV3zavVOjvVfoE01qB+YUUQPGNwHTt3luxJjdqh1AxJFBKLWOrSeCcF13RtxxYtlPOPqH6m+MLwVfoMQ2kdae2ArLajc6fTxkI1nIoegs0yB426pMO+0fSw07xDKMu0XKSde5C2VvrlVMijRzFwqY7XTJI1QMLWcmEzMxtDdxfHiYSgTNJnYJ1K9y5k0tUrMgrnGGaRiuXxxuClulYUbr0nBvpkYLjvgTCGsuSoex3f1CEvRPHKI184NJKtKeaiO7cD5E61bJ4F+9DFd7d01u8Tw6H5BBvvz8f3q3nXLGIeJULGdaqeVBBRK7rS7h/fNvvk/gpedxt4923dxP7Fc3KtKuc1BhlkrfYmeN4dcmrhmbw60+HmWw2CKgbTuqc32CXKTTmeTWT6bDBjPsQ0DTpnchdaYO0ayQ2FyLIiVREqs25aU8VKYLRbK0BsyZuqvr1MU2Sm/rDdhe/2CRN6FU/b+oBVyj1zqRtC5F8kAumfTclsl+s7EoNQu64nfOaVLeezX60Z3XCULLi6GI2IZGTEeey7fec9lBAuXawIHKcpifE7GABHWfoxLVfpUNPBXoMbZWrHFsR3bPAk9J9i2sw9nW6AQT1mpk++7JhW+v44Hmt8PomJqfD13jRnvFOSxCKtu6qHoyBbQ7cMFo750UEfGaXm6bEeplXIXj2hvL6mA7tzvIwmM9pbJFBG834POZdLGi2gH2u9u0K9HMwn5PTioFWLufzmrS4oNuU9Pkt2rf/2jMs7fMdm2rQTTM+j+49AzToAVuXYA1mD2k0+XdE9vAP+JYR5NcQAAA=";
final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork);
@@ -421,7 +420,7 @@
}
@Test
@Ignore
@Disabled
void testDownloadedWork() throws Exception {
final String base64CompressedWork = "H4sIAAAAAAAAANVa63LiOBb+z1Oo+LVbhbkGAlTCLE1Id9IhTQV6unr/CVvB2tiWR5Khmal5rX2BfbE9ki3b3Jzt6Y13h6pQSPrOXTo6knL10zffQxvCBWXBdbVVb1YRCWzm0GB9Xf28vLX6VSQkDhzssYBcV3dEVH8aVa62jL8M1RcKI2kBAYwNLnrtXrMPFCGW7nW10YSPBX8dq3XRb1swNGgomkaG3FBBV9SjcnddDaOVR+0qApUCMaSBJDzA3nXVlTIcNhrb7bbOuE0d+F43AtEwCENBnMjGUhtyjiSFGBqHCkkDu5gqB0rpSMgJsCJOAVmKMVRMuoRbAfbJeaoMY6h84q8gQi4Nz1NlmNQbnDNe4Ak1bLA28/0iB8TjBg1GMV5gdzxu0CGoxSBKlkMkpp44T3eINBxeyG5bKDABpJb7QF1guRpOsd/iOWRRhwSSPlNS5LNjsOHzHAXxmjlHmwBSr3DyTDgsNVLkkAxk6LDjcCIKaBJAtoo2FCagFTJBiyf5IdJwUAv2PJUaNUgXlgnju/PgBJDFKfTYzgdXFgXLYAzVLxH2wPWvrfQ9mKEVhG+oXbD4EsD+3H1txqaxgQwBPqRFIc0w2WoSBHNbLfqIF0zbfVymIbQ52VCyLVIzBRm6VeQVRFWNHuoHDASLeJH3jqDVUQXB5yrOH0ObE5UNLQe+R+1mu2U1u1Z7sGy2hq3esN2tt5oXf79qnELv8fGwkJYPmxSswD1uA6vVXrY7w+5g2G3WuxedjNsJmj2escJx33G/ZXsU5iAs/AyRR0WcjpRXBLglc0lM1BjP59bX1qw9Hn/+dH87/dy9vBikeinKkyzVHjoqJNWIk7QuE3KU6pES6O7MwsarJh44QW1KowcWOCxAC9tlzEPsGX3YrYGQICgS0JKzENach2bEoTYNyKEQzaJyQnzSqesKSaV3IhRx92L8tLAm7GerjbZUujSwlFnIobqKkTuth+Q4ED4Vqqypp5JyfK8ah5Ji0f8AZVSGT2TZVGXfBLw/liOyqdRpJqfyXr8ldyEZrehKkm8Jr/2hc3Qb7EVk9DfMJbU98pu3k+6aETXXBebCZpt23tBaBUfSZRxdo98eYmgNfRxrh3zAnldDM/37FvZ+IiWtoQfddgiaEGBIDGCG7btA7jgBP9svAK2h90l4yYqIGop5jgMHXA4J0NB9ksR+YTX0qFtfqACO01jGjDHFPx552AW2W0P3uvGROk4NLfTvCeNS8X9MaDg1rL9Qz6PYh7En3f4ZNmKS6nUfQYFmE6PYe05IYBqPFGaq5wHlYpaoDbYqxokVK+JBerz51z+BIzc+SfSdTHVrTiSYtZzGFNOdGrr5ohsLF2+NUguqppkDoua6/S6yXwAYu44pM+/HiZ1BwEDWMqYbC5fjZ+MEBwMjb4PRLdTFYWrUwiUhJH/H+G3pMl/7fjqJhTGwSwU5lnfLsVDmxIPvmRetbJeCOsvfaxWXbXWxLVziqNky51BLW1OP2JKzgNoASSa7Gk1WAfrLI9mirzBBIUD1r/W/AgrMla7CjEMOzYBJolo30/mnxd0SzadPt5+eZtMb9O7rEN1wNINgEA8Ha+IxNMdrHLCQRR4TFRCudnmB7m6GqD0YDCqW+lQqlfnndw93iw/TJ/RwN5k+TqZDNJkAQyUvUlWvktjrdgbQEeI1EapN8Grd7MOeYJlfajSxWVOMfcIhVQXgfcFsqhcceobVA/U3GjsbDCYrjVSKSz0wHo8Xym6dArRvvjsbAfUGouFr8s5lG9o72DVVSy1saDqMqlarWW+12r2GiIXXMzuAU6AQcLLqWf3mZRf6iOlsNQdda9BudhQnvNNdPWN8XA7BgU5G2k3pLADA75XD3BSnn3y+3M90SbZWGczkxiRVmfSaJrd0V8u0yG3CeYRyht7O07Ste45weuqNmhcpLO44woEPRq1eilLN/f3ntEqGPFfzi2PmudHTO3EOEKf60LdTyUeDr7KIIzKfTfqtdr896JxklQtbES/IQD7UyL+SZIJSXYhLHkHZ9oqEjPR1MRzWu550cDYdCeI9n+S4hzouUU76+UeCQJ0fjkKn0+v3m703i0Eh/z97BCDH/XAAziTIt4rH94j7s4dHbSY/HJ90e3qriBQL+MMxCGETs9j/QxiSQ5PaS63/QsZqdS8vOxdvtj7Oc//fL4dTI2LvDAfVA6erSDKe3+cPxw70j4c5HHZlfLT9iAEZYKjZkxOYKZxymJy659l/t+QZllC5bvVJrzShD5GN0/NkiaZyqNcJh0NrdngtTfp7wviaHB+SS1Ng7O+Sk3h5HodT4S8RyY78pUmGM6eEg1l8tVCa1KnvY/SgrzDKsxRLF46j+uahNKH3BE6lsIb1lUxpUhdS3WUE+u6nPP/qiyAsklumMhMz9SBNqeus0oQ+QXqwIa7m3qy87IhXnBLPI8kVXXlZMaASm5vAEqWuKYkvHMtbPdiPiIdm6dVmeVMZjX+lfnKDWmaRAT7ev6ctTfhEF3RoWnJeXlKfSXcHcsf69rk0wTd4Qx30RV9yl5et2Ipwqe/SS5MJXiU8vbIv2b/qZaC8PZ65AUwj9QJR3vx1mQ9b7VPy1FFebnSpWq7xi0qJuwA+fLYpL7rwJdLXobcSa97kM4Cl35f3YXmofp0+8R9gBc/XeXL9Vn38pH7mLTs27z9T8ky1n7ynlZ0I4le78rYzl6t/woG5krwQlpcRcLDD2UPkH5F73C9G5tFKfY0q/wa1TIHI0CgAAA==";
final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork);

View File

@@ -114,7 +114,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
</spark>
<ok to="wait"/>
@@ -142,7 +142,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
</spark>
<ok to="wait"/>
@@ -170,7 +170,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
</spark>
<ok to="wait"/>
@@ -198,7 +198,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
</spark>
<ok to="wait"/>
@@ -225,8 +225,8 @@
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/orcid/orcidprop</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
</spark>
<ok to="fork-join-exec-propagation"/>
<error to="Kill"/>
@@ -247,9 +247,10 @@
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--executor-cores=4
--executor-memory=4G
--driver-memory=${sparkDriverMemory}
--conf spark.executor.memoryOverhead=5G
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@@ -259,9 +260,9 @@
--conf spark.speculation=false
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
--conf spark.sql.shuffle.partitions=3840
--conf spark.sql.shuffle.partitions=15000
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
@@ -291,7 +292,7 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
@@ -321,7 +322,7 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
@@ -351,7 +352,7 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>

View File

@@ -49,10 +49,10 @@
<action name="reset_outputpath">
<fs>
<delete path="${graphPath}/datasource"/>
<delete path="${graphPath}/organization"/>
<delete path="${graphPath}/project"/>
<delete path="${graphPath}/relation"/>
<delete path="${targetPath}/datasource"/>
<delete path="${targetPath}/organization"/>
<delete path="${targetPath}/project"/>
<delete path="${targetPath}/relation"/>
</fs>
<ok to="copy_datasource"/>
<error to="Kill"/>

View File

@@ -1250,6 +1250,21 @@ class MappersTest {
System.out.println("***************");
}
@Test
void testIRISPub() throws IOException, DocumentException {
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("iris-odf.xml")));
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************");
final Publication p = (Publication) list.get(0);
assertNotNull(p.getInstance().get(0).getUrl().get(0));
assertValidId(p.getId());
System.out.println(p.getInstance().get(0).getUrl());
p.getPid().forEach(x -> System.out.println(x.getValue()));
p.getInstance().get(0).getAlternateIdentifier().forEach(x -> System.out.println(x.getValue()));
}
@Test
void testNotWellFormed() throws IOException {
final String xml = IOUtils

View File

@@ -0,0 +1,215 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<oai:header xmlns="http://namespace.openaire.eu/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<identifier>oai:air.unimi.it:2434/907506</identifier>
<datestamp>2024-01-04T12:42:51Z</datestamp>
<setSpec>com_2434_73555</setSpec>
<setSpec>col_2434_73557</setSpec>
<setSpec>openaire</setSpec>
<dr:dateOfTransformation>2024-01-29T16:56:50.632Z</dr:dateOfTransformation>
<dri:objIdentifier>od______1261::ff2d9e058e7bea90a27f41c31078e601</dri:objIdentifier>
<dri:recordIdentifier>oai:air.unimi.it:2434/907506</dri:recordIdentifier>
<dri:dateOfCollection/>
<dri:mdFormat/>
<dri:mdFormatInterpretation/>
<dri:repositoryId/>
<oaf:datasourceprefix> od______1261</oaf:datasourceprefix>
</oai:header>
<metadata>
<oaire:resource xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:exslt="http://exslt.org/common"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:rdf="http://www.w3.org/TR/rdf-concepts/"
xmlns:doc="http://www.lyncode.com/xoai"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:vc="http://www.w3.org/2007/XMLSchema-versioning"
xmlns="http://www.openarchives.org/OAI/2.0/"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<datacite:titles>
<datacite:title xml:lang="en">Ensuring tests of conservation interventions build on existing literature</datacite:title>
</datacite:titles>
<datacite:creator>
<datacite:creator>
<datacite:creatorName>W.J. Sutherland</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>S.T. Alvarez-Castaneda</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>T. Amano</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>R. Ambrosini</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>P. Atkinson</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>J.M. Baxter</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>A.L. Bond</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>P.J. Boon</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>K.L. Buchanan</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>J. Barlow</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>G. Bogliani</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>O.M. Bragg</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M. Burgman</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M.W. Cadotte</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M. Calver</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>S.J. Cooke</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>R.T. Corlett</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>V. Devictor</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>J.G. Ewen</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M. Fisher</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>G. Freeman</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>E. Game</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>B.J. Godley</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>C. Gortazar</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>I.R. Hartley</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>D.L. Hawksworth</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>K.A. Hobson</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M.-. Lu</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>B. Martin-Lopez</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>K. Ma</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>A. Machado</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>D. Mae</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M. Mangiacotti</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>D.J. Mccafferty</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>V. Melfi</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>S. Molur</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>A.J. Moore</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>S.D. Murphy</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>D. Norri</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>A.P.E. van Oudenhoven</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>J. Power</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>E.C. Ree</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M.W. Schwartz</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>I. Storch</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>C. Wordley</datacite:creatorName>
</datacite:creator>
</datacite:creator>
<datacite:relatedIdentifiers>
</datacite:relatedIdentifiers>
<datacite:dates>
<datacite:date dateType="Accepted">2020</datacite:date>
<datacite:date dateType="Issued">2020</datacite:date>
<datacite:date dateType="Available">2022-06-20</datacite:date>
</datacite:dates>
<dc:language>eng</dc:language>
<dc:publisher>Wiley Blackwell Publishing</dc:publisher>
<oaire:resourceType resourceTypeGeneral="literature"
uri="http://purl.org/coar/resource_type/c_6501">journal article</oaire:resourceType>
<dc:format>application/pdf</dc:format>
<datacite:identifier xmlns:datacite="http://datacite.org/schema/kernel-3"
identifierType="Handle">2434/907506</datacite:identifier>
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
<datacite:subjects>
<datacite:subject>Conservation of Natural Resources</datacite:subject>
</datacite:subjects>
<datacite:sizes/>
<datacite:sizes/>
<datacite:sizes>
<datacite:size>191802 bytes</datacite:size>
</datacite:sizes>
<oaire:file accessRightsURI="" mimeType="application/pdf" objectType="fulltext">https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf</oaire:file>
</oaire:resource>
<oaf:identifier identifierType="DOI">10.1111/cobi.13555</oaf:identifier>
<oaf:identifier identifierType="PMID">32779884</oaf:identifier>
<oaf:fulltext>https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf</oaf:fulltext>
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
<oaf:dateAccepted>2020-01-01</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:language>eng</oaf:language>
<oaf:hostedBy name="Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano"
id="opendoar____::1261"/>
<oaf:collectedFrom name="Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano"
id="opendoar____::1261"/>
</metadata>
</record>

View File

@@ -244,4 +244,27 @@ public class XmlRecordFactoryTest {
}
@Test
public void testIrisGuidelines4() throws DocumentException, IOException {
final ContextMapper contextMapper = new ContextMapper();
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
XmlConverterJob.schemaLocation);
final Publication p = OBJECT_MAPPER
.readValue(
IOUtils.toString(getClass().getResourceAsStream("iris-odf-4.json")),
Publication.class);
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
assertNotNull(xml);
final Document doc = new SAXReader().read(new StringReader(xml));
assertNotNull(doc);
System.out.println(doc.asXML());
}
}

File diff suppressed because one or more lines are too long

View File

@@ -39,6 +39,7 @@
<module>dhp-doiboost</module>
<module>dhp-impact-indicators</module>
<module>dhp-swh</module>
<module>dhp-continuous-validation</module>
</modules>
<pluginRepositories>

View File

@@ -204,6 +204,12 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>uoa-validator-engine2</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>