Compare commits

...

21 Commits

Author SHA1 Message Date
Lampros Smyrnaios aaa7d3cf86 Merge remote-tracking branch 'main/beta' into continuous_validation2 2024-02-05 11:57:45 +02:00
Claudio Atzori 42f5506306 [orcid enrichment] fixed directory cleanup before distcp 2024-02-05 09:45:36 +02:00
Lampros Smyrnaios cbe7c6734a - Add documentation.
- Code polishing/cleanup.
2024-02-02 14:18:46 +02:00
Lampros Smyrnaios b5f4d37827 Merge branch 'beta' of https://code-repo.d4science.org/lsmyrnaios/dnet-hadoop into continuous_validation2 2024-01-31 13:01:42 +02:00
Alessia Bardi f2a08d8cc2 test for Italian records from IRS repositories 2024-01-30 19:20:14 +01:00
Miriam Baglioni a5995ab557 [orcid-enrichment] change the value of parameters. 2024-01-29 18:19:48 +01:00
Lampros Smyrnaios 8d0ed7d414 Continuous-Validation updates:
- Update the "uoa-validator-engine2" dependency.
- Update the "installProject.sh" script to account for potential conflict with previous builds.
- Add documentation.
2024-01-25 18:23:14 +02:00
Lampros Smyrnaios eee085c104 Force a check for updated releases and snapshots on remote repositories, when building the Continuous-Validation workflow. 2024-01-24 14:08:32 +02:00
Lampros Smyrnaios ff47a941f5 - Add the "installProject.sh" script.
- Show the Job-ID or potential deployment-error-logs, right after the deployment of the workflow.
- Code polishing.
2024-01-18 18:06:50 +02:00
Lampros Smyrnaios 00644ef487 - Fix the "NoSuchFieldError", caused by library-conflicts, by introducing the "oozie.libpath" property in "workflow.xml".
- Fix the value of the "outputPath" property, in "workflow.xml".
2024-01-18 15:46:27 +02:00
Lampros Smyrnaios 23ec57c670 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into continuous_validation2 2024-01-17 18:16:11 +02:00
Lampros Smyrnaios c17834dddf - Use "KryoSerializer" for Spark and register some "result" classes.
- Code polishing and cleanup.
2024-01-17 18:08:05 +02:00
Lampros Smyrnaios 32e02247bc Merge branch 'continuous_validation2' of https://code-repo.d4science.org/lsmyrnaios/dnet-hadoop into continuous_validation2 2024-01-09 17:05:07 +02:00
Lampros Smyrnaios eaa070f1e6 Code cleanup. 2024-01-09 17:03:35 +02:00
Claudio Atzori fc35b44e22 bumped version of uoa-validator-engine2 to 0.9.3 2024-01-09 15:56:56 +01:00
Lampros Smyrnaios 17282ea8fc - Fix the "is not NULL" checks inside "spark.filter()"
- Make sure the "outputPath" ends with a "/", in any case.
- Fix a parameter-description.
2023-12-20 15:15:56 +02:00
Claudio Atzori 22fa60c3dd removed lib biinary 2023-12-18 15:50:29 +01:00
Claudio Atzori 24173d7a0b continuous validation WIP 2023-12-18 15:46:36 +01:00
Lampros Smyrnaios a2feda6c07 - Fix acquiring the "openaire_guidelines" parameter.
- Use the right Guidelines-profile, depending on the "openaire_guidelines" version.
- Update log-levels.
- Optimize imports.
2023-12-18 10:57:40 +02:00
Lampros Smyrnaios b71633fd7f - Fix the location of the "input_continuous_validator_parameters.json" file.
- Fix handing the "isSparkSessionManaged" parameter.
- Add the "provided" scope for some dependencies. They do not inherit it from the main pom, since the "version" tag is declared, even though the value is the same as the one from the main pom.
- Code polishing / cleanup.
2023-12-15 18:29:38 +02:00
Lampros Smyrnaios 9e6a03e4e2 Initial commit of the "dhp-continuous-validation" module. 2023-12-15 15:53:31 +02:00
20 changed files with 794 additions and 29 deletions

View File

@@ -7,10 +7,10 @@ import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import eu.dnetlib.pace.model.Person;
import jdk.nashorn.internal.ir.annotations.Ignore;
public class UtilTest {
@@ -22,7 +22,7 @@ public class UtilTest {
}
@Test
@Ignore
@Disabled
public void paceResolverTest() {
PaceResolver paceResolver = new PaceResolver();
paceResolver.getComparator("keywordMatch", params);

View File

@@ -19,10 +19,7 @@ import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
@NoArgsConstructor
@AllArgsConstructor
@Getter
@Setter
public class BipProjectModel {
String projectId;
@@ -34,6 +31,17 @@ public class BipProjectModel {
String totalCitationCount;
public BipProjectModel() {
}
public BipProjectModel(String projectId, String numOfInfluentialResults, String numOfPopularResults, String totalImpulse, String totalCitationCount) {
this.projectId = projectId;
this.numOfInfluentialResults = numOfInfluentialResults;
this.numOfPopularResults = numOfPopularResults;
this.totalImpulse = totalImpulse;
this.totalCitationCount = totalCitationCount;
}
// each project bip measure has exactly one value, hence one key-value pair
private Measure createMeasure(String measureId, String measureValue) {
@@ -71,4 +79,43 @@ public class BipProjectModel {
createMeasure("totalCitationCount", totalCitationCount));
}
public String getProjectId() {
return projectId;
}
public void setProjectId(String projectId) {
this.projectId = projectId;
}
public String getNumOfInfluentialResults() {
return numOfInfluentialResults;
}
public void setNumOfInfluentialResults(String numOfInfluentialResults) {
this.numOfInfluentialResults = numOfInfluentialResults;
}
public String getNumOfPopularResults() {
return numOfPopularResults;
}
public void setNumOfPopularResults(String numOfPopularResults) {
this.numOfPopularResults = numOfPopularResults;
}
public String getTotalImpulse() {
return totalImpulse;
}
public void setTotalImpulse(String totalImpulse) {
this.totalImpulse = totalImpulse;
}
public String getTotalCitationCount() {
return totalCitationCount;
}
public void setTotalCitationCount(String totalCitationCount) {
this.totalCitationCount = totalCitationCount;
}
}

View File

@@ -0,0 +1,33 @@
# Continuous Validation
This module is responsible for deploying an **Oozie Workflow** (on the desired cluster), which executes a **Spark** action.<br>
This action takes the HDFS path of a directory of parquet files containing metadata records and applies the validation process to all of them, in parallel. It then outputs the results, in JSON format, to the given output directory.<br>
The validation process is powered by the "[**uoa-validator-engine2**](https://code-repo.d4science.org/MaDgIK/uoa-validator-engine2)" software,
which is included as a dependency inside the main "pom.xml" file.<br>
### Configure the workflow
Set the desired value for each of the parameters defined in the "/src/main/resources/eu/dnetlib/dhp/continuous_validator/oozie_app/workflow.xml" file.<br>
The most important parameters are the following:
- ***parquet_path***: the full path of the input parquet file, or of a directory with parquet files, to be processed
- ***openaire_guidelines***: the version of the OpenAIRE Guidelines to validate the records against; valid values: "4.0", "3.0", "2.0", "fair_data", "fair_literature_v4"
- ***output_path***: the output directory for the result JSON files. Be careful to use a base directory which is different from the one this module is running on, as that base directory will be deleted during a new deployment.
### Install the project and then deploy and run the workflow
Run the **./installProject.sh** script and then the **./runOozieWorkflow.sh** script.<br>
Use the "workflow-id" displayed by the "runOozieWorkflow.sh" script to check the running status and logs, in the remote machine, as follows:
- Check the status: `oozie job -oozie http://<cluster's domain and port>/oozie -info <Workflow-ID>`
- Copy the "Job-id" from the output of the above command (numbers with ONE underscore between them).
- Check the job's logs (not the app's logs!): `yarn logs -applicationId application_<Job-ID>`
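Once the workflow has finished, the results can be inspected directly from HDFS. Below is a minimal, hypothetical sketch; the path shown is the default ***output_path*** from "workflow.xml", so adjust it to your own configuration:

```bash
# Hypothetical output location; use the "output_path" configured in "workflow.xml".
OUTPUT_PATH="/user/<userName>/continuous_validation/output"

# List the result files (gzip-compressed JSON).
hdfs dfs -ls "${OUTPUT_PATH}"

# Peek at the first few validation results.
hdfs dfs -cat "${OUTPUT_PATH}"/*.gz | gunzip | head
```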
<br><br>
**Note**:<br>
If you encounter any "java.lang.NoSuchFieldError" issues in the logs, re-run using the following steps (a shell sketch follows the list):
- Delete some remote directories related to the workflow in your user's dir: /user/<userName>/
- ***.sparkStaging***
- ***oozie-oozi***
- Run the **./installProject.sh** script and then the **./runOozieWorkflow.sh** script.
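A minimal shell sketch of the recovery steps above (the user directory is a placeholder and `-skipTrash` is optional):

```bash
# Hypothetical cleanup of the workflow-related directories under the user's HDFS dir.
hdfs dfs -rm -r -skipTrash "/user/<userName>/.sparkStaging"
hdfs dfs -rm -r -skipTrash "/user/<userName>/oozie-oozi"

# Rebuild, redeploy and rerun the workflow.
./installProject.sh && ./runOozieWorkflow.sh
```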

View File

@@ -0,0 +1,18 @@
# Install the whole "dnet-hadoop" project.
# Delete this module's previous build-files in order to avoid any conflicts.
rm -rf target/ ||
# Go to the root directory of this project.
cd ../../
# Select the build profile.
DEFAULT_PROFILE='' # It's the empty profile.
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
# Install the project.
mvn clean install -U ${CHOSEN_MAVEN_PROFILE} -Dmaven.test.skip=true
# We skip the tests for all modules, since they take a long time and some of them fail.
# Any test added to this module will be executed in the "runOozieWorkflow.sh" script.

View File

@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>dhp-continuous-validation</artifactId>
<!-- The "version" is inherited from the parent module. -->
<dependencies>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>uoa-validator-engine2</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,14 @@
# This script deploys and runs the Oozie workflow on the cluster defined in the "~/.dhp/application.properties" file.
# Select the build profile.
DEFAULT_PROFILE='' # It's the empty profile.
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
# Build and deploy this module.
mvn clean package -U ${CHOSEN_MAVEN_PROFILE} -Poozie-package,deploy,run \
-Dworkflow.source.dir=eu/dnetlib/dhp/continuous_validation
# Show the Oozie-job-ID.
echo -e "\n\nShowing the contents of \"extract-and-run-on-remote-host.log\":\n"
cat ./target/extract-and-run-on-remote-host.log
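# Illustrative follow-up (not part of the original script): the workflow-id can also be captured
# from the log programmatically, assuming the standard Oozie ID format appears in it
# (e.g. 0000123-240205103045123-oozie-oozi-W).
WF_ID=$(grep -oE '[0-9]+-[0-9]+-oozie-[A-Za-z]+-W' ./target/extract-and-run-on-remote-host.log | tail -n 1)
echo -e "\nLatest Oozie workflow-id: ${WF_ID}\n"
# Query its status (replace the placeholder with your cluster's Oozie endpoint):
#   oozie job -oozie http://<cluster's domain and port>/oozie -info ${WF_ID}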

View File

@@ -0,0 +1,144 @@
package eu.dnetlib.dhp.continuous_validation;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.validator2.validation.StandardValidationResult;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.Guideline;
import eu.dnetlib.validator2.validation.guideline.StandardResult;
import eu.dnetlib.validator2.validation.guideline.openaire.*;
import eu.dnetlib.validator2.validation.utils.TestUtils;
public class ContinuousValidation {
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(ContinuousValidation.class);
private static final String parametersFile = "input_continuous_validation_parameters.json";
public static void main(String[] args) {
ArgumentApplicationParser parser = null;
Boolean isSparkSessionManaged = false;
String parquetPath = null;
String guidelines = null;
String outputPath = null;
try {
String jsonConfiguration = IOUtils
.toString(
Objects
.requireNonNull(
ContinuousValidation.class
.getResourceAsStream("/eu/dnetlib/dhp/continuous_validation/" + parametersFile)),
StandardCharsets.UTF_8);
parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
} catch (Exception e) {
logger.error("Error when parsing the parameters!", e);
return;
}
isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged")) // This param is not mandatory, so it may be null.
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
// This is needed to implement a unit test in which the spark session is created in the context of the
// unit test itself rather than inside the spark application.
// Set the parquet input, either a parquet-file or a directory with parquet files.
parquetPath = parser.get("parquet_path");
if (parquetPath == null) {
logger.error("The \"parquet_path\" was not retrieved from the parameters file: " + parametersFile);
return;
}
guidelines = parser.get("openaire_guidelines");
if (guidelines == null) {
logger.error("The \"openaire_guidelines\" was not retrieved from the parameters file: " + parametersFile);
return;
}
outputPath = parser.get("output_path");
if (outputPath == null) {
logger.error("The \"output_path\" was not retrieved from the parameters file: " + parametersFile);
return;
}
if (!outputPath.endsWith(File.separator))
outputPath += File.separator;
logger
.info(
"Will validate the contents of parquetFile: \"" + parquetPath + "\", against guidelines: \""
+ guidelines + "\"" + " and will output the results in the outputPath: " + outputPath);
AbstractOpenAireProfile profile;
switch (guidelines) {
case "4.0":
profile = new LiteratureGuidelinesV4Profile();
break;
case "3.0":
profile = new LiteratureGuidelinesV3Profile();
break;
case "2.0":
profile = new DataArchiveGuidelinesV2Profile();
break;
case "fair_data":
profile = new FAIR_Data_GuidelinesProfile();
break;
case "fair_literature_v4":
profile = new FAIR_Literature_GuidelinesV4Profile();
break;
default:
logger.error("Invalid OpenAIRE Guidelines were given: " + guidelines);
return;
}
SparkConf conf = new SparkConf();
conf.setAppName(ContinuousValidation.class.getSimpleName());
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(new Class[] {
XMLApplicationProfile.ValidationResult.class, Guideline.Result.class, StandardValidationResult.class,
StandardResult.class
});
String finalParquetPath = parquetPath;
String finalOutputPath = outputPath;
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
// Use a new instance of Document Builder in each worker, as it is not thread-safe.
MapFunction<Row, XMLApplicationProfile.ValidationResult> validateMapFunction = row -> profile
.validate(
row.getAs("id").toString(),
TestUtils
.getDocumentBuilder()
.parse(IOUtils.toInputStream(row.getAs("body").toString(), StandardCharsets.UTF_8)));
spark
.read()
.parquet(finalParquetPath)
.filter("encoding = 'XML' and id is not NULL and body is not NULL")
.map(validateMapFunction, Encoders.bean(XMLApplicationProfile.ValidationResult.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(finalOutputPath); // The output files have names of the input files which were processed.
});
}
}

View File

@@ -0,0 +1,26 @@
[
{
"paramName": "issm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "when true will stop SparkSession after job execution",
"paramRequired": false
},
{
"paramName": "prq_file",
"paramLongName": "parquet_path",
"paramDescription": "the full path of a parquet-file or a directory with parquet files, to be processed",
"paramRequired": true
},
{
"paramName": "guidelines",
"paramLongName": "openaire_guidelines",
"paramDescription": "the version of the OpenAIRE Guidelines to validate the records against",
"paramRequired": true
},
{
"paramName": "o",
"paramLongName": "output_path",
"paramDescription": "the path of the output-directory where the result-json-files will be stored",
"paramRequired": true
}
]

View File

@@ -0,0 +1,22 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>sparkExecutorMemoryOverhead</name>
<value>1G</value>
</property>
</configuration>

View File

@@ -0,0 +1,115 @@
<workflow-app name="Validate metadata records against OpenAIRE Guidelines" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>parquet_path</name>
<value>/var/lib/dnet/mdstore_PROD/md-7763c517-538d-4aa7-83f8-6096b3ce0d96/md-7763c517-538d-4aa7-83f8-6096b3ce0d96-1702622132535/store</value>
<description>the full path of a parquet-file or a directory with parquet files, to be processed</description>
</property>
<property>
<name>openaire_guidelines</name>
<value>4.0</value>
<description>the version of the OpenAIRE Guidelines to validate the records against</description>
</property>
<property>
<name>output_path</name>
<value>/user/${dhp.hadoop.frontend.user.name}/continuous_validation/output</value>
<description>the path of the output-directory where the result-json-files will be stored</description>
</property>
<property>
<name>sparkDriverMemory</name>
<value>4096M</value>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<value>2048m</value>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<value>4</value>
<description>number of cores used by single executor</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<!--<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>-->
<property>
<name>spark2EventLogDir</name>
<value>spark2/logs</value>
<description>spark 2.* event log dir location</description>
</property>
<property>
<name>oozie.libpath</name>
<value>${nameNode}/user/oozie/share/lib</value>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="ValidateMetadataRecords"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ValidateMetadataRecords">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Validate multiple records against OpenAIRE Guidelines</name>
<class>eu.dnetlib.dhp.continuous_validation.ContinuousValidation</class>
<jar>dhp-continuous-validation-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<!-- Arguments passed to the "main" method of the class defined above. -->
<arg>--parquet_path</arg><arg>${parquet_path}</arg>
<arg>--openaire_guidelines</arg><arg>${openaire_guidelines}</arg>
<arg>--output_path</arg><arg>${output_path}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@@ -0,0 +1,43 @@
<configuration debug="false">
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/> <!-- Default delay: 0ms (zero) -->
<!-- <Appenders>-->
<appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/ContinuousValidation.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
<fileNamePattern>logs/ContinuousValidation.%i.log.zip</fileNamePattern>
<minIndex>1</minIndex>
<maxIndex>10</maxIndex>
</rollingPolicy>
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
<maxFileSize>100MB</maxFileSize> <!-- Lower the size to check the "RollingFileAppender" during tests. -->
</triggeringPolicy>
<encoder>
<charset>UTF-8</charset>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
</encoder>
</appender>
<appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<charset>UTF-8</charset>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
</encoder>
</appender>
<!-- </Appenders>-->
<root level="debug">
<appender-ref ref="Console" />
</root>
<logger name="eu.dnetlib.dhp.continuous_validation" level="debug"/>
<logger name="eu.dnetlib.validator2" level="error"/>
<logger name="org.sparkproject" level="info"/>
<logger name="org.apache.spark" level="info"/>
<logger name="org.apache.hadoop" level="info"/>
<logger name="org.apache.parquet" level="info"/>
</configuration>

View File

@@ -30,7 +30,6 @@ import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.schema.orcid.AuthorData;
import eu.dnetlib.doiboost.orcid.util.DownloadsReport;
import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector;
import jdk.nashorn.internal.ir.annotations.Ignore;
public class OrcidClientTest {
final int REQ_LIMIT = 24;
@@ -152,7 +151,7 @@ public class OrcidClientTest {
}
// @Test
@Ignore
@Disabled
private void testModifiedDate() throws ParseException {
testDate(toRetrieveDate);
testDate(toNotRetrieveDate);
@@ -332,7 +331,7 @@ public class OrcidClientTest {
}
@Test
@Ignore
@Disabled
void testUpdatedRecord() throws Exception {
final String base64CompressedRecord = IOUtils
.toString(getClass().getResourceAsStream("0000-0001-7281-6306.compressed.base64"));
@@ -341,7 +340,7 @@
}
@Test
@Ignore
@Disabled
void testUpdatedWork() throws Exception {
final String base64CompressedWork = "H4sIAAAAAAAAAM1XS2/jNhC+51cQOuxJsiXZSR03Vmq0G6Bo013E6R56oyXaZiOJWpKy4y783zvUg5Ksh5uiCJogisX5Zjj85sHx3f1rFKI94YKyeGE4I9tAJPZZQOPtwvj9+cGaGUhIHAc4ZDFZGEcijHvv6u7A+MtcPVCSSgsUQObYzuzaccBEguVuYYxt+LHgbwKP6a11M3WnY6UzrpB7KuiahlQeF0aSrkPqGwhcisWcxpLwGIcLYydlMh+PD4fDiHGfBvDcjmMxLhGlBglSH8vsIH0qGlLqBFRIGvvDWjWQ1iMJJ2CKBANqGlNqMbkj3IpxRPq1KkypFZFoDRHa0aRfq8JoNjhnfIAJJS6xPouiIQJyeYmGQzE+cO5cXqITcItBlKyASExD0a93jiwtvJDjYXDDAqBPHoH2wMmVWGNf8xyyaEBiSTeUDHHWBpd2Nmmc10yfbgHQrHCyIRxKjQwRUoFKPRwEnIgBnQJQVdGeQgJaCRN0OMnPkaUFVbD9WkpaIndQJowf+8EFoIpTErJjBFQOBavElFpfUxwC9ZcqvQErdQXhe+oPFF8BaObupYzVsYEOARzSoZBWmKqaBMHcV0Wf8oG0beIqD+Gdkz0lhyE3NajUW6fhQFSV9Nw/MCBYyofYa0EN7wrBz13eP+Y+J6obWgE8Pdd2JpYD94P77Ezmjj13b0bu5PqPu3EXumEnxEJaEVxSUIHammsra+53z44zt2/m1/bItaeVtQ6dhs3c4XytvW75IYUchMKvEHVUyqmnWBFAS0VJrqSvQde6vp251ux2NtFuKcVOi+oK9YY0M0Cn6o4J6WkvtEK2XJ1vfPGAZxSoK8lb+SxJBbLQx1CohOLndjJUywQWUFmqEi3G6Zaqf/7buOyYJd5IYpfmf0XipfP18pDR9cQCeEuJQI/Lx36bFbVnpBeL2UwmqQw7ApAvf4GeGGQdEbENgolui/wdpjHaYCmPCIPPAmGBIsxfoLUhyRCB0SeCakEBJRKBtfJ+UBbI15TG4PaGBAhWthx8DmFYtHZQujv1CWbLLdzmmUKmHEOWCe1/zdu78bn/+YH+hCOqOzcXfFwuP6OVT/P710crwqGXFrpNaM2GT3MXarw01i15TIi3pmtJXgtbTVGf3h6HKfF+wBAnPyTfdCChudlm5gZaoG//F9pPZsGQcqqbyZN5hBau5OoIJ3PPwjTKDuG4s5MZp2rMzF5PZoK34IT6PIFOPrk+mTiVO5aJH2C+JJRjE/06eoRfpJxa4VgyYaLlaJUv/EhCfATMU/76gEOfmehL/qbJNNHjaFna+CQYB8wvo9PpPFJ5MOrJ1Ix7USBZqBl7KRNOx1d3jex7SG6zuijqCMWRusBsncjZSrM2u82UJmqzpGhvUJN2t6caIM9QQgO9c0t40UROnWsJd2Rbs+nsxpna9u30ttNkjechmzHjEST+X5CkkuNY0GzQkzyFseAf7lSZuLwdh1xSXKvvQJ4g4abTYgPV7uMt3rskohlJmMa82kQkshtyBEIYqQ+YB8X3oRHg7iFKi/bZP+Ao+T6BJhIT/vNPi8ffZs+flk+r2v0WNroZiyWn6xRmadHqTJXsjLJczElAZX6TnJdoWTM1SI2gfutv3rjeBt5t06rVvNuWup29246tlvluO+u2/G92bK9DXheL6uFd/Q3EaRDZqBIAAA==";
final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork);
@@ -413,7 +412,7 @@
}
@Test
@Ignore
@Disabled
void testDownloadedAuthor() throws Exception {
final String base64CompressedWork = "H4sIAAAAAAAAAI2Yy26jMBSG932KiD0hIe1MiwiVZjGLkWbX2XRHsFOsgs3YJmnefszFFy4+mUhtVPz9P/gcH/vQ9PWrrjYXzAVh9Bjst7tgg2nBEKEfx+DP28/wOdgImVOUV4ziY3DDInjNHlKOC8ZRMnxtmlyWxyDaqU+ofg7h/uX7IYwfn+Ngo25ARUKoxJzm1TEopWySKLper1vGC4LU74+IikgTWoFRW+SyfyyfxCBag4iQhBawyoGMDjdqJrnECJAZRquYLDEPaV5jv8oyWlXj+qTiXZLGr7KMiQbnjAOR6IY1W7C6hgIwjGt6SKGfHsY13ajHYipLIcIyJ5Xw6+akdvjEtyt4wxEwM6+VGph5N2zYr2ENhQRhKsmZYChmS1j7nFs6VIBPOwImKhyfMVeFg6GAWEjrcoQ4FoBmBGwVXYhagGHDBIEX+ZzUDiqyn35VN6rJUpUJ4zc/PAI2T03FbrUKJZQszWjV3zavVOjvVfoE01qB+YUUQPGNwHTt3luxJjdqh1AxJFBKLWOrSeCcF13RtxxYtlPOPqH6m+MLwVfoMQ2kdae2ArLajc6fTxkI1nIoegs0yB426pMO+0fSw07xDKMu0XKSde5C2VvrlVMijRzFwqY7XTJI1QMLWcmEzMxtDdxfHiYSgTNJnYJ1K9y5k0tUrMgrnGGaRiuXxxuClulYUbr0nBvpkYLjvgTCGsuSoex3f1CEvRPHKI184NJKtKeaiO7cD5E61bJ4F+9DFd7d01u8Tw6H5BBvvz8f3q3nXLGIeJULGdaqeVBBRK7rS7h/fNvvk/gpedxt4923dxP7Fc3KtKuc1BhlkrfYmeN4dcmrhmbw60+HmWw2CKgbTuqc32CXKTTmeTWT6bDBjPsQ0DTpnchdaYO0ayQ2FyLIiVREqs25aU8VKYLRbK0BsyZuqvr1MU2Sm/rDdhe/2CRN6FU/b+oBVyj1zqRtC5F8kAumfTclsl+s7EoNQu64nfOaVLeezX60Z3XCULLi6GI2IZGTEeey7fec9lBAuXawIHKcpifE7GABHWfoxLVfpUNPBXoMbZWrHFsR3bPAk9J9i2sw9nW6AQT1mpk++7JhW+v44Hmt8PomJqfD13jRnvFOSxCKtu6qHoyBbQ7cMFo750UEfGaXm6bEeplXIXj2hvL6mA7tzvIwmM9pbJFBG834POZdLGi2gH2u9u0K9HMwn5PTioFWLufzmrS4oNuU9Pkt2rf/2jMs7fMdm2rQTTM+j+49AzToAVuXYA1mD2k0+XdE9vAP+JYR5NcQAAA=";
final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork);
@@ -421,7 +420,7 @@
}
@Test
@Ignore
@Disabled
void testDownloadedWork() throws Exception {
final String base64CompressedWork = "H4sIAAAAAAAAANVa63LiOBb+z1Oo+LVbhbkGAlTCLE1Id9IhTQV6unr/CVvB2tiWR5Khmal5rX2BfbE9ki3b3Jzt6Y13h6pQSPrOXTo6knL10zffQxvCBWXBdbVVb1YRCWzm0GB9Xf28vLX6VSQkDhzssYBcV3dEVH8aVa62jL8M1RcKI2kBAYwNLnrtXrMPFCGW7nW10YSPBX8dq3XRb1swNGgomkaG3FBBV9SjcnddDaOVR+0qApUCMaSBJDzA3nXVlTIcNhrb7bbOuE0d+F43AtEwCENBnMjGUhtyjiSFGBqHCkkDu5gqB0rpSMgJsCJOAVmKMVRMuoRbAfbJeaoMY6h84q8gQi4Nz1NlmNQbnDNe4Ak1bLA28/0iB8TjBg1GMV5gdzxu0CGoxSBKlkMkpp44T3eINBxeyG5bKDABpJb7QF1guRpOsd/iOWRRhwSSPlNS5LNjsOHzHAXxmjlHmwBSr3DyTDgsNVLkkAxk6LDjcCIKaBJAtoo2FCagFTJBiyf5IdJwUAv2PJUaNUgXlgnju/PgBJDFKfTYzgdXFgXLYAzVLxH2wPWvrfQ9mKEVhG+oXbD4EsD+3H1txqaxgQwBPqRFIc0w2WoSBHNbLfqIF0zbfVymIbQ52VCyLVIzBRm6VeQVRFWNHuoHDASLeJH3jqDVUQXB5yrOH0ObE5UNLQe+R+1mu2U1u1Z7sGy2hq3esN2tt5oXf79qnELv8fGwkJYPmxSswD1uA6vVXrY7w+5g2G3WuxedjNsJmj2escJx33G/ZXsU5iAs/AyRR0WcjpRXBLglc0lM1BjP59bX1qw9Hn/+dH87/dy9vBikeinKkyzVHjoqJNWIk7QuE3KU6pES6O7MwsarJh44QW1KowcWOCxAC9tlzEPsGX3YrYGQICgS0JKzENach2bEoTYNyKEQzaJyQnzSqesKSaV3IhRx92L8tLAm7GerjbZUujSwlFnIobqKkTuth+Q4ED4Vqqypp5JyfK8ah5Ji0f8AZVSGT2TZVGXfBLw/liOyqdRpJqfyXr8ldyEZrehKkm8Jr/2hc3Qb7EVk9DfMJbU98pu3k+6aETXXBebCZpt23tBaBUfSZRxdo98eYmgNfRxrh3zAnldDM/37FvZ+IiWtoQfddgiaEGBIDGCG7btA7jgBP9svAK2h90l4yYqIGop5jgMHXA4J0NB9ksR+YTX0qFtfqACO01jGjDHFPx552AW2W0P3uvGROk4NLfTvCeNS8X9MaDg1rL9Qz6PYh7En3f4ZNmKS6nUfQYFmE6PYe05IYBqPFGaq5wHlYpaoDbYqxokVK+JBerz51z+BIzc+SfSdTHVrTiSYtZzGFNOdGrr5ohsLF2+NUguqppkDoua6/S6yXwAYu44pM+/HiZ1BwEDWMqYbC5fjZ+MEBwMjb4PRLdTFYWrUwiUhJH/H+G3pMl/7fjqJhTGwSwU5lnfLsVDmxIPvmRetbJeCOsvfaxWXbXWxLVziqNky51BLW1OP2JKzgNoASSa7Gk1WAfrLI9mirzBBIUD1r/W/AgrMla7CjEMOzYBJolo30/mnxd0SzadPt5+eZtMb9O7rEN1wNINgEA8Ha+IxNMdrHLCQRR4TFRCudnmB7m6GqD0YDCqW+lQqlfnndw93iw/TJ/RwN5k+TqZDNJkAQyUvUlWvktjrdgbQEeI1EapN8Grd7MOeYJlfajSxWVOMfcIhVQXgfcFsqhcceobVA/U3GjsbDCYrjVSKSz0wHo8Xym6dArRvvjsbAfUGouFr8s5lG9o72DVVSy1saDqMqlarWW+12r2GiIXXMzuAU6AQcLLqWf3mZRf6iOlsNQdda9BudhQnvNNdPWN8XA7BgU5G2k3pLADA75XD3BSnn3y+3M90SbZWGczkxiRVmfSaJrd0V8u0yG3CeYRyht7O07Ste45weuqNmhcpLO44woEPRq1eilLN/f3ntEqGPFfzi2PmudHTO3EOEKf60LdTyUeDr7KIIzKfTfqtdr896JxklQtbES/IQD7UyL+SZIJSXYhLHkHZ9oqEjPR1MRzWu550cDYdCeI9n+S4hzouUU76+UeCQJ0fjkKn0+v3m703i0Eh/z97BCDH/XAAziTIt4rH94j7s4dHbSY/HJ90e3qriBQL+MMxCGETs9j/QxiSQ5PaS63/QsZqdS8vOxdvtj7Oc//fL4dTI2LvDAfVA6erSDKe3+cPxw70j4c5HHZlfLT9iAEZYKjZkxOYKZxymJy659l/t+QZllC5bvVJrzShD5GN0/NkiaZyqNcJh0NrdngtTfp7wviaHB+SS1Ng7O+Sk3h5HodT4S8RyY78pUmGM6eEg1l8tVCa1KnvY/SgrzDKsxRLF46j+uahNKH3BE6lsIb1lUxpUhdS3WUE+u6nPP/qiyAsklumMhMz9SBNqeus0oQ+QXqwIa7m3qy87IhXnBLPI8kVXXlZMaASm5vAEqWuKYkvHMtbPdiPiIdm6dVmeVMZjX+lfnKDWmaRAT7ev6ctTfhEF3RoWnJeXlKfSXcHcsf69rk0wTd4Qx30RV9yl5et2Ipwqe/SS5MJXiU8vbIv2b/qZaC8PZ65AUwj9QJR3vx1mQ9b7VPy1FFebnSpWq7xi0qJuwA+fLYpL7rwJdLXobcSa97kM4Cl35f3YXmofp0+8R9gBc/XeXL9Vn38pH7mLTs27z9T8ky1n7ynlZ0I4le78rYzl6t/woG5krwQlpcRcLDD2UPkH5F73C9G5tFKfY0q/wa1TIHI0CgAAA==";
final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork);

View File

@@ -114,7 +114,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
</spark>
<ok to="wait"/>
@@ -142,7 +142,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
</spark>
<ok to="wait"/>
@@ -170,7 +170,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
</spark>
<ok to="wait"/>
@@ -198,7 +198,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
</spark>
<ok to="wait"/>
@@ -225,8 +225,8 @@
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/orcid/orcidprop</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
</spark>
<ok to="fork-join-exec-propagation"/>
<error to="Kill"/>
@@ -247,9 +247,10 @@
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--executor-cores=4
--executor-memory=4G
--driver-memory=${sparkDriverMemory}
--conf spark.executor.memoryOverhead=5G
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@@ -259,9 +260,9 @@
--conf spark.speculation=false
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
--conf spark.sql.shuffle.partitions=3840
--conf spark.sql.shuffle.partitions=15000
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
@@ -291,7 +292,7 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
@@ -321,7 +322,7 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
@@ -351,7 +352,7 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
</spark-opts>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>

View File

@@ -49,10 +49,10 @@
<action name="reset_outputpath">
<fs>
<delete path="${graphPath}/datasource"/>
<delete path="${graphPath}/organization"/>
<delete path="${graphPath}/project"/>
<delete path="${graphPath}/relation"/>
<delete path="${targetPath}/datasource"/>
<delete path="${targetPath}/organization"/>
<delete path="${targetPath}/project"/>
<delete path="${targetPath}/relation"/>
</fs>
<ok to="copy_datasource"/>
<error to="Kill"/>

View File

@@ -1250,6 +1250,21 @@ class MappersTest {
System.out.println("***************");
}
@Test
void testIRISPub() throws IOException, DocumentException {
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("iris-odf.xml")));
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************");
final Publication p = (Publication) list.get(0);
assertNotNull(p.getInstance().get(0).getUrl().get(0));
assertValidId(p.getId());
System.out.println(p.getInstance().get(0).getUrl());
p.getPid().forEach(x -> System.out.println(x.getValue()));
p.getInstance().get(0).getAlternateIdentifier().forEach(x -> System.out.println(x.getValue()));
}
@Test
void testNotWellFormed() throws IOException {
final String xml = IOUtils

View File

@@ -0,0 +1,215 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<oai:header xmlns="http://namespace.openaire.eu/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<identifier>oai:air.unimi.it:2434/907506</identifier>
<datestamp>2024-01-04T12:42:51Z</datestamp>
<setSpec>com_2434_73555</setSpec>
<setSpec>col_2434_73557</setSpec>
<setSpec>openaire</setSpec>
<dr:dateOfTransformation>2024-01-29T16:56:50.632Z</dr:dateOfTransformation>
<dri:objIdentifier>od______1261::ff2d9e058e7bea90a27f41c31078e601</dri:objIdentifier>
<dri:recordIdentifier>oai:air.unimi.it:2434/907506</dri:recordIdentifier>
<dri:dateOfCollection/>
<dri:mdFormat/>
<dri:mdFormatInterpretation/>
<dri:repositoryId/>
<oaf:datasourceprefix> od______1261</oaf:datasourceprefix>
</oai:header>
<metadata>
<oaire:resource xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:exslt="http://exslt.org/common"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:rdf="http://www.w3.org/TR/rdf-concepts/"
xmlns:doc="http://www.lyncode.com/xoai"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:vc="http://www.w3.org/2007/XMLSchema-versioning"
xmlns="http://www.openarchives.org/OAI/2.0/"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<datacite:titles>
<datacite:title xml:lang="en">Ensuring tests of conservation interventions build on existing literature</datacite:title>
</datacite:titles>
<datacite:creator>
<datacite:creator>
<datacite:creatorName>W.J. Sutherland</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>S.T. Alvarez-Castaneda</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>T. Amano</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>R. Ambrosini</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>P. Atkinson</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>J.M. Baxter</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>A.L. Bond</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>P.J. Boon</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>K.L. Buchanan</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>J. Barlow</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>G. Bogliani</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>O.M. Bragg</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M. Burgman</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M.W. Cadotte</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M. Calver</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>S.J. Cooke</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>R.T. Corlett</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>V. Devictor</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>J.G. Ewen</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M. Fisher</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>G. Freeman</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>E. Game</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>B.J. Godley</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>C. Gortazar</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>I.R. Hartley</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>D.L. Hawksworth</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>K.A. Hobson</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M.-. Lu</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>B. Martin-Lopez</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>K. Ma</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>A. Machado</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>D. Mae</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M. Mangiacotti</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>D.J. Mccafferty</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>V. Melfi</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>S. Molur</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>A.J. Moore</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>S.D. Murphy</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>D. Norri</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>A.P.E. van Oudenhoven</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>J. Power</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>E.C. Ree</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>M.W. Schwartz</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>I. Storch</datacite:creatorName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName>C. Wordley</datacite:creatorName>
</datacite:creator>
</datacite:creator>
<datacite:relatedIdentifiers>
</datacite:relatedIdentifiers>
<datacite:dates>
<datacite:date dateType="Accepted">2020</datacite:date>
<datacite:date dateType="Issued">2020</datacite:date>
<datacite:date dateType="Available">2022-06-20</datacite:date>
</datacite:dates>
<dc:language>eng</dc:language>
<dc:publisher>Wiley Blackwell Publishing</dc:publisher>
<oaire:resourceType resourceTypeGeneral="literature"
uri="http://purl.org/coar/resource_type/c_6501">journal article</oaire:resourceType>
<dc:format>application/pdf</dc:format>
<datacite:identifier xmlns:datacite="http://datacite.org/schema/kernel-3"
identifierType="Handle">2434/907506</datacite:identifier>
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
<datacite:subjects>
<datacite:subject>Conservation of Natural Resources</datacite:subject>
</datacite:subjects>
<datacite:sizes/>
<datacite:sizes/>
<datacite:sizes>
<datacite:size>191802 bytes</datacite:size>
</datacite:sizes>
<oaire:file accessRightsURI="" mimeType="application/pdf" objectType="fulltext">https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf</oaire:file>
</oaire:resource>
<oaf:identifier identifierType="DOI">10.1111/cobi.13555</oaf:identifier>
<oaf:identifier identifierType="PMID">32779884</oaf:identifier>
<oaf:fulltext>https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf</oaf:fulltext>
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
<oaf:dateAccepted>2020-01-01</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:language>eng</oaf:language>
<oaf:hostedBy name="Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano"
id="opendoar____::1261"/>
<oaf:collectedFrom name="Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano"
id="opendoar____::1261"/>
</metadata>
</record>

View File

@@ -244,4 +244,27 @@ public class XmlRecordFactoryTest {
}
@Test
public void testIrisGuidelines4() throws DocumentException, IOException {
final ContextMapper contextMapper = new ContextMapper();
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
XmlConverterJob.schemaLocation);
final Publication p = OBJECT_MAPPER
.readValue(
IOUtils.toString(getClass().getResourceAsStream("iris-odf-4.json")),
Publication.class);
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
assertNotNull(xml);
final Document doc = new SAXReader().read(new StringReader(xml));
assertNotNull(doc);
System.out.println(doc.asXML());
}
}

File diff suppressed because one or more lines are too long

View File

@@ -39,6 +39,7 @@
<module>dhp-doiboost</module>
<module>dhp-impact-indicators</module>
<module>dhp-swh</module>
<module>dhp-continuous-validation</module>
</modules>
<pluginRepositories>

View File

@@ -204,6 +204,12 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>uoa-validator-engine2</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>