Compare commits

...

1721 Commits

Author SHA1 Message Date
Andreas Czerniak 1622b40842 Merge branch 'beta' of https://code-repo.d4science.org/andreas.czerniak/BrBETA_dnet-hadoop into beta 2022-01-26 10:45:27 +01:00
Andreas Czerniak 854da6c0e7 adding xsl+records 2022-01-26 10:22:23 +01:00
Andreas Czerniak 2d0a4d5b85 refined rateLimit, added fileGZip, dris2db.xsl 2022-01-26 10:22:23 +01:00
Andreas Czerniak c777b50995 refined rateLimit, added fileGZip, dris2db.xsl 2022-01-25 17:00:05 +01:00
Claudio Atzori 97ad94d7d9 [graph resolution] drop output path at the beginning 2022-01-24 18:02:07 +01:00
Sandro La Bruzzo 7a3819144d Merge pull request 'title types from datacite records' (#188) from datacite_title_mapping into beta
Reviewed-on: D-Net/dnet-hadoop#188
2022-01-21 11:05:25 +01:00
Claudio Atzori f0ea2410e5 improved mapping titles from datacite records to consider title types 2022-01-21 10:50:34 +01:00
Claudio Atzori b37bc277c4 reintroduced the hostedby patching to the datacite records 2022-01-21 09:15:13 +01:00
Miriam Baglioni 42e8f76778 [GraphCleaning] change the return value in the filtering function to avoid losing the APC entities 2022-01-13 16:06:43 +01:00
Miriam Baglioni a7c4d0d16d [DoiBoost Organizations] added a parameter to specify the action in the raw_organizations wf, to be able to load the openorgs organizations as in the loading step for the construction of the graph 2022-01-13 13:52:00 +01:00
Miriam Baglioni 7bf12ad24a Merge pull request 'BipInstance' (#185) from BipInstance into beta
Reviewed-on: D-Net/dnet-hadoop#185
2022-01-12 18:15:38 +01:00
Miriam Baglioni a75fb8c47a [BipFinderInstanceLevel] change pom to align to the dhp-schema release 2.10.24 and refactoring 2022-01-12 18:06:26 +01:00
Miriam Baglioni 4d517ed9ec merging with branch beta 2022-01-12 17:29:37 +01:00
Miriam Baglioni e7d5a39c03 [BipFinderInstanceLevel] added tests in test class 2022-01-12 17:25:04 +01:00
Claudio Atzori dbd6fa1d65 scalafmt: remotely referencing the common definition files makes compilation work for the entire project as well as for the individual submodules 2022-01-12 17:19:38 +01:00
Miriam Baglioni 4993666d73 [BipFinderInstanceLevel] changed creation of the instance to allow enriching existing instances with the same pid 2022-01-12 16:53:47 +01:00
Sandro La Bruzzo 1b9e8378b3 Merge pull request 'scalafmt: code style for scala' (#184) from scalafmt into beta
Reviewed-on: D-Net/dnet-hadoop#184
2022-01-12 09:58:39 +01:00
Sandro La Bruzzo 57e2c4b749 formatted code 2022-01-12 09:40:28 +01:00
Sandro La Bruzzo b78d2b71f0 updated scala format configuration 2022-01-12 09:38:34 +01:00
Claudio Atzori 0f2144b5e0 scalafmt: code formatting 2022-01-11 17:03:44 +01:00
Claudio Atzori dcd282977c pulled from beta 2022-01-11 16:59:41 +01:00
Claudio Atzori 4f212652ca scalafmt: code formatting 2022-01-11 16:57:48 +01:00
Sandro La Bruzzo 0163dadb7f [doiboost]
- update MAG schema, new field added on version dec-2021
2022-01-11 11:05:44 +01:00
Miriam Baglioni 904e1c2667 Merge pull request 'Affiliation Propagation through semantic relation' (#183) from enrichment into beta
Reviewed-on: D-Net/dnet-hadoop#183
2022-01-07 19:18:16 +01:00
Miriam Baglioni 064f9bbd87 [AFFPropSR] added new parameter for the number of iterations and new code for just one iteration 2022-01-07 18:58:51 +01:00
Miriam Baglioni 93f26fb742 Merge pull request '[SDG-FOS] to import SDG file not considering the header' (#182) from SDG into beta
Reviewed-on: D-Net/dnet-hadoop#182
2022-01-07 16:28:55 +01:00
Miriam Baglioni b7e450070b [SDG-FOS] to import SDG file not considering the header 2022-01-07 12:13:26 +01:00
Miriam Baglioni af8a33638d Merge pull request 'SDG - FOS' (#181) from SDG into beta
Reviewed-on: D-Net/dnet-hadoop#181
2022-01-07 11:31:19 +01:00
Miriam Baglioni 639190370a merging with branch beta 2022-01-07 11:29:25 +01:00
Miriam Baglioni adccc2346a [SDG-FOS] to lower case for the doi 2022-01-07 11:28:50 +01:00
Claudio Atzori 908294d86e OAF-store-graph mdstores: further fix for PR#180 2022-01-05 15:49:05 +01:00
Claudio Atzori 58f8998e3d OAF-store-graph mdstores: save them in text format 2022-01-04 15:02:09 +01:00
Claudio Atzori 174c3037e1 OAF-store-graph mdstores: save them in text format 2022-01-04 14:40:16 +01:00
Claudio Atzori 045d767013 OAF-store-graph mdstores: save them in text format 2022-01-04 14:23:01 +01:00
Claudio Atzori cb30770a0b Merge pull request 'tolerant parsing of OAF-store-graph mdstores' (#180) from graph_interpretation_mdstores into beta
Reviewed-on: D-Net/dnet-hadoop#180
2022-01-04 11:32:29 +01:00
Claudio Atzori bd59b58efb test for the tolerant deserialisation utility method 2022-01-04 11:26:56 +01:00
Claudio Atzori a6977197b3 serialise records in the OAF-store-graph mdstores in json format. Read them again in the graph construction phase using a tolerant parser to support backward compatible changes in the evolution of the schema 2022-01-03 17:25:26 +01:00
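A sketch of the tolerant parsing idea from the commit above, assuming Jackson is the JSON library (the object and method names here are hypothetical, not the project's actual utility):

```scala
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}

object TolerantParser {
  // Ignore properties added by newer schema versions so that older
  // records still deserialise; treat malformed records as None.
  private val mapper = new ObjectMapper()
    .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)

  def parse[T](json: String, clazz: Class[T]): Option[T] =
    try Some(mapper.readValue(json, clazz))
    catch { case _: Exception => None }
}
```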
Miriam Baglioni 4c60ee1718 merging with branch beta 2022-01-03 15:24:02 +01:00
Miriam Baglioni 92fd69e25d [SDG-FOS] alternative way to get input data to avoid OOM error while getting csv 2022-01-03 15:23:06 +01:00
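One way to sidestep the OOM mentioned above is to let Spark read the CSV where it lives instead of materialising it in driver memory; a minimal sketch with a hypothetical input path:

```scala
import org.apache.spark.sql.{DataFrame, SparkSession}

// Spark streams the file partition by partition, so the driver never
// holds the whole CSV in memory.
def readSdg(spark: SparkSession): DataFrame =
  spark.read
    .option("header", "true") // first row carries the column names
    .csv("hdfs:///data/sdg.csv") // hypothetical path
```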
Claudio Atzori fe7e5f4748 Merge pull request '[stats wf] result_result relations, usage stats, monitor views, indicator for sprint 5' (#179) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#179
2022-01-03 14:52:11 +01:00
Claudio Atzori bcea4e3a9b added dnet workflow profile for the orchestration of the simplified and complete graph construction and processing pipeline, where the IIS works on the non-deduplicated graph 2022-01-03 14:33:00 +01:00
Miriam Baglioni a706ba0c08 Merge pull request 'SDG Integration' (#178) from SDG into beta
Reviewed-on: D-Net/dnet-hadoop#178
2021-12-23 14:50:00 +01:00
Antonis Lempesis 81ee654271 added result_result relations 2021-12-23 15:46:17 +02:00
Antonis Lempesis 7551e52e95 fixed a typo 2021-12-23 15:33:53 +02:00
Miriam Baglioni 7a1b440413 [SDG] logic to create unresolved entities out of SDG input. This changes also some classes related to FOS to reuse the same code. The code under createunresolvedentities creates results with the merged update of the inputs provided (bip at the level of the instance, fos and sdg for subjects) 2021-12-23 13:24:28 +01:00
Claudio Atzori 278cf08421 Merge pull request 'Normalising DOI urls' (#177) from instance_group_by_url into beta
Reviewed-on: D-Net/dnet-hadoop#177
2021-12-23 12:40:17 +01:00
Claudio Atzori cccb16900c https://support.openaire.eu/issues/7330 normalising DOI urls 2021-12-23 12:33:53 +01:00
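Normalising DOI URLs typically means stripping the resolver prefix and lowercasing the DOI before rebuilding a canonical URL; a hedged sketch of the idea, not the actual implementation:

```scala
// Strip known resolver prefixes, lowercase the DOI, rebuild the URL.
def normalizeDoiUrl(url: String): String = {
  val doi = url.replaceAll("^https?://(dx\\.)?doi\\.org/", "").toLowerCase
  s"https://doi.org/$doi"
}
```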
Miriam Baglioni 2a67ee13ec [SDG] added model class 2021-12-23 10:37:52 +01:00
Miriam Baglioni 5c4fee3533 Merge pull request '[Graph Dump] fixed issue on extraction of relation between entities and contexts: the relationship name and type were swapped' (#176) from dump into beta
Reviewed-on: D-Net/dnet-hadoop#176
2021-12-23 10:16:20 +01:00
Miriam Baglioni 69e9ea9eeb [Graph Dump] Test for extraction of rels from entities extended 2021-12-23 10:15:30 +01:00
Miriam Baglioni 31b26d48ac [Graph Dump] fixed issue on extraction of relation between entities and contexts: the relationship name and type were swapped 2021-12-23 10:09:47 +01:00
Miriam Baglioni bf3a9505e0 Merge pull request 'FOS' (#175) from FOS into beta
Reviewed-on: D-Net/dnet-hadoop#175
2021-12-23 09:06:56 +01:00
Miriam Baglioni 10579c0dd0 [FOS]fixed doi value in test 2021-12-22 23:10:16 +01:00
Miriam Baglioni 6116fc5d40 [FOS]added logic to include only different subjects. Test refactoring and extension 2021-12-22 23:04:22 +01:00
Miriam Baglioni b81efb6a9d [FOS]changed the mapping between the csv and the model. Changed Test classes and resources 2021-12-22 21:40:35 +01:00
Miriam Baglioni 73175ba086 merging with branch beta 2021-12-22 16:45:15 +01:00
Miriam Baglioni de6c4c8968 [FOS]creation of the unresolved entities: removed the split for the doi: no longer needed since each row is related to one doi 2021-12-22 16:44:44 +01:00
Miriam Baglioni b352fbe453 Merge pull request 'bipFinder: unresolved entities' (#174) from bipFinder into beta
Reviewed-on: D-Net/dnet-hadoop#174
2021-12-22 16:42:30 +01:00
Miriam Baglioni 34ac56565d refactoring 2021-12-22 16:28:11 +01:00
Miriam Baglioni 20ef1d657f refactoring 2021-12-22 16:26:36 +01:00
Miriam Baglioni 813f856d3f [BipFinder] removing left over parameter in wf 2021-12-22 16:11:12 +01:00
Miriam Baglioni 2c126ed014 [BipFinder] create unresolved entities with measures at the level of the instance 2021-12-22 16:03:41 +01:00
Miriam Baglioni bf52a1847b Merge pull request 'bipFinder at the level of the result' (#173) from bipFinder into beta
Reviewed-on: D-Net/dnet-hadoop#173
2021-12-22 15:48:03 +01:00
Miriam Baglioni 0807fdb65a [BipFinder] remove not needed resources 2021-12-22 15:37:00 +01:00
Miriam Baglioni b5e11a3a0a [BipFinder] put in common package BipFinder model 2021-12-22 15:33:05 +01:00
Miriam Baglioni c5739c4266 [BipFinder] create action set for the measures at the level of the result 2021-12-22 15:08:33 +01:00
Miriam Baglioni da5f6260aa merging with branch beta 2021-12-22 13:12:02 +01:00
Miriam Baglioni 4849270c55 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-12-22 13:07:37 +01:00
Claudio Atzori 8d18500069 using dhp-schema:2.9.24 2021-12-22 12:47:21 +01:00
Miriam Baglioni 9d19b057b8 Merge pull request '[GRAPH DUMP]Moving Measures' (#159) from dump into beta
Reviewed-on: D-Net/dnet-hadoop#159
2021-12-22 12:40:35 +01:00
Miriam Baglioni be0acccf42 Merge branch 'beta' into dump 2021-12-22 12:39:57 +01:00
Miriam Baglioni 89ea9fa0e1 Merge branch 'dump' of https://code-repo.d4science.org/D-Net/dnet-hadoop into dump 2021-12-22 12:36:32 +01:00
Antonis Lempesis 16539d7360 added usage stats 2021-12-22 02:54:42 +02:00
Antonis Lempesis 3edd661608 fixed column names 2021-12-21 22:55:04 +02:00
Antonis Lempesis a4c0cbb98c fixed typos in indicators. Added extra views in monitor 2021-12-21 15:54:38 +02:00
Miriam Baglioni e24a7f3496 merging with branch beta 2021-12-21 13:57:19 +01:00
Miriam Baglioni d1ae219cb4 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-12-21 13:55:53 +01:00
Miriam Baglioni 460e6b95d6 [Graph Dump] - 2021-12-21 13:48:03 +01:00
Sandro La Bruzzo 3920d68992 Fixed workflow generation of delta in datacite 2021-12-21 11:41:49 +01:00
Antonis Lempesis 58996972d9 added first indicator of sprint 5 2021-12-21 03:35:04 +02:00
dimitrispie c1cdec09a9 Sprint 5 and other changes 2021-12-20 19:23:57 +02:00
Miriam Baglioni 3cc1b7b153 merging with branch beta 2021-12-15 17:25:02 +01:00
Miriam Baglioni 5e5dfd619c Merge branch 'beta' into dump 2021-12-15 17:21:55 +01:00
Miriam Baglioni 63b648b0dd Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-12-15 12:41:15 +01:00
Sandro La Bruzzo b881ee5ef8 [scholexplorer]
- implemented generation of scholix of delta update of datacite
2021-12-15 11:25:32 +01:00
Sandro La Bruzzo 63952018c0 [scholexplorer]
-moved SparkRetrieveDataciteDelta in scala folder
2021-12-15 11:25:32 +01:00
Sandro La Bruzzo e5bff64f2e [scholexplorer]
- Minor fix on SparkConvertRDDtoDataset
- first implementation of retrieving the datacite dump
2021-12-15 11:25:32 +01:00
Claudio Atzori e30e5ac8a8 Merge pull request '[Affiliation Propagation]' (#162) from affiliationPropagation into beta
Reviewed-on: D-Net/dnet-hadoop#162
2021-12-14 15:28:23 +01:00
Claudio Atzori 1790fa2d44 Merge branch 'beta' into affiliationPropagation 2021-12-14 15:26:56 +01:00
Miriam Baglioni 56409d1281 [Dump] resolved conflicts with beta and merging 2021-12-14 15:03:45 +01:00
Miriam Baglioni a3592b463a Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-12-14 14:58:26 +01:00
Miriam Baglioni 22d4b5619b [BipFinder Result] last changes to test and resources files 2021-12-14 14:54:13 +01:00
Miriam Baglioni 6fb6236cd4 changed the way to produce the AS for bipFinder. 2021-12-14 14:51:14 +01:00
Claudio Atzori aff3ddc8d2 added cleaning for the format field, removing carriage return and tab characters 2021-12-14 11:41:46 +01:00
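Cleaning the format field as described above boils down to stripping control characters; a minimal sketch:

```scala
// Replace carriage returns, newlines and tabs with a single space.
def cleanFormat(format: String): String =
  format.replaceAll("[\\r\\n\\t]+", " ").trim
```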
Miriam Baglioni 573bd17cbb Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-12-14 11:12:25 +01:00
Miriam Baglioni 4eb8276493 - 2021-12-14 11:12:17 +01:00
Miriam Baglioni 936578aaf1 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-12-13 15:01:47 +01:00
Miriam Baglioni 8d755cca80 - 2021-12-13 15:01:40 +01:00
Claudio Atzori 98eb292c59 avoid NPEs merging XMLInstance(s) 2021-12-13 13:27:20 +01:00
Claudio Atzori 5e17247bb6 avoid NPEs merging XMLInstance(s) 2021-12-13 11:48:40 +01:00
Claudio Atzori b70ecccea0 avoid NPEs merging XMLInstance(s) 2021-12-12 12:37:38 +01:00
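The NPE fixes above point at null-safe handling when merging instance fields; a sketch of the pattern, with a hypothetical field shape (a list of URLs):

```scala
import scala.collection.JavaConverters._

// Treat null lists as empty before merging so the merge never NPEs.
def mergeUrls(a: java.util.List[String], b: java.util.List[String]): java.util.List[String] = {
  val left  = Option(a).map(_.asScala.toList).getOrElse(Nil)
  val right = Option(b).map(_.asScala.toList).getOrElse(Nil)
  (left ++ right).distinct.asJava
}
```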
Claudio Atzori 25dc7929a9 Merge pull request '[graph cleaning] improved instance type defaults' (#172) from graph_cleaning into beta
Reviewed-on: D-Net/dnet-hadoop#172
2021-12-09 16:47:06 +01:00
Claudio Atzori eb43eda42a Merge branch 'beta' into graph_cleaning 2021-12-09 16:46:48 +01:00
Claudio Atzori 41c70c607d cleaning workflow assigns the proper default instance type when a value could not be cleaned using the vocabularies 2021-12-09 16:44:28 +01:00
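The fallback described above can be sketched as a vocabulary lookup with a default term (names and default value are hypothetical):

```scala
// Look the original value up in the vocabulary; fall back to a default
// term when the value cannot be cleaned.
def cleanInstanceType(value: String, vocab: Map[String, String]): String =
  vocab.getOrElse(value.trim.toLowerCase, "Other literature type")
```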
Alessia Bardi 8f1e018ceb Merge pull request 'Serialization of fields in XML records for Sygma (and not only)' (#171) from sygma_indexing into beta
Reviewed-on: D-Net/dnet-hadoop#171
2021-12-09 15:53:27 +01:00
Alessia Bardi cba63e9f82 Merge branch 'beta' into sygma_indexing 2021-12-09 15:52:16 +01:00
Alessia Bardi e53228401b style 2021-12-09 15:46:22 +01:00
Claudio Atzori adf17452b0 Merge pull request '[graph cleaning] consider terms as synonyms in the vocabulary lookup' (#170) from graph_cleaning into beta
Reviewed-on: D-Net/dnet-hadoop#170
2021-12-09 14:45:14 +01:00
Claudio Atzori e6e177dda0 vocabulary based cleaning considers also the term label when looking up for a synonym 2021-12-09 13:57:53 +01:00
Alessia Bardi 6b5d7688a4 #7275 serialize license information in XML records 2021-12-09 13:46:48 +01:00
Miriam Baglioni b113586207 resolved conflicts 2021-12-07 10:16:14 +01:00
Sandro La Bruzzo 5d51b3dd4a Merge pull request 'scala_refactor' (#169) from scala_refactor into beta
Reviewed-on: D-Net/dnet-hadoop#169
2021-12-06 15:33:44 +01:00
Miriam Baglioni d9836f0cf3 [OpenCitations] fixed test when executed one after the other 2021-12-06 15:27:09 +01:00
Miriam Baglioni d1df01ff1e [Graph Dump] fixed resource for test 2021-12-06 15:15:48 +01:00
Sandro La Bruzzo ed0c352799 [test-fixing] fixed wrong test 2021-12-06 15:07:41 +01:00
Miriam Baglioni 96a7d46278 [Graph Dump] fixed tests 2021-12-06 15:06:32 +01:00
Sandro La Bruzzo e9f285ec4d [scala-refactor] Module dhp-doiboost:
Moved all scala source into src/main/scala and src/test/scala
2021-12-06 14:24:03 +01:00
Sandro La Bruzzo bf880e2508 [scala-refactor] Module dhp-graph-mapper:
Moved all scala source into src/main/scala and src/test/scala
2021-12-06 13:57:41 +01:00
Sandro La Bruzzo 81bf604059 [scala-refactor] Module dhp-common:
Moved all scala source into src/main/scala and src/test/scala
2021-12-06 11:29:24 +01:00
Sandro La Bruzzo 7af0bbd0b1 [scala-refactor] Module dhp-aggregation:
Moved all scala source into src/main/scala and src/test/scala
2021-12-06 11:26:36 +01:00
Claudio Atzori 9132727793 fixed date cleaning test 2021-12-06 10:54:05 +01:00
Claudio Atzori 08795cbd30 using helper method from ModelSupport to find the inverse relation descriptor 2021-12-06 10:39:56 +01:00
Miriam Baglioni f430688ff7 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-12-03 12:36:08 +01:00
Miriam Baglioni 4bb1d43afc - 2021-12-03 12:35:51 +01:00
Sandro La Bruzzo 0fa0ce33d6 removed duplicated entries in gitignore 2021-12-03 11:47:35 +01:00
Sandro La Bruzzo f7011b90d8 format code 2021-12-03 11:15:09 +01:00
Claudio Atzori 372633880f Merge pull request 'XML serialisation of instances with the same URLs' (#167) from instance_group_by_url into beta
Reviewed-on: D-Net/dnet-hadoop#167
2021-12-03 09:28:06 +01:00
Claudio Atzori dd0b2e5244 Merge branch 'beta' into instance_group_by_url 2021-12-03 09:27:58 +01:00
Claudio Atzori c4c705aa46 Merge pull request 'Cleaning of invisible records' (#168) from clean_invisible_records into beta
Reviewed-on: D-Net/dnet-hadoop#168
2021-12-03 09:27:41 +01:00
Claudio Atzori 863a2f9db3 avoid filtering OAF records defined as invisible = true 2021-12-03 09:08:12 +01:00
Claudio Atzori 9cac283bec implemented Instance serialization features requested in https://support.openaire.eu/issues/7156 2021-12-02 17:20:33 +01:00
Miriam Baglioni d9f80488cc [GRAPH DUMP] Add one more test to check the filtering of the relations 2021-12-02 14:15:19 +01:00
Miriam Baglioni 58bc3f223a [GRAPH DUMP] Add filtering for relations we do not want to dump. It is based on the relclass 2021-12-02 14:09:46 +01:00
Miriam Baglioni 8905a39bf3 merging with branch beta 2021-12-02 13:17:29 +01:00
Miriam Baglioni 87eedad898 - 2021-12-02 13:17:19 +01:00
Claudio Atzori 3b19821f3c added stats computation on the graph hive DB tables 2021-12-02 10:44:10 +01:00
Claudio Atzori cfa4560769 minor: fixed hive action name 2021-12-02 10:43:36 +01:00
Claudio Atzori d85af6fc25 [cleaning wf] fixed OAF record navigation, a mapping defined on a container object would have prevented the navigation from continuing on its properties 2021-12-01 15:49:15 +01:00
Claudio Atzori 4fe7888817 code formatting 2021-12-01 15:48:15 +01:00
Claudio Atzori 01e5e0142a added test to verify the relation inverse lookup operation 2021-12-01 09:46:26 +01:00
Claudio Atzori 0df9574a6f Merge pull request '[stats wf] Added sprint 3&4 of indicators' (#166) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#166
2021-11-29 10:40:26 +01:00
Claudio Atzori 014e872ae1 [resolution wf] added optional parameter to skip the entity resolution 2021-11-26 15:38:56 +01:00
Claudio Atzori 5c6d328537 code formatting 2021-11-26 15:38:16 +01:00
dimitrispie 09fc2afdca Added indi_funder_country_collab
Kept only indi_pub_has_cc_licence
2021-11-26 16:13:10 +02:00
dimitrispie 8750a71502 Merge remote-tracking branch 'origin/beta' into beta 2021-11-26 16:11:26 +02:00
dimitrispie 25fc8abf77 Sprint 4 2021-11-26 16:10:58 +02:00
Antonis Lempesis 0b4163ee0b added sprint3,4, removed 2, chaos 2021-11-26 15:58:01 +02:00
dimitrispie 29f69f2f89 Sprint 4 2021-11-26 15:22:04 +02:00
Sandro La Bruzzo bb7f556eff Merge remote-tracking branch 'origin/beta' into beta 2021-11-25 13:03:25 +01:00
Sandro La Bruzzo 1e1f5e4fe0 minor fix 2021-11-25 13:03:17 +01:00
Miriam Baglioni ac07ed8251 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-11-25 12:32:58 +01:00
Miriam Baglioni 5fd0e610bf [DOIBOOST Process] fix filtering to filter results with non null id 2021-11-25 12:10:45 +01:00
Sandro La Bruzzo feea154e89 remove working dir after test 2021-11-25 11:02:38 +01:00
Sandro La Bruzzo 028a8acad8 add test resources 2021-11-25 10:54:47 +01:00
Sandro La Bruzzo 2164a2a889 Datacite: code refactor, generated a general SparkApplication Scala class from which all the Spark Scala jobs have to inherit
Added some comments to the Datacite transformation code
2021-11-25 10:54:13 +01:00
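A sketch of what such a common base class could look like (the shape is illustrative; the real SparkApplication may differ):

```scala
import org.apache.spark.sql.SparkSession

// Subclasses implement only run(); the session lifecycle is handled once.
abstract class AbstractSparkJob(appName: String) {
  protected def run(spark: SparkSession): Unit

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName(appName).getOrCreate()
    try run(spark)
    finally spark.stop()
  }
}
```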
Sandro La Bruzzo a7cf277d98 Datacite: Removed HostedBy Patch as described in ticket #7219; now all the records will have hostedby Unknown Repository 2021-11-22 16:03:17 +01:00
Sandro La Bruzzo 483d3039d1 entity resolution: added distcp of missing entities in graph materialization 2021-11-22 15:55:24 +01:00
Sandro La Bruzzo 93fe8ce8b2 entity resolution: fix test 2021-11-22 15:50:43 +01:00
Sandro La Bruzzo 35e20b0647 updated resolution wf:
- generate a new version of the graph
 - changed merge from union to join
2021-11-22 11:48:55 +01:00
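The union-to-join change can be sketched as follows, assuming DataFrames keyed by an id column (illustrative only):

```scala
import org.apache.spark.sql.DataFrame

// Join resolved records on the identifier instead of blindly unioning
// them into the graph, so only matching entities get merged.
def mergeResolved(graph: DataFrame, resolved: DataFrame): DataFrame =
  graph.join(resolved, Seq("id"), "left_outer") // previously: graph.union(resolved)
```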
Miriam Baglioni fdb75b180e [Cleaning] added couple of tests for DOIBOOST publications 2021-11-21 16:35:22 +01:00
Miriam Baglioni 0506fa2654 [Graph Dump] changed to mirror the changes in the model 2021-11-19 15:56:25 +01:00
Sandro La Bruzzo 6110a2b984 reverted version 2021-11-19 15:31:45 +01:00
Sandro La Bruzzo 65ebe1019b updated wagon-ssh version 2021-11-19 14:59:04 +01:00
Sandro La Bruzzo 155d8bf83f updated maven site plugin on dhp-code-style 2021-11-19 14:51:08 +01:00
Sandro La Bruzzo 3426451d3f Merge remote-tracking branch 'origin/beta' into beta 2021-11-19 14:49:04 +01:00
Sandro La Bruzzo 75298ec442 added site.xml to code style 2021-11-19 14:48:44 +01:00
Sandro La Bruzzo 4542a2338b updated site configuration to deploy on website 2021-11-19 13:44:08 +01:00
Claudio Atzori 90c2a4987e Merge pull request '[fix] preserve parent/child relations from OpenOrgs' (#164) from preserve_openorg_parent_child_relations into beta
Reviewed-on: D-Net/dnet-hadoop#164
2021-11-19 11:35:55 +01:00
Claudio Atzori e5a2c596b2 Merge branch 'beta' into preserve_openorg_parent_child_relations 2021-11-19 11:35:46 +01:00
Claudio Atzori f4538f3c4c cleanup 2021-11-19 11:33:10 +01:00
Claudio Atzori 2b46b87f56 fixed filtering criteria applied in SparkCopyRelationsNoOpenorgs to keep the parent/child relations from OpenOrgs 2021-11-19 11:30:29 +01:00
Miriam Baglioni 9fae872181 [Graph Dump] changed to mirror the changes in the model 2021-11-19 11:25:50 +01:00
Sandro La Bruzzo fc03c99805 fixed javadocs url after deploying site 2021-11-19 10:46:33 +01:00
Sandro La Bruzzo 8a7c7d36db Merge pull request 'mvn_site_documentation' (#161) from mvn_site_documentation into beta
Reviewed-on: D-Net/dnet-hadoop#161
2021-11-19 09:54:53 +01:00
Sandro La Bruzzo 0c0d561bc4 added public class into tests to create correct javadoc 2021-11-19 09:54:22 +01:00
Claudio Atzori 62fa61f3cf merge from beta 2021-11-19 09:23:42 +01:00
Claudio Atzori bd9a43cefd Revert to 4094f2bb9a 2021-11-19 09:20:43 +01:00
Claudio Atzori 3974fa7dc1 Merge branch 'beta' into affiliationPropagation 2021-11-18 18:06:26 +01:00
Claudio Atzori a24b9f8268 [dedup] trivial refactoring 2021-11-18 17:12:02 +01:00
Claudio Atzori c0750fb17c avoid unnecessary count operations over large spark datasets 2021-11-18 17:11:31 +01:00
Claudio Atzori bb5dca7979 cleanup 2021-11-18 17:10:46 +01:00
Miriam Baglioni 793b5a8e5f Update 'dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java'
Removing the dump of Measure at the level of the result. We decided not to map it
2021-11-18 14:49:38 +01:00
Miriam Baglioni 5dc5792722 [Graph Dump] Change test resource to mirror the movement of the measure element 2021-11-18 14:39:12 +01:00
Miriam Baglioni 0136a8c266 [Graph Dump] Change test to mirror that measure is at the level of the instance 2021-11-18 14:38:33 +01:00
Miriam Baglioni 1b79c0ee79 merging with branch beta 2021-11-18 11:01:00 +01:00
Claudio Atzori 10a32f287f Merge pull request '[stats wf] RIs, affiliations, parquet' (#156) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#156
2021-11-17 15:02:25 +01:00
Antonis Lempesis cb3adb90f4 Merge branch 'beta' into beta 2021-11-17 14:33:45 +01:00
Antonis Lempesis c283406829 added Universidad Politécnica de Madrid 2021-11-17 15:33:00 +02:00
Claudio Atzori e0395719d7 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-11-17 14:17:27 +01:00
Claudio Atzori 82a4e4efae [cleaning wf] fixed methodology to rule out invalid result titles, based on https://support.openaire.eu/issues/7206 2021-11-17 14:17:22 +01:00
Miriam Baglioni 6d4a1c57ee [Resolve Entities] Change test dataset to mirror the modification in the creation of the map between the pids and the unresolved 2021-11-17 12:41:52 +01:00
Sandro La Bruzzo 9c82d670b8 make class public in order to create javadoc 2021-11-17 12:31:02 +01:00
Sandro La Bruzzo 1f5ee116ed code refactor, created and moved scala code into the correct maven folder under src/main/scala and src/test/scala
fixed test
2021-11-17 12:23:52 +01:00
Sandro La Bruzzo 2fd9ceac13 code refactor, created and moved scala code into the correct maven folder under src/main/scala and src/test/scala 2021-11-17 11:35:22 +01:00
Sandro La Bruzzo 60ae874dcb Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into mvn_site_documentation 2021-11-17 11:08:34 +01:00
Sandro La Bruzzo 2506d7a679 Merge branch 'mvn_site_documentation' of code-repo.d4science.org:D-Net/dnet-hadoop into mvn_site_documentation 2021-11-17 11:07:24 +01:00
Sandro La Bruzzo cded363b55 code refactor, created and moved scala code into the correct maven folder under src/main/scala and src/test/scala 2021-11-17 11:06:35 +01:00
Miriam Baglioni 4094f2bb9a added integration md file 2021-11-17 10:04:52 +01:00
Miriam Baglioni ec8b0219ff [Documentation] Added first page for Integration via unresolved entities generation 2021-11-16 17:41:34 +01:00
Miriam Baglioni 2bbece2ca5 merging with branch beta 2021-11-16 16:35:40 +01:00
Sandro La Bruzzo 2d67020c59 added dhp-enrichment maven site template 2021-11-16 16:01:08 +01:00
Claudio Atzori 49f897ef29 [cleaning wf] fixed regex used to spot garbage in result titles; adjusted threshold for filtering titles 2021-11-16 15:24:23 +01:00
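A rule like the one above can be expressed as the ratio of meaningful characters against a threshold; a hypothetical sketch:

```scala
// Rule a title out when too few of its characters are letters or digits;
// the 0.5 threshold is illustrative.
def isValidTitle(title: String): Boolean = {
  val t = title.trim
  t.nonEmpty && t.count(_.isLetterOrDigit).toDouble / t.length >= 0.5
}
```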
Miriam Baglioni 28ea532ece [Affiliation Propagation] moved the selection of graph relation as a preparation step 2021-11-16 15:24:19 +01:00
Sandro La Bruzzo 18c1d70ef4 Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into mvn_site_documentation 2021-11-16 15:16:49 +01:00
Sandro La Bruzzo a1cafaf2e3 added mvn site for dnet-hadoop project 2021-11-16 15:16:28 +01:00
Miriam Baglioni 7c96e3fd46 removed not useful dir 2021-11-16 13:57:26 +01:00
Miriam Baglioni c7c0c3187b [AFFILIATION PROPAGATION] Applied some SonarLint suggestions 2021-11-16 13:56:32 +01:00
Miriam Baglioni c6a9f0a1a8 merging with branch beta 2021-11-16 12:04:40 +01:00
Miriam Baglioni 99d86134f5 [Graph Dump] changed the dump since the measures have been moved to the level of the instance 2021-11-16 12:04:21 +01:00
Claudio Atzori 0a727d325d [dedup] increased number of partitions in the consistency phase 2021-11-16 08:43:41 +01:00
Claudio Atzori bafa2990f3 code formatting 2021-11-15 17:07:16 +01:00
Claudio Atzori 668ac25224 [graph resolution] using existing argument parser file name 2021-11-15 17:02:45 +01:00
Claudio Atzori 7d0a03f607 [graph resolution] minor 2021-11-15 14:45:54 +01:00
Claudio Atzori 941a50a2fc Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-11-15 14:42:49 +01:00
Claudio Atzori 7c804acda8 [graph resolution] minor 2021-11-15 14:42:43 +01:00
Sandro La Bruzzo efa09057db Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta 2021-11-15 14:32:09 +01:00
Sandro La Bruzzo 48923e46a1 added documentation to Pubmed Class and also added mvn site for dhp-aggregations 2021-11-15 14:32:01 +01:00
Claudio Atzori d2c787d416 [graph resolution] fixed sequence of the workflow steps 2021-11-15 14:31:15 +01:00
Claudio Atzori 975b10b711 [actionmanager] increased spark.sql.shuffle.partitions to 5000 2021-11-15 12:31:45 +01:00
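Such a setting is usually applied on the session (or spark-submit) configuration; a spark-shell style sketch with a hypothetical app name:

```scala
import org.apache.spark.sql.SparkSession

// Raise shuffle parallelism for the heavy joins in the promote step.
val spark = SparkSession.builder()
  .appName("PromoteActionPayload") // hypothetical
  .config("spark.sql.shuffle.partitions", "5000")
  .getOrCreate()
```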
Claudio Atzori 1ecceea788 Merge pull request 'Open Citations' (#158) from openCitations into beta
Reviewed-on: D-Net/dnet-hadoop#158
2021-11-15 10:59:19 +01:00
Miriam Baglioni 4ec88c718c merge with beta - resolved conflict in pom 2021-11-15 10:52:16 +01:00
Miriam Baglioni 6f1a434e90 [Bypass Action Set] Fixed test to consider the new identifier utils 2021-11-15 09:59:23 +01:00
Miriam Baglioni 157d33ebf9 [Bypass Action Set] Refactoring 2021-11-15 09:58:48 +01:00
Claudio Atzori 7b81607035 Merge pull request 'PR: Bypass Action Set' (#157) from bypass_acstionset into beta
Reviewed-on: D-Net/dnet-hadoop#157
2021-11-12 12:01:05 +01:00
Miriam Baglioni 6595135a1a [Dump Schemas] changed the schema of the dumped result according to the modifications in the bestAccessRight type 2021-11-12 11:45:38 +01:00
Miriam Baglioni 43cae4ad88 Merge branch 'dump' of https://code-repo.d4science.org/D-Net/dnet-hadoop into dump 2021-11-12 11:36:54 +01:00
Miriam Baglioni b3f9370125 merge with beta - resolved conflict in pom 2021-11-12 11:25:26 +01:00
Miriam Baglioni 92d0e18b55 [Bypass Action Set] used constant DOI instead of "doi" 2021-11-12 10:56:58 +01:00
Miriam Baglioni 881113743f [Bypass Action Set] refactoring 2021-11-12 10:55:50 +01:00
Miriam Baglioni 47ccb53c4f [Bypass Action Set] modification for comment D-Net/dnet-hadoop#157 (comment) 2021-11-12 10:54:09 +01:00
Miriam Baglioni ffb0ce1d59 merge with beta - resolved conflict in pom 2021-11-12 10:19:59 +01:00
Miriam Baglioni 716021546e [Bypass Action Set] minor fix 2021-11-12 10:18:01 +01:00
Claudio Atzori 1f2a3d1af0 depending on dhp-schemas:2.8.22 (release) 2021-11-12 10:15:11 +01:00
Sandro La Bruzzo 3469cc2b1d Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta 2021-11-12 09:56:52 +01:00
Sandro La Bruzzo a7763d2492 removed alternate identifier in resolutionMap 2021-11-12 09:56:45 +01:00
Miriam Baglioni b8bdabfae9 [Graph Dump] removed OpenAccessRoute from test in best access right 2021-11-11 16:16:48 +01:00
Miriam Baglioni e5498052e8 [Graph Dump] removed OpenAccessRoute from test in best access right 2021-11-11 16:14:10 +01:00
Miriam Baglioni 935062edec [Bypass Action Set] creation of unresolved entities 2021-11-11 16:11:25 +01:00
Antonis Lempesis 26f086dd64 removed the too restrictive clause. will discuss again 2021-11-11 12:57:19 +02:00
Claudio Atzori 8bdca3413f Merge pull request 'DOIBoost Mapping: change the creation of the instance in the DOIBoost result' (#155) from doiboost_url into beta
Reviewed-on: D-Net/dnet-hadoop#155
2021-11-11 10:40:32 +01:00
Claudio Atzori 148289150f Merge branch 'beta' into doiboost_url 2021-11-11 10:40:19 +01:00
Sandro La Bruzzo 2ca0a436ad added SparkResolveEntities node to the oozie wf 2021-11-11 10:25:42 +01:00
Sandro La Bruzzo 9cb195314f implemented and tested resolution of entities 2021-11-11 10:17:40 +01:00
Miriam Baglioni 6d3c4c4abe mergin with branch beta 2021-11-11 08:59:53 +01:00
Miriam Baglioni 8cc50ecee0 [Graph Dump] changed AccessRight with BestAccessRight in the dump and modified the dependency to the schema to the SNAPSHOT 2021-11-11 08:59:20 +01:00
Miriam Baglioni 88b73f4f49 merging with branch beta 2021-11-10 17:00:52 +01:00
Miriam Baglioni c371b23077 - 2021-11-10 17:00:37 +01:00
Miriam Baglioni 9e214ce0eb [BypassAS] addition of OC relations 2021-11-09 12:07:19 +01:00
Sandro La Bruzzo 6477a40670 implement filter of openCitation 2021-11-09 11:27:12 +01:00
Miriam Baglioni 6f7ca539c6 [BypassAS] update of results for bipFinder and FOS 2021-11-09 11:25:41 +01:00
Miriam Baglioni a7d50c499b [BypassAS] prepare FOS subject, test and model for FOS and BipFinder scores 2021-11-08 16:44:19 +01:00
Antonis Lempesis 91354c6068 - fetching all context related results
- storing tables as parquet
2021-11-08 15:15:46 +02:00
Miriam Baglioni 94918a673c [Graph DUMP] Fix issue for empty originalId list 2021-11-08 10:25:28 +01:00
Miriam Baglioni 4c70201412 merging with branch beta 2021-11-05 12:29:56 +01:00
Miriam Baglioni 8442efd8d1 [Graph DUMP] Filtering out from the originalIds the id of the result in OpenAIRE 2021-11-05 12:29:22 +01:00
Claudio Atzori 5681e89544 Update 'dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json' 2021-11-05 12:18:24 +01:00
Miriam Baglioni a22c29fba1 [Graph DUMP] Filtering out from the originalIds the id of the result in OpenAIRE 2021-11-05 12:08:33 +01:00
Miriam Baglioni c10ff6928c [Graph DUMP] add schema of the dump related to the model as in dhp-schemas.2.8.31. Note the measure element at the level of the result has been removed because of issues on where to display it: at the level of the result or at the level of the entity 2021-11-05 11:36:21 +01:00
Miriam Baglioni 0857849a86 [Graph DUMP] Remove dump of measure until it is clear where to put it (at the level of result or at the level of the instance) 2021-11-05 11:02:37 +01:00
Miriam Baglioni df7ee77c7a [DOIBoost Mapping] removed not needed comments 2021-11-04 16:24:07 +01:00
Miriam Baglioni de63d29b6f [DOIBoost Mapping] Fix to avoid producing results with null as identifier (probably due to the filtering function in the factory for the creation of the id) 2021-11-04 16:16:40 +01:00
Miriam Baglioni d50057b2d9 [DOIBoost Mapping] changed the way to create the url for the instance: we use the Crossref guidelines https://doi.org/doi 2021-11-03 16:59:37 +01:00
Miriam Baglioni edf55395e9 added test resource 2021-11-03 16:49:30 +01:00
Miriam Baglioni d97ea82a29 [DOIBoost Mapping] Added test to verify the instance created for Crossref will have just the url related to the doi 2021-11-03 16:45:15 +01:00
Miriam Baglioni 96769b4481 [DOIBoost - Mapping] Changed the logic which brought into the instance urls that should not be there: the url of the doi in the json is reachable from the root (json/"URL"); other urls were added from the links element. Now the mapping from the link element has been removed 2021-11-03 16:43:36 +01:00
Miriam Baglioni 683fe093cf [DOIBoost - Mapping] Remove the addition of the instance to the MAG publication record 2021-11-03 15:51:26 +01:00
Miriam Baglioni b2bb8d9d79 [DOIBoost - Mapping] selecting the url from Crossref containing the doi 2021-11-03 15:44:57 +01:00
Miriam Baglioni 779318961c [DOIBoost - Mapping] removed the url from crossref containing the api.elsevier.com... string in the url 2021-11-03 14:38:52 +01:00
Miriam Baglioni 2480e590d1 [DOIBoost - Mapping] changed the type on which to map dissertation from Crossref: from 006 Doctoral thesis to 0044 Thesis since dissertation could be either Doctoral or master thesis 2021-11-03 14:25:23 +01:00
Miriam Baglioni b9d124bb7c [Enrichment: Propagation through parent-child relationships] Added counters, and changed constraint to verify if filtering out the relation (from classname = harvested to classid != propagation) 2021-11-03 13:55:37 +01:00
Sandro La Bruzzo 7bd224f051 implement first version of scholexplorer integration for the generation of final graph 2021-11-02 15:58:15 +01:00
Claudio Atzori 7fa49f6956 Merge pull request 'removed hardcoded reference' (#154) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#154
2021-11-02 09:11:30 +01:00
Antonis Lempesis f78afb5ef9 removed hardcoded reference 2021-11-01 15:42:29 +02:00
Miriam Baglioni 2aca6bfa0a merging with branch beta 2021-10-29 11:20:45 +02:00
Miriam Baglioni 09f36cffb8 [Enrichment: Propagation through parent-child relationships] First implementation, testing, and wf for propagation of result to organization through semantic relation 2021-10-29 11:20:03 +02:00
Claudio Atzori 1225ba0b92 [resolution] increasing number of partitions to avoid OOM 2021-10-28 16:18:17 +02:00
Sandro La Bruzzo d9cbca83f7 moved filter on next phase 2021-10-28 16:13:24 +02:00
Sandro La Bruzzo 1be9aa0a5f Removed filter of datacite items from the raw graph merging phase, Datacite is not an actionset anymore in beta 2021-10-26 17:52:20 +02:00
Sandro La Bruzzo 4acfa8fa2e Scholexplorer Datasource Aggregation:
- Added collectedfrom in the inverse relation generated
Relation resolution:
- increased number of partitions in workflow.xml
- using classid instead of classname to build the pid-dnetId mapping
2021-10-26 17:51:20 +02:00
Miriam Baglioni d0ef7d91c5 adding test resource 2021-10-26 17:34:11 +02:00
Sandro La Bruzzo aafdffa6b3 resolved conflict 2021-10-26 09:45:46 +02:00
Sandro La Bruzzo 034304b33a conflict resolved on merge 2021-10-26 09:40:47 +02:00
Claudio Atzori 6b34ba737e minor 2021-10-21 14:16:18 +02:00
Claudio Atzori d147295c2f avoiding java.io.NotSerializableException: java.util.HashMap 2021-10-21 14:15:57 +02:00
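A common way to avoid that exception is to copy the java.util.HashMap into an immutable Scala map before a closure captures it; a hedged sketch:

```scala
import org.apache.spark.rdd.RDD
import scala.collection.JavaConverters._

// Only the immutable (and plainly serializable) copy is shipped to
// the executors, not the original container.
def enrich(rdd: RDD[String], lookup: java.util.HashMap[String, String]): RDD[String] = {
  val safe: Map[String, String] = lookup.asScala.toMap
  rdd.map(id => safe.getOrElse(id, id))
}
```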
Claudio Atzori 3702fe478d cleanup 2021-10-21 12:05:02 +02:00
Sandro La Bruzzo ac36aa7d1c fixed wrong Encoding during a map phase 2021-10-21 11:35:02 +02:00
Sandro La Bruzzo aeeebd573b code refactor renamed datacite package 2021-10-20 17:37:42 +02:00
Sandro La Bruzzo ab3a99d3e9 removed old datacite oozie workflow 2021-10-20 17:19:47 +02:00
Sandro La Bruzzo ae4e99a471 Adapted workflow of resolution of PID to work within the OpenAIRE data workflow
- Added relations in both directions on all Scholexplorer datasources
2021-10-20 17:12:16 +02:00
Claudio Atzori 4f8970f8ed [stats] reducing the step22 wait time 2021-10-20 14:14:53 +02:00
Claudio Atzori 00b78b9c58 cleanup: mapping contents in the graph already defined in the OAF graph model doesn't require to be aware of the vocabularies 2021-10-20 14:04:45 +02:00
Claudio Atzori c01dd0c925 registered oaf model classes for the KryoSerializer 2021-10-20 13:55:07 +02:00
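Registering model classes for Kryo looks roughly like this (the class list below is illustrative; the real one covers the OAF model classes):

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .registerKryoClasses(Array(
    classOf[java.util.ArrayList[_]], // plus Publication, Dataset, Relation, ...
    classOf[java.util.HashMap[_, _]]
  ))
```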
Miriam Baglioni 652114c641 [affiliationPropagation] first try. preparation 2021-10-20 11:44:23 +02:00
Claudio Atzori d0cf2963f0 Merge pull request 'hierarchical_orgs_relations' (#150) from hierarchical_orgs_relations into beta
Reviewed-on: D-Net/dnet-hadoop#150
2021-10-20 10:13:47 +02:00
Claudio Atzori 59f76b50d4 Merge branch 'beta' into hierarchical_orgs_relations 2021-10-20 09:42:35 +02:00
Claudio Atzori bc3372093e Merge pull request '[stats] affiliations in stats and monitor dbs' (#152) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#152
2021-10-20 09:40:34 +02:00
Antonis Lempesis 241dcf6df1 Merge branch 'beta' into beta 2021-10-19 23:54:21 +02:00
Claudio Atzori 515e068a78 Merge branch 'beta' into hierarchical_orgs_relations 2021-10-19 16:46:06 +02:00
Claudio Atzori 512e7b0170 code formatting 2021-10-19 16:19:29 +02:00
Claudio Atzori d517c71458 Merge branch 'dump' into beta 2021-10-19 16:15:42 +02:00
Claudio Atzori e9157c67aa Merge branch 'beta' into dump 2021-10-19 16:15:03 +02:00
Claudio Atzori 98f37c8d81 WIP: workflow nodes for including Scholexplorer records in the RAW graph 2021-10-19 16:14:40 +02:00
Claudio Atzori c8850456e9 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-10-19 16:09:54 +02:00
Sandro La Bruzzo c9870c5122 code formatted 2021-10-19 15:24:59 +02:00
Sandro La Bruzzo f8329bc110 since dhp-schemas changed, introducing new Relation inverse model, this class has been updated 2021-10-19 15:24:22 +02:00
Claudio Atzori 7a73010acd WIP: workflow nodes for including Scholexplorer records in the RAW graph 2021-10-19 11:59:16 +02:00
Miriam Baglioni c7f6cd2591 added again the setting for SAXReader 2021-10-19 10:15:26 +02:00
Sandro La Bruzzo a894d7adf3 updated version of dhp-schemas 2021-10-19 10:02:55 +02:00
miconis 5f780a6ba1 bug fix in migrate entities: parameter name was wrong 2021-10-18 23:30:40 +02:00
Miriam Baglioni 1315952702 merge with branch beta 2021-10-18 14:17:09 +02:00
Miriam Baglioni 1cc09adfaa Opencitations: changed the test class to mirror the creation (or not) of duplicate dois for .refs oc original, plus added an optional parameter to duplicate the relation 2021-10-18 14:11:27 +02:00
Miriam Baglioni 76d41602be Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-10-18 10:53:22 +02:00
Miriam Baglioni 46f82c7c8f removed not needed folder deletion 2021-10-18 10:53:16 +02:00
Sandro La Bruzzo 7b15b88d4c renamed wrong package, implemented last aggregation workflow for scholexplorer 2021-10-15 15:00:15 +02:00
Antonis Lempesis 41ecb1eb61 invalidating metadata before context thingies 2021-10-15 13:42:55 +03:00
Antonis Lempesis 4b7c8dff2d fetching affiliated results for 4 orgs in monitor. fixed affiliated orgs in stats db 2021-10-14 18:53:35 +03:00
Sandro La Bruzzo 51a03c0a50 refactor code for EBI from dhp-graph-mapper into dhp-aggregation 2021-10-14 14:23:13 +02:00
Claudio Atzori dd568ec88b Merge pull request 'Refactoring Solr Configuration' (#148) from beta_solr_config into beta
Reviewed-on: D-Net/dnet-hadoop#148
2021-10-14 12:45:11 +02:00
Claudio Atzori 14fbf92ad6 Merge branch 'beta' into beta_solr_config 2021-10-14 11:08:44 +02:00
Claudio Atzori b292e4a700 [stats wf] added extra logging in the context data retrieval phase 2021-10-13 17:31:53 +02:00
miconis 995c1eddaf minor change 2021-10-13 17:07:10 +02:00
Miriam Baglioni 5d9cc2452d changed the working path parameter value as dependant from the dnet-workflow working dir parameter 2021-10-13 15:33:50 +02:00
miconis 326bf63775 integration of parent child orgs relations 2021-10-13 12:24:48 +02:00
Miriam Baglioni 16b28494a9 added new parameter in the doiboost process workflow to specify a folder for the process of MAG dataset 2021-10-13 11:34:24 +02:00
Miriam Baglioni 63933808d4 added fix for mixing result types, added configuration default to funder subworkflow 2021-10-13 11:28:28 +02:00
Sandro La Bruzzo f2c8356ccf Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta 2021-10-12 12:36:40 +02:00
Sandro La Bruzzo 7387416e90 added params skip update to direct transform in OAF; this should be set to true in production 2021-10-12 12:36:30 +02:00
Claudio Atzori 914b3e92cb updating graph schema module dependency to version 2.8.20 to include organization parent/child relation constants 2021-10-12 12:00:45 +02:00
Sandro La Bruzzo 511da98d0c - fixed bug on download pmc Article
- removed unused line of code in SparkCreateActionset
2021-10-12 11:47:49 +02:00
Miriam Baglioni fec40bdd95 merging with branch beta - resolved conflicts 2021-10-12 09:16:36 +02:00
Miriam Baglioni 83f51f1812 refactoring 2021-10-12 09:14:43 +02:00
Sandro La Bruzzo 5606014b17 code refactor see ticket #7065 2021-10-12 08:11:53 +02:00
Claudio Atzori 2f61054cd1 code formatting 2021-10-11 18:29:42 +02:00
Claudio Atzori 83c90c7180 manually merging PR#149 D-Net/dnet-hadoop#149 2021-10-11 18:27:05 +02:00
Serafeim Chatzopoulos 201ce71cc1 Add resultsubject, relprojectname and resultacceptanceyear to __all field 2021-10-11 13:16:39 +03:00
Serafeim Chatzopoulos e468a7b96b Add tests to query Solr with different configurations 2021-10-08 16:58:51 +03:00
Serafeim Chatzopoulos de81007302 Add exploreTestConfig, a new Solr configuration folder 2021-10-08 16:54:56 +03:00
Sandro La Bruzzo 8f99d2af86 Make the doiBoost node point to the correct OpenAIRE Organization in relations 2021-10-08 08:35:12 +02:00
Alessia Bardi c48c43fa9e Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-10-07 17:30:53 +02:00
Alessia Bardi 8d3b60f446 test for patching records for EOSC Future 2021-10-07 17:30:45 +02:00
miconis 611ca511db set configuration property in openorgs duplicates wf 2021-10-07 15:39:55 +02:00
miconis 9646b9fd98 implementation of the http call for the update of openorgs suggestions 2021-10-07 11:29:11 +02:00
Sandro La Bruzzo 2557bb41f5 Implemented new method for update baseline inside scala node 2021-10-06 16:41:08 +02:00
Sandro La Bruzzo b84e0cabeb Implemented new method for update baseline 2021-10-05 16:34:47 +02:00
Sandro La Bruzzo f258bbb927 Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta 2021-10-05 10:21:50 +02:00
Sandro La Bruzzo 991b06bd0b removed generation of EBI links from old dump, now EBI link dump is created by another wf 2021-10-05 10:21:33 +02:00
Claudio Atzori cb7efe12ac Merge pull request 'beta' (#146) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#146
2021-10-05 10:09:37 +02:00
Miriam Baglioni e653756e3d applied some suggestions from SonarLint 2021-10-04 18:40:07 +02:00
dimitrispie 3f25d2efb2 Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into beta 2021-10-01 16:03:48 +03:00
dimitrispie 13687fd887 Sprint 3 indicators update 2021-10-01 16:02:02 +03:00
Miriam Baglioni 9814c3e700 merging with branch beta 2021-10-01 13:00:03 +02:00
Miriam Baglioni c4ccd7b32c - 2021-10-01 12:59:47 +02:00
Miriam Baglioni c8321ad31a merge with branch beta 2021-10-01 12:59:08 +02:00
Claudio Atzori 60a6a9a583 [graph2hive] added field 'measures' to the result view 2021-09-30 09:27:26 +02:00
Sandro La Bruzzo 66702b1973 Added node to update datacite 2021-09-28 08:59:06 +02:00
Sandro La Bruzzo 477cb10715 Merge remote-tracking branch 'origin/beta' into beta 2021-09-27 16:57:23 +02:00
Sandro La Bruzzo be79d74e3d Fixed DoiBoost generation to point to correct organization in affiliation relation 2021-09-27 16:57:04 +02:00
Claudio Atzori 35619b93ee Merge pull request 'implementation of the whitelist for similarity relations' (#144) from dedup_whitelist into beta
Reviewed-on: D-Net/dnet-hadoop#144
2021-09-27 16:47:40 +02:00
Claudio Atzori 474117c2e8 Merge branch 'beta' into dedup_whitelist 2021-09-27 16:41:25 +02:00
Miriam Baglioni 476a4708d6 mergin with branch beta 2021-09-27 16:02:32 +02:00
Miriam Baglioni 5ec69889db OpenCitations: creation of AS from OC 2021-09-27 16:02:06 +02:00
Claudio Atzori a53acfbc06 Merge pull request '[stats] updates in the mapping, indicators, wf' (#145) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#145
2021-09-27 15:59:54 +02:00
Alessia Bardi b924276e18 tests to generate records for the EOSC-Future demo with the EOSC Jupyter Notebook subject 2021-09-24 17:11:56 +02:00
Antonis Lempesis a1e1cf32d7 fixed an impala error 2021-09-24 12:57:24 +03:00
Antonis Lempesis f358cabb2b fixed typo 2021-09-22 21:50:37 +03:00
Miriam Baglioni eedf7c3310 merging with branch beta 2021-09-22 15:18:34 +02:00
Miriam Baglioni f2118d771a first steps in the implementation of the integration of opencitations 2021-09-22 15:18:05 +02:00
Claudio Atzori df15a4dc9f Merge pull request 'UnknowHostException handling for orcid collector api' (#141) from enrico.ottonello/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#141
2021-09-22 11:51:13 +02:00
Claudio Atzori 7fa60e166e Merge branch 'beta' into dedup_whitelist 2021-09-22 11:31:18 +02:00
Antonis Lempesis 421d55265d created hive action for observatory queries 2021-09-21 03:07:58 +03:00
Enrico Ottonello 92a63f78fe multiple download attempts handling if a connection to orcid server fails 2021-09-20 18:25:00 +02:00
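Multiple download attempts reduce to a bounded retry loop; a sketch where a plain URL fetch stands in for the real ORCID client call:

```scala
// Retry on connection problems (UnknownHostException is an IOException)
// and rethrow the last failure once the attempts are exhausted.
def downloadWithRetry(url: String, attempts: Int = 3): String = {
  var last: Throwable = new IllegalStateException("no attempts made")
  for (_ <- 1 to attempts) {
    try return scala.io.Source.fromURL(url).mkString
    catch {
      case e: java.io.IOException =>
        last = e
        Thread.sleep(1000) // back off before the next attempt
    }
  }
  throw last
}
```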
Enrico Ottonello 0c74f5667e Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-09-20 18:12:31 +02:00
miconis 853333bdde implementation of the whitelist for similarity relations 2021-09-20 16:21:47 +02:00
Antonis Lempesis 8b681dcf1b attempt to make the observatory wf run in hive 2021-09-18 00:35:14 +03:00
Claudio Atzori 71cfa386bc Merge pull request 'cleaning for relation fields' (#142) from clean_relations into beta
Reviewed-on: D-Net/dnet-hadoop#142
2021-09-17 16:01:03 +02:00
Antonis Lempesis 2943287d10 fixed the definition of cc_licence, part II 2021-09-16 15:59:06 +03:00
Antonis Lempesis dd2329849f fixed the definition of cc_licence 2021-09-16 13:50:34 +03:00
Claudio Atzori 09c2eb7f62 Merge branch 'beta' into clean_relations 2021-09-16 11:09:47 +02:00
Claudio Atzori 954a16c213 Merge pull request 'Propagation relations not Cleaned' (#143) from enrichment into beta
Reviewed-on: D-Net/dnet-hadoop#143
2021-09-15 19:14:38 +02:00
Miriam Baglioni e9ccdf853f related to D-Net/dnet-hadoop#132 2021-09-15 18:44:54 +02:00
Claudio Atzori 12766bf5f2 Merge branch 'beta' into clean_relations 2021-09-15 17:18:15 +02:00
Claudio Atzori 663b1556d7 manually integrating PR#140 D-Net/dnet-hadoop#140 2021-09-15 16:40:25 +02:00
Claudio Atzori ebf53a1616 added cleaning for relation fields: subRelType & relClass according to dedicated vocabs 2021-09-15 16:10:37 +02:00
Enrico Ottonello 8b804e7fe1 removed unused imports 2021-09-14 17:30:52 +02:00
Enrico Ottonello aefa36c54b other task executions go ahead if UnknownHostException happens on a single task 2021-09-14 17:26:15 +02:00
Antonis Lempesis de9bf3a161 added cc_licences and abstracts in observatory db 2021-09-14 01:29:08 +03:00
Antonis Lempesis 9b1936701c fixed yet another typo 2021-09-13 21:07:44 +03:00
Antonis Lempesis 8fc89ae822 moved context table creation before indicators 2021-09-13 14:33:23 +03:00
Antonis Lempesis 461bf90ca6 fixed the gold_oa definition 2021-09-13 11:10:30 +03:00
Antonis Lempesis 43852bac0e creating other::other concept for all contexts 2021-09-13 01:36:41 +03:00
Antonis Lempesis f13cca7e83 moved dependencies of indicators before them... 2021-09-08 23:07:58 +03:00
Antonis Lempesis c6ada217a1 fixed typo 2021-09-08 22:34:59 +03:00
Antonis Lempesis 1250ae197f using new indicators for the definition of peerreviewed, gold, and green 2021-09-08 14:08:43 +03:00
Antonis Lempesis ccee451dde added indicators of sprint 2 in monitor db 2021-09-07 23:17:13 +03:00
Sandro La Bruzzo aed29156c7 changed behavior in transformation job so that it doesn't fail at the first error 2021-09-07 19:05:46 +02:00
Sandro La Bruzzo 370dddb2fa fix bug on oai iterator that skipped cleaned records 2021-09-07 11:20:41 +02:00
Sandro La Bruzzo 3c6fc2096c fix bug on oai iterator that skipped cleaned records 2021-09-07 10:46:26 +02:00
Sandro La Bruzzo d4dadf6d77 reduced max number of PID in RelatedEntity 2021-09-02 14:21:24 +02:00
Sandro La Bruzzo 9f8a80deb7 fixed wrong import of unresolved relation in openaire 2021-09-01 14:16:27 +02:00
Alessia Bardi 3762b17f7b added VERSION and PART relationship and re-ordered according to my personal and obviously possibly biased
ordering
2021-08-31 20:20:05 +02:00
Sandro La Bruzzo e8b3cb9147 Implemented method to download delta updates in EBI Links 2021-08-30 09:32:45 +02:00
Alessia Bardi ccf4103a25 keep the original url if the decoder fails for any reason 2021-08-25 10:07:58 +02:00
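Keeping the original URL when decoding fails maps naturally onto a Try with a fallback; a minimal sketch:

```scala
import java.net.URLDecoder
import scala.util.Try

// Decode the URL, but keep the original value if decoding fails
// for any reason.
def safeDecode(url: String): String =
  Try(URLDecoder.decode(url, "UTF-8")).getOrElse(url)
```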
Sandro La Bruzzo 45898c71ac fixed wrong doi in pubmed 2021-08-24 15:20:04 +02:00
Alessia Bardi 00a28c0080 originalId was renamed to acronym 2021-08-23 15:02:21 +02:00
Alessia Bardi f19b04d41b code formatting after mvn compile 2021-08-23 14:33:39 +02:00
Alessia Bardi 412d2cb16a added dependencies to classgraph and opencsv. Bumped version of dhp-schemas 2021-08-23 14:32:00 +02:00
Alessia Bardi 3bcac7e88c Merge pull request 'towards EOSC datasource profiles' (#130) from datasource_model_eosc_beta into beta
Reviewed-on: D-Net/dnet-hadoop#130
2021-08-23 11:58:34 +02:00
Alessia Bardi 931f430129 Merge branch 'beta' into datasource_model_eosc_beta 2021-08-23 11:57:21 +02:00
Alessia Bardi 4c1474e693 Dealing with #6859#note-2: we have to decode URLs to avoid & and other chars encoded because of the original XML representation of data 2021-08-20 17:03:30 +02:00
Miriam Baglioni 5f8ccbc365 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-08-20 11:13:47 +02:00
Miriam Baglioni 882abb40e4 CrossrefDump - 2021-08-20 11:12:53 +02:00
Miriam Baglioni 45c62609af CrossrefDump - modified because parameter file was moved 2021-08-20 11:12:31 +02:00
Miriam Baglioni 35880c0e7b CrossrefDump - changed the wf to be able to resume from one of the steps 2021-08-20 11:11:35 +02:00
Miriam Baglioni f3b6c392c1 CrossrefDump - moving parameter file under folder crossref_dump_reader 2021-08-20 11:10:58 +02:00
Miriam Baglioni 65822400ce CrossrefDump - added new parameter file that was missing 2021-08-20 11:10:35 +02:00
Alessia Bardi a053e1513c different funders in blacklist from BETA and PROD aggregator 2021-08-19 11:32:27 +02:00
Alessia Bardi 812bd54c57 different funders in blacklist from BETA and PROD aggregator 2021-08-19 11:30:14 +02:00
Miriam Baglioni a65d3caaea Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-08-19 10:29:10 +02:00
Miriam Baglioni e5cf11d088 change open access route to result matching hbm to gold 2021-08-19 10:29:04 +02:00
Claudio Atzori 7c0c67bdd6 added mock pom 2021-08-13 17:45:53 +02:00
Claudio Atzori 82086f3422 fixed directory name 2021-08-13 17:42:14 +02:00
Claudio Atzori bc7068106c added crossref download oozie workflow 2021-08-13 17:19:44 +02:00
Claudio Atzori 2c0a05f11a manually merged PR#139 2021-08-13 17:15:53 +02:00
Claudio Atzori d43667d857 Merge pull request 'Automatic download of Crossref' (#138) from crossref_dw_wf into beta
Reviewed-on: D-Net/dnet-hadoop#138
2021-08-13 17:10:10 +02:00
Miriam Baglioni 5856ca8a7b merging with branch beta - resolved conflicts 2021-08-13 16:45:45 +02:00
Miriam Baglioni 6fec71e8d2 removed the specifics of the infra we are running the wf on from the wf name 2021-08-13 16:39:02 +02:00
Miriam Baglioni ed7e28490a change in sh 2021-08-13 16:19:01 +02:00
Claudio Atzori 7743d0f919 consolidated dnet wf profiles into the same submodule 2021-08-13 16:14:54 +02:00
Miriam Baglioni 6eb7508995 merging with branch beta 2021-08-13 16:07:04 +02:00
Claudio Atzori f74adc4752 added DownloadCSV2 as alternative implementation of the same download procedure 2021-08-13 15:52:15 +02:00
Claudio Atzori 5f0903d50d fixed CSV downloader & tests 2021-08-13 14:17:54 +02:00
Claudio Atzori 17cefe6a97 [HBM] removed stale replace option 2021-08-13 12:43:59 +02:00
Claudio Atzori 7ee2757fcd fixed DownloadCSV parameters spec; workflow patching the hostedby replaces the graph content (publication, datasource) rather than creating a copy 2021-08-13 12:41:01 +02:00
Claudio Atzori c3ad4ab701 minor fixes 2021-08-13 12:23:15 +02:00
Claudio Atzori baed5e3337 test classes moved in specific components 2021-08-13 12:14:47 +02:00
Claudio Atzori 3359f73fcf cleanup & best practices 2021-08-13 12:00:42 +02:00
Claudio Atzori 4e6575a428 Merge pull request 'Moving Download CSV' (#137) from refactoring_download_csv into beta
Reviewed-on: D-Net/dnet-hadoop#137
2021-08-13 10:41:01 +02:00
Miriam Baglioni f4ec81c92c merging with branch beta 2021-08-13 10:31:35 +02:00
Miriam Baglioni dc8b05b39e Hosted By Map - changed the association with the datasource id for the hostedby element: there is no longer any need to compute it. With the new HBM it is already the id in the graph 2021-08-13 10:18:25 +02:00
Miriam Baglioni 32fd75691f refactoring 2021-08-13 10:15:42 +02:00
Miriam Baglioni dfd1e53c69 added external dependency for version 2021-08-13 10:15:12 +02:00
Miriam Baglioni 01db1f8bc4 GetCSV refactoring - removed not needed import 2021-08-13 10:14:17 +02:00
Miriam Baglioni 964a46ca21 GetCSV refactoring - modified due to movement of classes 2021-08-13 10:11:18 +02:00
Miriam Baglioni eaf077fc34 GetCSV refactoring - removed not needed dependency 2021-08-13 10:08:58 +02:00
Miriam Baglioni 5f674efb0c moved dependency version in external pom 2021-08-13 10:07:53 +02:00
Miriam Baglioni 5cd5714530 GetCSV refactoring - added ignore annotation for fields not in input csv 2021-08-13 10:06:49 +02:00
Miriam Baglioni 58f241f4a2 GetCSV refactoring - changed due to change of input resource 2021-08-13 10:04:44 +02:00
Miriam Baglioni f3d575f749 GetCSV refactoring - changed due to changes in input resource 2021-08-13 10:03:57 +02:00
Miriam Baglioni a5f6edfa6c GetCSV refactoring - changed to mirror the original model class 2021-08-13 09:30:03 +02:00
Miriam Baglioni ed183d878e GetCSV refactoring - modified test classes due to change in the model of projects and programme 2021-08-13 09:28:51 +02:00
Miriam Baglioni 8769dd8eef GetCSV refactoring - refactoring due to movement of classes 2021-08-12 18:20:56 +02:00
Miriam Baglioni 6b9e1bf2e3 GetCSV refactoring - removing not needed dependency 2021-08-12 18:17:50 +02:00
Miriam Baglioni d57b2bb927 GetCSV refactoring - removing not needed dependency 2021-08-12 18:12:51 +02:00
Miriam Baglioni 9da74b544a GetCSV refactoring - refactoring due to movement of classes 2021-08-12 18:12:15 +02:00
Miriam Baglioni ab8abd61bb GetCSV refactoring - refactoring due to movement of classes 2021-08-12 18:11:07 +02:00
Miriam Baglioni 335a824e34 GetCSV refactoring - fixed issue 2021-08-12 18:10:10 +02:00
Miriam Baglioni f0845e9865 GetCSV refactoring - refactoring due to movement of classes 2021-08-12 18:04:58 +02:00
Miriam Baglioni 7a789423aa GetCSV refactoring - refactoring due to movement of classes 2021-08-12 18:04:27 +02:00
Miriam Baglioni e9fc3ef3bc GetCSV refactoring - changed to use the new class to get and write the csv file 2021-08-12 18:03:41 +02:00
Miriam Baglioni 4317211a2b GetCSV refactoring - refactoring due to movement 2021-08-12 18:03:14 +02:00
Miriam Baglioni b62cd656a7 GetCSV refactoring - changed the model to store only the information needed 2021-08-12 18:01:10 +02:00
Miriam Baglioni d36e925277 GetCSV refactoring - moved under model package 2021-08-12 18:00:21 +02:00
Miriam Baglioni 7402daf51a GetCSV refactoring - added dependency to open-csv lib 2021-08-12 17:59:19 +02:00
Miriam Baglioni 733bcaecf6 GetCSV refactoring - added test class (all the tests are disabled since they refer to remote resource) 2021-08-12 17:58:52 +02:00
Miriam Baglioni bfe8f5335c GetCSV refactoring - copied model classes in test path 2021-08-12 17:58:14 +02:00
Miriam Baglioni 6e84b3951f GetCSV refactoring - moving classes that have a dependency on the GetCSV class (that was located in graph-mapper) to dhp-common 2021-08-12 17:57:41 +02:00
Claudio Atzori e91ffcd2f3 Merge pull request 'hostedbymap' (#136) from hostedbymap into beta
Reviewed-on: D-Net/dnet-hadoop#136
2021-08-12 17:10:55 +02:00
Claudio Atzori 9587d4aee8 Merge branch 'beta' into hostedbymap 2021-08-12 17:04:30 +02:00
Claudio Atzori 86d940044c added test to verify bad records from FWF-E-Book-Library 2021-08-12 11:32:56 +02:00
Claudio Atzori 8cdce59e0e [graph raw] let the mapping exceptions propagate 2021-08-12 11:32:26 +02:00
Miriam Baglioni 08dd2b2102 moving the dependency version to the external pom file 2021-08-11 18:09:41 +02:00
Miriam Baglioni ac417ca798 removed not needed test resource 2021-08-11 17:50:33 +02:00
Miriam Baglioni e33daaeee8 reverting 2021-08-11 17:46:19 +02:00
Miriam Baglioni 9650eea497 reverting 2021-08-11 17:45:48 +02:00
Miriam Baglioni 785db1d5b2 refactoring 2021-08-11 17:44:07 +02:00
Miriam Baglioni 95e5482bbb removing not needed dependency 2021-08-11 17:42:26 +02:00
Miriam Baglioni cc3d72df0e removing not needed dependency 2021-08-11 17:42:01 +02:00
Miriam Baglioni b966329833 reverting 2021-08-11 17:37:00 +02:00
Miriam Baglioni 8ad7c71417 reverting 2021-08-11 17:36:12 +02:00
Miriam Baglioni 0e1a6bec20 reverting 2021-08-11 17:32:29 +02:00
Miriam Baglioni c6a2a780a9 reverting 2021-08-11 17:30:17 +02:00
Miriam Baglioni b6b58bba28 reverting 2021-08-11 17:25:37 +02:00
Miriam Baglioni 804589eb30 reverting 2021-08-11 17:23:35 +02:00
Miriam Baglioni d688749ad9 reverting 2021-08-11 17:22:28 +02:00
Miriam Baglioni 524c06e028 reverting 2021-08-11 17:20:30 +02:00
Miriam Baglioni 7aa3260729 reverting 2021-08-11 17:18:45 +02:00
Miriam Baglioni 55fc500d8d reverting 2021-08-11 17:17:48 +02:00
Miriam Baglioni f9b6b45d85 reverting 2021-08-11 17:04:48 +02:00
Miriam Baglioni 8229632839 adding assertions to the mapping of the unibi part of gold list 2021-08-11 16:36:01 +02:00
Miriam Baglioni b1c6140ebf removed all comments in Italian 2021-08-11 16:23:33 +02:00
Miriam Baglioni 52c18c2697 removed not needed test class. The functionality has been moved 2021-08-11 16:16:55 +02:00
Miriam Baglioni 8da3a25cf6 merging with branch beta 2021-08-11 15:55:34 +02:00
Claudio Atzori 9f4db73f30 updated/fixed unit tests 2021-08-11 15:02:51 +02:00
Claudio Atzori 61d811ba53 suggestions from intellij 2021-08-11 12:18:20 +02:00
Claudio Atzori 2ee21da43b suggestions from SonarLint 2021-08-11 12:13:22 +02:00
Miriam Baglioni b954fe9ba8 merging with branch beta 2021-08-11 10:12:46 +02:00
Miriam Baglioni b688567db5 hostedbymap - modified part of test to check the bestaccessright changed 2021-08-11 10:12:10 +02:00
Miriam Baglioni 9731a6144a hostedbymap - if the journal is open access, the best access right in the result may also be changed 2021-08-10 17:49:45 +02:00
Miriam Baglioni a90bac3bc9 Graph Dump - added method to test class to verify addition of validation date in projects for community result 2021-08-09 16:36:54 +02:00
Miriam Baglioni bd0d7bfba7 Graph Dump - added resources for testing addition of validation date in project for communityresult 2021-08-09 16:36:17 +02:00
Miriam Baglioni 8daaa32e90 Graph Dump - added resources for testing 2021-08-09 15:46:29 +02:00
Miriam Baglioni bc9e3a06ba Graph Dump - extended the test class 2021-08-09 15:46:06 +02:00
Claudio Atzori d64a942a76 fixed MappersTest 2021-08-09 12:32:26 +02:00
Miriam Baglioni 2efa5abda5 refactoring 2021-08-09 12:28:36 +02:00
Claudio Atzori 577f3b1ac8 added dnet workflows responsible for the graph construction, enrichment, provision 2021-08-09 11:53:58 +02:00
Miriam Baglioni da20fceaf7 removed all the parts related to the crossref dump download since it is done in a separate workflow 2021-08-09 11:53:45 +02:00
Claudio Atzori 964f97ed4d cleanup 2021-08-09 11:53:06 +02:00
Miriam Baglioni 54a6cbb244 CrossrefDump - put token among the parameters 2021-08-09 11:41:10 +02:00
Miriam Baglioni b7079804cb CrossrefDump - put token among the parameters 2021-08-09 11:34:35 +02:00
Miriam Baglioni a5f82f442b Merge branch 'beta' into doiboost_wf 2021-08-09 11:17:51 +02:00
Miriam Baglioni b6dcf89d22 merging with branch beta 2021-08-09 11:14:43 +02:00
Miriam Baglioni eff499af9f added new tests and changed the test example 2021-08-09 11:12:30 +02:00
Claudio Atzori a45b95ccc1 resolving conflicts for PR#134 2021-08-09 10:50:03 +02:00
Miriam Baglioni 5d70f842eb merging with branch beta 2021-08-06 18:57:09 +02:00
Miriam Baglioni c3931557e3 extended the logic of the dump to consider the validation date in the relation (also in the dumped result for communities and funders at the level of the project), the extension of the instance for the APC, the pid, the alternate identifiers, and the extension of the AccessRight to store the OpenAccessRoute. Added a new resource for testing and extended the old class to verify the new dump. Also fixed an issue in the relation dump: only relations whose source and target are entities in the graph are dumped. The same holds for references to projects 2021-08-06 18:56:18 +02:00
Claudio Atzori 66f398fe6f Merge pull request '[stats] fixed a typo' (#133) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#133
2021-08-06 14:29:57 +02:00
Miriam Baglioni 6bd1eca7e0 merge branch with beta 2021-08-05 15:23:32 +02:00
Miriam Baglioni 73dc082927 added new dumped fields (openaccessroute, pid and alternate identifier at the level of the instance) and the bipFinder measure at the level of the result 2021-08-05 15:20:50 +02:00
Miriam Baglioni ee13da9258 merge branch with master 2021-08-05 11:34:20 +02:00
Miriam Baglioni bd096f5170 removed not needed param file 2021-08-05 10:55:43 +02:00
Miriam Baglioni 5faeefbda8 added script to download the dump, changed the workflow input parameters 2021-08-05 10:54:03 +02:00
Miriam Baglioni 1965e4eece new workflow for downloading the dump of crossref and unpacking it 2021-08-04 18:29:03 +02:00
Claudio Atzori 83c04e5d28 mapping test for dataset records adapted to reflect the delegated pid authority (zenodo) 2021-08-04 10:37:57 +02:00
Miriam Baglioni b4eb026c8b merging with branch beta 2021-08-04 10:21:37 +02:00
Miriam Baglioni c7b71647c6 Hosted By Map - modification of the resource for testing the presence of only one entry per datasource id 2021-08-04 10:20:02 +02:00
Miriam Baglioni eb8c3f8594 Hosted By Map - test modified because of the application of the new aggregator on datasources 2021-08-04 10:19:17 +02:00
Miriam Baglioni e94ae0b1de Hosted By Map - extension of the workflow to also consider the application of the map to publications and datasources 2021-08-04 10:18:11 +02:00
Miriam Baglioni 67ba4c40e0 Hosted By Map - added parameter resources 2021-08-04 10:17:28 +02:00
Miriam Baglioni eccf3851b0 Hosted By Map - refactoring 2021-08-04 10:16:30 +02:00
Sandro La Bruzzo 74afe43c3a fixed wrong test file 2021-08-04 10:16:17 +02:00
Miriam Baglioni 1e952cccf6 Hosted By Map - refactoring and deletion of not needed methods 2021-08-04 10:15:43 +02:00
Miriam Baglioni 8ba8c77f92 Hosted By Map - refactoring 2021-08-04 10:14:57 +02:00
Miriam Baglioni 8f7623e77a Hosted By Map - refactoring and application of the new aggregator 2021-08-04 10:14:20 +02:00
Sandro La Bruzzo 3fc820203b fixed wrong test file 2021-08-04 10:13:59 +02:00
Miriam Baglioni a7bf314fd2 Hosted By Map - added new aggregator to get just one result per datasource id 2021-08-04 10:13:30 +02:00
Miriam Baglioni 9831725073 Hosted By Map - removed an unneeded step from the workflow. The hbm will also take care of the integration of the unibi list of gold open access journals 2021-08-03 11:02:17 +02:00
Miriam Baglioni 100e54e6c8 merging with branch beta 2021-08-03 10:47:11 +02:00
Miriam Baglioni 461b8a29a0 removed not needed class 2021-08-03 10:46:51 +02:00
Miriam Baglioni 327cddde33 Hosted By Map - refactoring 2021-08-03 10:44:13 +02:00
Miriam Baglioni 17292c6641 Hosted By Map - resources for testing purposes 2021-08-02 19:37:08 +02:00
Miriam Baglioni ee7ccb98dc Hosted By Map - test class to verify the application of the hbm to results and datasource 2021-08-02 19:36:18 +02:00
Miriam Baglioni 90e91486e2 Hosted By Map - test class to verify each step in the preparation process 2021-08-02 19:35:52 +02:00
Miriam Baglioni 1e859706a3 Hosted By Map - Classes to apply the HBM to results and datasources 2021-08-02 19:35:23 +02:00
Miriam Baglioni 72df8f9232 Hosted By Map - removed the aggregator for the datasource (it is no more needed) and added a new aggregator for the results. Changed also the hostedBYMap aggregator 2021-08-02 19:34:44 +02:00
Miriam Baglioni ff1ce75e33 Hosted By Map - modification in the code to prepare the info needed to apply the HostedByMap. There is no need to join datasources with the hbm: all the information needed is in the hosted by map already 2021-08-02 19:32:59 +02:00
Claudio Atzori e826aae848 using constants from ModelConstants 2021-08-02 14:28:59 +02:00
Claudio Atzori fd55c77d97 updated dependency dhp-schemas:2.7.15 2021-08-02 13:48:42 +02:00
Antonis Lempesis 117c3d5c67 fixed a typo 2021-08-02 12:15:58 +03:00
Miriam Baglioni 1695d45bd4 Hosted By Map - Test class to verify the preparation of the intermediate information 2021-07-30 17:57:01 +02:00
Miriam Baglioni 7c6ea2f4c7 Hosted By Map - first attempt at the creation of intermediate information to be used to apply the hosted by map on the graph entities 2021-07-30 17:56:27 +02:00
Miriam Baglioni d8b9b0553b Hosted By Map - model classes to store the intermediate information to be used to apply the hosted by map 2021-07-30 17:55:39 +02:00
Miriam Baglioni 613bd3bde0 Hosted By Map - refactor of the first attempt to prepare a new hosted by map depending on the datasources in the graph and on two external sources: the gold list from unibi and the doaj list of open access journals. Both lists are downloaded from a provided url parameter 2021-07-30 17:54:45 +02:00
Miriam Baglioni d1807781c0 mergin with branch beta 2021-07-30 14:34:07 +02:00
Miriam Baglioni 1d6ac3715b merge branch with beta 2021-07-30 11:58:29 +02:00
Claudio Atzori e244f73165 Update 'README.md' 2021-07-30 11:54:38 +02:00
Claudio Atzori 11e26c020a Update 'README.md' 2021-07-30 11:54:13 +02:00
Claudio Atzori 19620eed46 applying PR#131, Patch the identifiers (source/target) in the relations, refinements 2021-07-30 11:09:32 +02:00
Claudio Atzori 5219d56be5 Merge pull request 'Patch the identifiers (source/target) in the relations, refinements' (#131) from fct_project_id_replacement into master
Reviewed-on: D-Net/dnet-hadoop#131
2021-07-30 11:07:54 +02:00
Claudio Atzori 4f78565c04 fixed implementation of PatchRelationsApplication, refined the relative unit test 2021-07-30 11:07:09 +02:00
Claudio Atzori a6a38cca9e fixed implementation of PatchRelationsApplication, refined the relative unit test 2021-07-30 11:06:11 +02:00
Miriam Baglioni 9bc4fd3b69 Patch FCT relations - fixed issue with join 2021-07-30 10:34:05 +02:00
Miriam Baglioni 2fc89fc9b5 Merge branch 'fct_project_id_replacement' of https://code-repo.d4science.org/D-Net/dnet-hadoop into fct_project_id_replacement 2021-07-30 10:20:43 +02:00
Claudio Atzori 081fe92a21 Merge branch 'fct_project_id_replacement' of https://code-repo.d4science.org/D-Net/dnet-hadoop into fct_project_id_replacement 2021-07-30 10:13:56 +02:00
Claudio Atzori 576693d782 added unit test for PatchRelationsApplication 2021-07-30 10:13:33 +02:00
Claudio Atzori 55e6470f44 Merge pull request 'added the sprint 2 indicators in monitor db' (#129) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#129
2021-07-30 10:11:46 +02:00
Sandro La Bruzzo 6358f92c3a added sleep to work around the problem of lost index-creation requests 2021-07-30 08:54:37 +02:00
Antonis Lempesis 26af0320d0 added the sprint 2 indicators in monitor db 2021-07-30 00:31:33 +03:00
Claudio Atzori 7b172e7cd9 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-07-29 13:57:06 +02:00
Claudio Atzori c53d106e80 [provision] lowercase relation filter 2021-07-29 13:57:00 +02:00
Claudio Atzori 6e3554a45e [provision] lowercase relation filter 2021-07-29 13:56:37 +02:00
Sandro La Bruzzo b1b0cc3f15 fixed wrong package name 2021-07-29 13:55:08 +02:00
Miriam Baglioni baad01cadc hostedbymap 2021-07-29 13:04:39 +02:00
Claudio Atzori e725c88ebb [raw_all] patching relation identifier phase to be run at the end, i.e. includes also claimed relations 2021-07-29 13:03:43 +02:00
Claudio Atzori 5d08ad86ae [raw_all] patching relation identifier phase to be run at the end, i.e. includes also claimed relations 2021-07-29 13:03:16 +02:00
Claudio Atzori e87e1805c4 [raw_all] added extra workflow step for patching the identifiers in the relations, given an id mapping dataset 2021-07-29 12:13:06 +02:00
Claudio Atzori f83dd70e1c Merge pull request 'Patch the identifiers (source/target) in the relations' (#125) from fct_project_id_replacement into master
Reviewed-on: D-Net/dnet-hadoop#125
2021-07-29 12:11:27 +02:00
Claudio Atzori 5f7330d407 Merge branch 'master' into fct_project_id_replacement 2021-07-29 11:38:22 +02:00
Claudio Atzori 1923c1ce21 replaced full join + filtering with a left join 2021-07-29 11:36:20 +02:00
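For context, a minimal sketch of what such a rewrite can look like in Spark: a single left join replaces the full join plus filtering, and unmatched rows keep their original identifier. The class and field names here are hypothetical, not the project's actual code.

```scala
import org.apache.spark.sql.{Dataset, SparkSession}

case class Relation(source: String, target: String)
case class IdMapping(oldId: String, newId: String)

// Left join: every relation survives; rows without a mapping come back with
// a null right side and keep their original source identifier.
def patchSources(spark: SparkSession,
                 rels: Dataset[Relation],
                 idMap: Dataset[IdMapping]): Dataset[Relation] = {
  import spark.implicits._
  rels
    .joinWith(idMap, rels("source") === idMap("oldId"), "left")
    .map { case (r, m) => if (m == null) r else r.copy(source = m.newId) }
}
```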
Claudio Atzori dc55ed4acd Merge pull request '[beta] stats update workflow' (#128) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#128
2021-07-29 11:13:21 +02:00
Claudio Atzori 908f57a475 code formatting 2021-07-29 10:49:39 +02:00
Sandro La Bruzzo 3721df7aa6 refactoring of the scholexplorer actionset creation, moved to package dhp-aggregation 2021-07-29 10:45:35 +02:00
Michele Artini 6aef3e8f46 Merge pull request '[broker] updated relation descriptors' (#127) from broker_relations_upgrade into beta
Reviewed-on: D-Net/dnet-hadoop#127
2021-07-29 08:16:49 +02:00
Antonis Lempesis 4afa5215a9 fixed an NPE? 2021-07-28 21:59:12 +03:00
Antonis Lempesis 3d1580fa9b fixed a typo 2021-07-28 18:50:31 +03:00
Claudio Atzori 4c5a71ba2f [broker] updated relation descriptors, making use of constant values 2021-07-28 17:11:18 +02:00
Claudio Atzori a9961a1835 [cleaning] title cleaning based on the me.xuender:unidecode library 2021-07-28 16:36:33 +02:00
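As an illustration of the technique named in the commit above, a sketch of ASCII transliteration before title cleaning; it assumes the static Unidecode.decode entry point that the me.xuender:unidecode README documents, and is not the project's actual cleaning code.

```scala
import me.xuender.unidecode.Unidecode

// Transliterate to plain ASCII first, so accented and non-Latin characters
// do not defeat the simple regex-based cleaning that follows.
def cleanTitle(title: String): String =
  Unidecode.decode(title).trim.replaceAll("\\s+", " ")
```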
Claudio Atzori e1797c0a42 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-07-28 16:21:36 +02:00
Claudio Atzori 6dddad86ee [cleaning] title cleaning based on the me.xuender:unidecode library 2021-07-28 16:21:29 +02:00
Sandro La Bruzzo 3d8f0f629b implemented workflow for the creation of the scholexplorer action set 2021-07-28 16:15:34 +02:00
Antonis Lempesis 9b181ffa73 added the h2020 classification scheme for projects 2021-07-28 16:31:29 +03:00
Alessia Bardi df8715a1ec format code after mvn compile 2021-07-28 11:58:26 +02:00
Michele Artini 3e2a2d6e71 added new fields in xml 2021-07-28 11:56:55 +02:00
Alessia Bardi c806387d4b tests for enermaps 2021-07-28 11:54:36 +02:00
Alessia Bardi 9594343725 code formatting after mvn compile 2021-07-28 11:41:34 +02:00
Claudio Atzori 2fff24df55 code formatting 2021-07-28 11:34:19 +02:00
Michele Artini 9f1c7b8e17 tests 2021-07-28 11:32:34 +02:00
Claudio Atzori b346feed36 Merge pull request 'Change the access right in DoiBoost' (#126) from doiboosi_accessright into beta
Reviewed-on: D-Net/dnet-hadoop#126
2021-07-28 11:29:15 +02:00
Antonis Lempesis 4a9741825d added result_orcid, result_project provenance, issn in datasources 2021-07-28 12:28:04 +03:00
Miriam Baglioni 3d2bba3d5d removing not needed classes 2021-07-28 11:25:43 +02:00
Miriam Baglioni cc0d3d8a7b merging with branch beta 2021-07-28 11:24:46 +02:00
Michele Artini e6f1773d63 mapping of new eosc fields 2021-07-28 11:17:11 +02:00
Miriam Baglioni 80d5b3b4de DoiBoost AccessRigh #4362 - removing commented code 2021-07-28 11:16:49 +02:00
Miriam Baglioni 5fe016dcbc DoiBoost AccessRigh #4362 - related to https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/126/files#issuecomment-4194 2021-07-28 11:14:28 +02:00
Miriam Baglioni 73ed7374a9 merging with branch beta 2021-07-28 11:05:16 +02:00
Miriam Baglioni 43e62fcae9 DoiBoost AccessRigh #4362 - related to https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/126/files#issuecomment-4193 2021-07-28 11:04:55 +02:00
Michele Artini c72c960ffb added eosc fields 2021-07-28 11:03:15 +02:00
Michele Artini 1fb572a33a added eosc fields 2021-07-28 10:52:24 +02:00
Miriam Baglioni 708d0ade34 Merge branch 'beta' into hostedbymap 2021-07-28 10:37:22 +02:00
Sandro La Bruzzo 16c91203bd implemented workflow for the creation of the scholexplorer action set 2021-07-28 10:30:49 +02:00
Miriam Baglioni 6c936943aa merging with branch beta 2021-07-28 10:24:48 +02:00
Miriam Baglioni 0424f47494 HostedByMap fixing issues 2021-07-28 10:24:13 +02:00
Michele Artini 52e2315ba2 removed trick for datasourcetypeui 2021-07-28 10:23:00 +02:00
Claudio Atzori d267dce520 [raw_all] added extra workflow step for patching the identifiers in the relations, given an id mapping dataset 2021-07-27 17:18:29 +02:00
Sandro La Bruzzo 825d9f0289 fixed datacite workflow starting from Importing delta 2021-07-27 16:09:46 +02:00
Claudio Atzori 5aa7d16d1b updated assertions in eu.dnetlib.dhp.oa.graph.raw.MappersTest 2021-07-27 15:11:58 +02:00
Claudio Atzori 998b66855a updated assertions in eu.dnetlib.dhp.oa.graph.raw.MappersTest 2021-07-27 15:11:37 +02:00
Antonis Lempesis 1a28a69cac changed the citeee in *_citations to cites 2021-07-27 15:14:09 +03:00
Miriam Baglioni 74f801b689 merging with branch beta 2021-07-27 13:18:31 +02:00
Miriam Baglioni 35e395eae8 merge with master 2021-07-27 12:34:59 +02:00
Miriam Baglioni eb07f7f40f Hosted By Map 2021-07-27 12:27:26 +02:00
Antonis Lempesis ed185fd7ed added missing colons 2021-07-27 11:42:47 +03:00
Antonis Lempesis f3b9570354 properly invalidating metadata 2021-07-26 13:00:16 +03:00
Sandro La Bruzzo 848aabbb6c minor fix 2021-07-25 12:06:41 +02:00
Sandro La Bruzzo 8fac10c91e fixed definition of the wf creating the final infospace of scholexplorer 2021-07-25 11:15:37 +02:00
Sandro La Bruzzo 3920c69bc8 change implementation of resolve Relation to generate jsonRdd as output 2021-07-25 09:51:36 +02:00
Antonis Lempesis f9fbb0f261 added indicators second sprint 2021-07-24 16:40:28 +03:00
Claudio Atzori a0393607a7 mapping funding relations from Datacite should be done according to the actual result identifier 2021-07-23 18:15:08 +02:00
Claudio Atzori 5b6844b969 mapping funding relations from Datacite should be done according to the actual result identifier 2021-07-23 18:14:37 +02:00
Sandro La Bruzzo d9e3b89937 implemented last part of workflows to generate scholixGraph 2021-07-23 16:38:32 +02:00
Sandro La Bruzzo cfde63a7c3 fixed resolve relation join 2021-07-23 14:17:29 +02:00
Sandro La Bruzzo 4a439c3863 NPE fixed 2021-07-23 14:17:29 +02:00
Claudio Atzori bc835d2024 [cleaning] fixed filtering function for missing titles 2021-07-23 11:56:13 +02:00
Claudio Atzori ffdb2a3ea3 [cleaning] fixed filtering function for missing titles 2021-07-23 11:55:55 +02:00
Sandro La Bruzzo ca74e8dd02 create a separate wf for resolving relation 2021-07-23 11:40:06 +02:00
Sandro La Bruzzo 43e9380cd3 update resolve relation to use the same format of openaire graph 2021-07-23 11:25:18 +02:00
Sandro La Bruzzo 058b636d4d added control to check if the entity exists 2021-07-22 16:08:54 +02:00
Sandro La Bruzzo 62ae36a3d2 fixed NPE 2021-07-22 15:41:38 +02:00
Miriam Baglioni 63553a76b3 added code to download gold issn list from unibi 2021-07-22 12:01:48 +02:00
Miriam Baglioni 1a5b114906 DoiBoost AccessRigh #4362 - refactoring 2021-07-22 12:00:23 +02:00
Sandro La Bruzzo d94565862a fixed NPE 2021-07-21 21:23:11 +02:00
Sandro La Bruzzo 31d2d6d41e Scholexplorer: introduction of dedup openaire 2021-07-21 18:09:32 +02:00
Miriam Baglioni b226ba4439 merging with branch beta 2021-07-21 09:46:40 +02:00
Alessia Bardi 9069958479 tests for enermaps 2021-07-20 19:31:43 +02:00
Claudio Atzori 10d7b4f0b4 filtering 'old' OpenAIRE ids from the entity.originalId[] array in the OAF -> XML serialization procedure 2021-07-20 11:52:05 +02:00
Claudio Atzori 77e8c6c7f7 filtering 'old' OpenAIRE ids from the entity.originalId[] array in the OAF -> XML serialization procedure 2021-07-20 11:51:33 +02:00
Miriam Baglioni 83fe31c92e changed the name of the workflows 2021-07-19 18:19:14 +02:00
Miriam Baglioni dd81c36b60 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2021-07-19 18:18:14 +02:00
Miriam Baglioni 54acc5373b changed the name of the workflows 2021-07-19 18:18:09 +02:00
Miriam Baglioni b420b11ed3 double the number of partitions in ProcessMag 2021-07-19 18:16:23 +02:00
Claudio Atzori 65934888a1 adding record identifier among the originalIds regardless of what IdentifierFactory produces 2021-07-19 17:52:52 +02:00
Claudio Atzori 5947cddafc adding record identifier among the originalIds regardless of what IdentifierFactory produces 2021-07-19 17:52:24 +02:00
Claudio Atzori 0977baf41d contents mapped from the stores with 'claim' interpretation will not change their identifier along their way towards the graph 2021-07-19 17:43:52 +02:00
Miriam Baglioni 13cf444f85 Merge pull request 'force orginalId for claimed records' (#124) from forceOrginalId_claims into master
Reviewed-on: D-Net/dnet-hadoop#124
2021-07-19 17:41:58 +02:00
Claudio Atzori 5e5f65a3c3 contents mapped from the stores with 'claim' interpretation will not change their identifier along their way towards the graph 2021-07-19 15:56:55 +02:00
Miriam Baglioni 662c396354 double the number of partitions in ConvertCrossrefToOaf 2021-07-19 12:41:14 +02:00
Miriam Baglioni 59530a14fb DoiBoost AccessRigh #4362 - set BestAccessRight with the usual comparator 2021-07-19 12:34:35 +02:00
Miriam Baglioni 199123b74b DoiBoost AccessRigh #4362 - Fixed issue on date formatting. Added test method and associated resource 2021-07-16 17:30:27 +02:00
Miriam Baglioni c4b18e6ccb changed the download.sh, added a skip step to allow one phase to be skipped, and changed the workflow sequence of steps 2021-07-16 15:01:25 +02:00
Miriam Baglioni acd6056330 added shell action to automatically download the new dump and put it in a specified hdfs location 2021-07-16 12:47:10 +02:00
Miriam Baglioni 3bc9a05bc9 merging with branch beta 2021-07-16 10:32:27 +02:00
Miriam Baglioni 34506df1b6 DoiBoost AccessRigh #4362 - if the journal is open, the OPEN access right is set on all instances and the color is GOLD (overwriting the color if it was already set in one of the previous steps) 2021-07-16 10:29:51 +02:00
Claudio Atzori bf9e0d2d4f Merge pull request 'orcid-no-doi' (#123) from enrico.ottonello/dnet-hadoop:orcid-no-doi into beta
Reviewed-on: D-Net/dnet-hadoop#123
2021-07-15 17:59:41 +02:00
Claudio Atzori 9913b6073c Merge pull request 'orcid-no-doi' (#123) from enrico.ottonello/dnet-hadoop:orcid-no-doi into master
Reviewed-on: D-Net/dnet-hadoop#123
2021-07-15 17:53:58 +02:00
Sandro La Bruzzo 7e2caafe84 Scholexplorer: fixed mapping typologies 2021-07-15 09:53:12 +02:00
Enrico Ottonello 2dc50c0999 added default value to process path 2021-07-14 17:02:22 +02:00
Enrico Ottonello 66604bb2b4 added absolute path to process folder 2021-07-14 16:44:51 +02:00
Enrico Ottonello 7840cc6526 merged with master 2021-07-14 15:33:59 +02:00
Miriam Baglioni 4da46bb62f merging with branch beta 2021-07-14 15:08:52 +02:00
Enrico Ottonello a65667d217 added publication to dataset even if it has no contributors 2021-07-14 15:07:07 +02:00
Sandro La Bruzzo 10068c00ea Code refactor:
- removed old workflows in doiboost
 - split the workflow of doiboost into preprocess and process
2021-07-14 14:45:50 +02:00
Miriam Baglioni 09ad7b2a9e DoiBoost AccessRigh #4362 - Unpaywall mapped to OAF with OPEN instance (non oa are filtered out) (unknown hostedby) + map the color as it is 2021-07-14 14:45:21 +02:00
Miriam Baglioni f4f7c6f9d3 DoiBoost AccessRigh #4362 - Unpaywall mapped to OAF with OPEN instance (non oa are filtered out) (unknown hostedby) + map the color as it is 2021-07-14 14:44:54 +02:00
Miriam Baglioni 6222adf176 DoiBoost AccessRigh #4362 - added resources and test for crossref mapping (licence part included) 2021-07-14 14:42:34 +02:00
Miriam Baglioni 981b1018f6 DoiBoost AccessRigh #4362 - decide access right according to licence. Default access right is Unknown 2021-07-14 14:42:06 +02:00
Sandro La Bruzzo 3d8e2aa146 Code refactor:
- removed old workflows in doiboost
 - split the workflow of doiboost into preprocess and process
2021-07-14 14:37:06 +02:00
Miriam Baglioni 441701c85c DoiBoost AccessRigh #4362 - If multiple licenses are available, take the one applied to 'vor' 2021-07-14 14:14:50 +02:00
Sandro La Bruzzo c35c117601 fixed process doiboost workflow:
- split OrcidToOAF into two phases, preprocess and process
- updated workflow used in production
2021-07-14 12:48:01 +02:00
Miriam Baglioni 1cdd09cd8e Tentative fix for testing of Jenkins 2021-07-14 11:14:59 +02:00
Sandro La Bruzzo 4cb65bc64a fixed process doiboost workflow:
- split OrcidToOAF into two phases, preprocess and process
- updated workflow used in production
2021-07-14 09:44:32 +02:00
Miriam Baglioni 774cdb190e changes to mirror the last dump of the graph with the old data model. 2021-07-13 18:57:24 +02:00
Miriam Baglioni 886617afd0 One result linked to more than one project is saved just once 2021-07-13 18:15:35 +02:00
Miriam Baglioni 320cf02d96 Changed the way to find results linked to projects. We verify to actually have the project on the graph before selecting the result 2021-07-13 18:13:32 +02:00
Miriam Baglioni 52ce35d57b - 2021-07-13 18:08:46 +02:00
Miriam Baglioni 970b387b8d modification to allow dump of a single community 2021-07-13 18:08:10 +02:00
Miriam Baglioni eae10c5894 modification to allow the dump for a single community 2021-07-13 18:07:25 +02:00
Miriam Baglioni c028feef4f workflow for the dump as sub workflows 2021-07-13 18:06:44 +02:00
Miriam Baglioni d70f8c96fd funding contains, rather than starts with, h2020 2021-07-13 17:34:53 +02:00
Miriam Baglioni 5e38c7f42d dumping only communities with status all 2021-07-13 17:32:38 +02:00
Claudio Atzori 734de62474 [doiboost] added workflow for the ActionSet update dedicated to production 2021-07-13 17:26:04 +02:00
Miriam Baglioni d418c309f5 removed the part after part-x- in the file name generated by spark. It was too long and created problems while creating the tar entries 2021-07-13 17:11:49 +02:00
Miriam Baglioni 618d2de2da minor changes and refactoring 2021-07-13 17:10:02 +02:00
Miriam Baglioni 59615da65e Add test to verify the creation of relation between context and projects 2021-07-13 17:09:15 +02:00
Miriam Baglioni 084b4ef999 added the creation of the openaireId from funder and grant number if the element is not present in the context profile 2021-07-13 17:07:46 +02:00
Claudio Atzori fa720c1da4 [doiboost] added workflow for the ActionSet update dedicated to production 2021-07-13 16:59:30 +02:00
Miriam Baglioni 8f322a73cb change because of the renaming of originalId to acronym 2021-07-13 16:22:58 +02:00
Miriam Baglioni 72397ea1ba Added fix for community of arbitrary name length 2021-07-13 16:18:35 +02:00
Miriam Baglioni 5295d10691 added check not to dump deletedByInference entities 2021-07-13 16:11:46 +02:00
Claudio Atzori 9629569e22 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2021-07-13 16:04:08 +02:00
Claudio Atzori f13e11e3f7 [aggregation] datacite wf: defined parameter declaring the path used to store the OAF objects produced by the transformation phase 2021-07-13 16:04:02 +02:00
Miriam Baglioni e9a17ec899 added check to verify not to add void APC 2021-07-13 15:53:35 +02:00
Miriam Baglioni 8429aed6c6 Added resource for testing selection of valid relations 2021-07-13 15:49:38 +02:00
Miriam Baglioni 39b1a6edf6 added test class for the selection of valid relations and description 2021-07-13 15:23:09 +02:00
Miriam Baglioni 9a58f1b93d added logic to select only the valid relations: those not deletedbyinference and having both parts of the relation as entities in the graph 2021-07-13 15:20:39 +02:00
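A minimal sketch of that selection logic under assumed class names: drop relations flagged deletedbyinference, then keep only those whose source and target both appear among the graph entity ids (two left-semi joins).

```scala
import org.apache.spark.sql.{Dataset, SparkSession}
import org.apache.spark.sql.functions.col

case class Rel(source: String, target: String, deletedbyinference: Boolean)

// A relation is valid when it is not deletedbyinference and both endpoints
// are ids of entities actually present in the graph.
def selectValidRelations(spark: SparkSession,
                         rels: Dataset[Rel],
                         entityIds: Dataset[String]): Dataset[Rel] = {
  import spark.implicits._
  val live = rels.filter(r => !r.deletedbyinference)
  val ids  = entityIds.toDF("id")
  live
    .join(ids, col("source") === col("id"), "left_semi") // source must exist
    .join(ids, col("target") === col("id"), "left_semi") // target must exist
    .as[Rel]
}
```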
Miriam Baglioni 13c66e16be changed logic to split for communities 2021-07-13 15:15:27 +02:00
Miriam Baglioni 6410ab71d8 added APC in the dump and test method 2021-07-13 15:13:58 +02:00
Miriam Baglioni 65a242646d added resource for APC dump 2021-07-13 14:45:25 +02:00
Miriam Baglioni 4b432fbee8 extended test class 2021-07-13 14:40:39 +02:00
Miriam Baglioni 87a6e2b967 extended test class 2021-07-13 14:38:28 +02:00
Miriam Baglioni 69fd40fd30 modified code to split the Croatian funder 2021-07-13 14:35:26 +02:00
Miriam Baglioni 86e50f7311 modified code to split the Croatian funder 2021-07-13 14:31:45 +02:00
Miriam Baglioni da88c850c6 changed the logic to verify if a community is contained in the list of contexts of a result 2021-07-13 14:22:44 +02:00
Miriam Baglioni 2f66fedfec changed the logic to verify if a community is contained in the list of contexts of a result 2021-07-13 14:22:23 +02:00
Miriam Baglioni f5486ffb14 Fixed issues in tests 2021-07-13 14:07:45 +02:00
Claudio Atzori e0061232e9 [aggregation] datacite wf: conditional creation of links, optional resume from intermediate phases 2021-07-13 13:41:21 +02:00
Claudio Atzori bc4b86c27c updated URL in the issueManagement tag 2021-07-13 11:54:32 +02:00
Claudio Atzori 28a66af425 updated URL in the issueManagement tag 2021-07-13 11:52:24 +02:00
Claudio Atzori 783988af06 depending on dhp-schemas:2.6.14 (release) 2021-07-13 11:17:25 +02:00
Claudio Atzori 9038fdc771 depending on dhp-schemas:2.7.14 (release) 2021-07-12 17:46:12 +02:00
Sandro La Bruzzo bbe8193930 merged stable ids 2021-07-12 17:00:43 +02:00
Claudio Atzori ae2b47b29d [broker] added coalesce(1) on the stats dataset before storing it on postgres 2021-07-09 15:47:51 +02:00
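A sketch of the pattern referred to above: coalescing to a single partition means the JDBC write opens one connection to postgres instead of one per partition. The url, table name and properties are placeholders.

```scala
import org.apache.spark.sql.{DataFrame, SaveMode}

// One partition -> one JDBC connection; fine for a small stats dataset.
def writeStats(stats: DataFrame, jdbcUrl: String): Unit =
  stats
    .coalesce(1)
    .write
    .mode(SaveMode.Overwrite)
    .jdbc(jdbcUrl, "broker_stats", new java.util.Properties())
```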
Sandro La Bruzzo 57c74c73c6 fixed mistakes in oozie workflow 2021-07-09 12:28:09 +02:00
Sandro La Bruzzo 61ccb54fde removed wrong loop on oozie wf 2021-07-09 12:17:57 +02:00
Sandro La Bruzzo 9f5a0f3ab6 moved wf indexing of Scholexplorer in dhp-graph-provision 2021-07-09 12:06:43 +02:00
Sandro La Bruzzo 09fccf8000 added workflow to serialize scholix and summary in json 2021-07-09 11:01:42 +02:00
Sandro La Bruzzo 0ea576745f updated CreateInputGraph because generics don't work on Spark Dataset 2021-07-09 10:29:24 +02:00
Sandro La Bruzzo cd17e19044 implemented branch workflow to import datacite and crossref in scholexplorer 2021-07-08 21:20:19 +02:00
Miriam Baglioni c30f3ce647 merge doi normalization 2021-07-08 19:20:02 +02:00
Sandro La Bruzzo 8a034e46e1 updated baseline workflow 2021-07-08 11:11:41 +02:00
Claudio Atzori b7b8e0986e [raw_all] The claim merge procedure includes the claimed contexts in the merged result 2021-07-08 10:42:31 +02:00
Sandro La Bruzzo 0799ac9fb6 fixed wrong path 2021-07-08 10:36:37 +02:00
Sandro La Bruzzo 4d53402712 extended ebiLinks to create a dataset before generation of OAF 2021-07-08 10:26:21 +02:00
Sandro La Bruzzo a4a54a3786 code refactor 2021-07-08 09:08:25 +02:00
Sandro La Bruzzo a01dbe0ab0 completed workflow of generation of scholix and summaries 2021-07-07 23:10:34 +02:00
Claudio Atzori fdcff42e46 [raw_all] Aggregator graph creation merges claims (updates) with the corresponding entity 2021-07-07 19:01:59 +02:00
Claudio Atzori 777536ce91 [aggregation] string values used as regular expressions in the OAI collection classes are defined in a single point as constants, to be reused across the code (PR#122) 2021-07-07 11:23:48 +02:00
Claudio Atzori bc014023c8 Merge pull request 'to solve the scala SI-3623' (#122) from andreas.czerniak/BrStableId_dnet-hadoop:stable_ids into stable_ids
Reviewed-on: D-Net/dnet-hadoop#122
2021-07-07 11:13:51 +02:00
Claudio Atzori 32bdfdccbc [raw_all] Aggregator graph creation merges claims (updates) with the corresponding entity 2021-07-07 11:08:27 +02:00
Andreas Czerniak ebf3f47a02 from & until handling made more OAI 2.0 compliant, adding tfs 2021-07-07 09:29:49 +02:00
Claudio Atzori f580cb77e1 added mapping for claim relation 'resultResult_publicationDataset_isRelatedTo' (present on BETA) 2021-07-06 21:11:11 +02:00
Sandro La Bruzzo ed684874f2 deleted old scholix project 2021-07-06 17:20:08 +02:00
Sandro La Bruzzo 8535506c22 added scholix generation 2021-07-06 17:18:06 +02:00
Sandro La Bruzzo 4c54bd8742 add test to verify merge scholix on source 2021-07-06 11:32:14 +02:00
Andreas Czerniak 3531802710 to solve the scala SI-3623 2021-07-06 11:30:56 +02:00
Sandro La Bruzzo 7d8db2eb8a betterRenamingMethod 2021-07-06 09:56:32 +02:00
Sandro La Bruzzo c952c8d236 generate first side of scholix mapping 2021-07-06 09:53:14 +02:00
Claudio Atzori 70ded407bb HttpClient used in metadata collection retries also on 404 2021-07-05 18:04:30 +02:00
Miriam Baglioni 7177c25261 added check for null value during doi normalization 2021-07-05 16:22:38 +02:00
Miriam Baglioni 0892cad4e8 the normalization of the content of value was not visible outside the block. Moved the doi normalization so that it is applied when returning the value 2021-07-05 16:21:42 +02:00
Claudio Atzori 350a0823bd Merge pull request 'using organization ids instead of names in monitor db creation' (#121) from antonis.lempesis/dnet-hadoop:stable_ids into stable_ids
Reviewed-on: D-Net/dnet-hadoop#121
2021-07-05 11:07:39 +02:00
Antonis Lempesis 89e6f46682 using organization ids instead of names in monitor db creation 2021-07-05 12:00:00 +03:00
Sandro La Bruzzo e4b84ef5d6 fixed mapping OAF to Scholix summary 2021-07-02 16:48:48 +02:00
Sandro La Bruzzo 8fa0841898 Merge remote-tracking branch 'origin/stable_ids' into stable_id_scholexplorer 2021-07-01 22:14:04 +02:00
Sandro La Bruzzo c6fa8598e1 massive code refactor:
removed modules dhp-*-scholexplorer
2021-07-01 22:13:45 +02:00
Antonis Lempesis 829caee4fd added the missing indicators files 2021-06-30 17:31:33 +02:00
Sandro La Bruzzo 84b834c893 added test dataset for pangaea 2021-06-30 17:31:09 +02:00
Sandro La Bruzzo 1a6b398968 implemented Creation of Raw Graph and Resolution 2021-06-30 17:27:55 +02:00
Miriam Baglioni bc34347643 added assertions to verify doi normalization 2021-06-30 14:37:08 +02:00
Miriam Baglioni 86f47afcc7 slight modification of the resource to also accommodate doi normalization tests 2021-06-30 14:36:49 +02:00
Miriam Baglioni 03767ea8e6 slight modification of the resource to also accommodate doi normalization tests 2021-06-30 13:21:24 +02:00
Miriam Baglioni f8eec0ca9a added resource to test the normalization of doi during the import of MAG 2021-06-30 13:19:54 +02:00
Miriam Baglioni 149f85ddf5 added tests for the normalization of the dois 2021-06-30 13:00:52 +02:00
Miriam Baglioni e487b5544c added tests for the normalization of the dois 2021-06-30 12:57:11 +02:00
Miriam Baglioni 1503ccbbb5 added tests for the normalization of the dois 2021-06-30 12:55:37 +02:00
Miriam Baglioni 1299bfb357 Added class to test the normalization of doi 2021-06-30 12:53:27 +02:00
Sandro La Bruzzo 623a0c4edb code Refactor, renaming packages 2021-06-30 11:09:30 +02:00
Miriam Baglioni cf758f4f91 added normalization step for the doi 2021-06-30 10:03:15 +02:00
Miriam Baglioni 801763a0fa there is no longer a need to lowercase the doi since it is done in the first step. Also changed the creation of the id to use the factory 2021-06-29 19:07:23 +02:00
Miriam Baglioni a74de1cda2 added normalization step to the doi 2021-06-29 18:51:11 +02:00
Miriam Baglioni 06074ea7d3 added normalization step to the doi 2021-06-29 18:46:08 +02:00
Miriam Baglioni 8b8ffe82dc added step of normalization for the doi 2021-06-29 18:41:39 +02:00
Miriam Baglioni 50cc21d92e Added method to normalize doi values (lower case, remove everything preceding the 10., filter out dois not starting with 10.) 2021-06-29 18:35:28 +02:00
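An illustrative version of the rule described in that message, not the project's actual method: lowercase the value, strip everything before the first "10.", and discard values that contain no "10." at all.

```scala
// Returns None when no DOI-like value can be recovered from the input.
def normalizeDoi(input: String): Option[String] =
  Option(input).map(_.toLowerCase.trim).flatMap { lower =>
    val idx = lower.indexOf("10.") // DOIs start with the 10. directory prefix
    if (idx < 0) None else Some(lower.substring(idx))
  }
```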
Claudio Atzori 6d3f960238 Merge pull request 'added the missing indicators files' (#120) from antonis.lempesis/dnet-hadoop:stable_ids into stable_ids
Reviewed-on: D-Net/dnet-hadoop#120
2021-06-29 15:57:39 +02:00
Antonis Lempesis ae18171212 Merge branch 'stable_ids' into stable_ids 2021-06-29 15:33:39 +02:00
Antonis Lempesis 87f14a3899 added the missing indicators files 2021-06-29 16:31:51 +03:00
Sandro La Bruzzo db933ebd21 Merge remote-tracking branch 'origin/stable_ids' into stable_id_scholexplorer 2021-06-29 14:16:12 +02:00
Sandro La Bruzzo 7e08655e5f added relation dates in all scholexplorer Datasources 2021-06-29 12:02:03 +02:00
Sandro La Bruzzo 075055eaca added relation dates in bio mapping 2021-06-29 10:33:09 +02:00
Sandro La Bruzzo f36f92287d implemented mapping from Crossref Event Data to Oaf 2021-06-29 10:21:23 +02:00
Claudio Atzori 986a8011ec Merge pull request 'copied latest changes from old fork: indicators+monitor institutions' (#119) from antonis.lempesis/dnet-hadoop:stable_ids into stable_ids
Reviewed-on: D-Net/dnet-hadoop#119
2021-06-29 08:49:12 +02:00
Antonis Lempesis 018c4eb52c copied latest changes from old fork: indicators+monitor institutions 2021-06-28 23:46:52 +03:00
Sandro La Bruzzo 511ec14c63 implemented mapping from EBI and Scholix Resolved to OAF 2021-06-28 22:04:22 +02:00
Claudio Atzori af42377d0e HttpClient used in metadata collection retries on 502, 503, 504 2021-06-28 09:34:30 +02:00
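A minimal, illustrative retry loop for the behaviour described here; the project's actual HttpClient is more elaborate, and maxRetries and the back-off values are assumptions.

```scala
import java.net.{HttpURLConnection, URL}

// Retry a GET while the server answers 502, 503 or 504, with linear back-off.
def getWithRetry(url: String, maxRetries: Int = 3): Int = {
  val retryable = Set(502, 503, 504)
  var attempt   = 0
  var code      = -1
  while (attempt <= maxRetries && (code == -1 || retryable(code))) {
    if (attempt > 0) Thread.sleep(1000L * attempt) // back off before retrying
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod("GET")
    code = conn.getResponseCode
    conn.disconnect()
    attempt += 1
  }
  code
}
```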
Sandro La Bruzzo ad50415167 Merge remote-tracking branch 'origin/stable_ids' into stable_id_scholexplorer 2021-06-24 17:20:50 +02:00
Sandro La Bruzzo 80e15cc455 implemented mapping from uniprot, pdb and ebi links 2021-06-24 17:20:00 +02:00
Claudio Atzori 67afd06cd1 [cleaning] cleaning instance.pid and instance.alternateidentifier using the same procedure used to clean result.pid 2021-06-24 12:10:17 +02:00
Claudio Atzori 2e8fd2c531 cleanup 2021-06-23 14:38:24 +02:00
Claudio Atzori 4dc9ebf217 [raw_all] fixed unit test 2021-06-23 14:38:07 +02:00
Claudio Atzori 50fc5a64a0 [raw_all] Aggregator graph creation merges claims (updates) with the corresponding entity 2021-06-23 11:49:42 +02:00
Claudio Atzori 5edcc6832a applying sonarLint suggestions 2021-06-23 09:53:29 +02:00
Sandro La Bruzzo 080a280bea added pdb to Oaf Transformation 2021-06-21 16:23:59 +02:00
Sandro La Bruzzo 1dc0c59e20 merged fix for Thai dates from stable_ids 2021-06-21 10:39:46 +02:00
Sandro La Bruzzo dc66cf615b Merge branch 'stable_id_scholexplorer' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_id_scholexplorer 2021-06-21 09:38:33 +02:00
Sandro La Bruzzo 507e42102a added pdb to oaf class 2021-06-21 09:36:40 +02:00
Sandro La Bruzzo a167543637 Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_id_scholexplorer 2021-06-21 09:14:11 +02:00
Sandro La Bruzzo 4fe7b75644 renamed packages 2021-06-18 16:41:24 +02:00
Sandro La Bruzzo 3990165d05 changed types of unresolved relations 2021-06-18 11:43:59 +02:00
Claudio Atzori 2dd5449c13 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-06-18 10:08:15 +02:00
Claudio Atzori fd54ecf7bd bumped dhp-schemas dependency version 2021-06-18 10:08:07 +02:00
Miriam Baglioni 180d671127 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-06-18 09:46:18 +02:00
Miriam Baglioni 13c96622c9 - 2021-06-18 09:45:16 +02:00
Miriam Baglioni b486ae498f added test and test resource to verify the generation of the date of acceptance from the input extracted from the dump 2021-06-18 09:43:32 +02:00
Miriam Baglioni 464c2ddde3 changed to split the generation of the crossref dataset into two steps 2021-06-18 09:42:31 +02:00
Miriam Baglioni 6aca0d8ebb added kryo encoding for input files 2021-06-18 09:42:07 +02:00
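For illustration, reading intermediate files with an explicit kryo encoder, the kind of change the commit above refers to; the record class and the parsing are placeholders.

```scala
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}

class CrossrefRecord(var json: String) extends Serializable

// Kryo encoders cover classes Spark cannot derive a product encoder for;
// the payload is stored as a single binary column.
def readRecords(spark: SparkSession, path: String): Dataset[CrossrefRecord] = {
  implicit val enc: Encoder[CrossrefRecord] = Encoders.kryo[CrossrefRecord]
  spark.read.textFile(path).map(line => new CrossrefRecord(line)) // parsing stub
}
```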
Miriam Baglioni 3585e53da3 changed to split the generation of the crossref dataset into two steps 2021-06-18 09:41:23 +02:00
Claudio Atzori 41b551562e applying PR#115 (DatePicker) on stable_ids 2021-06-17 09:33:50 +02:00
Sandro La Bruzzo 3100166d29 Merge remote-tracking branch 'origin/stable_ids' into stable_id_scholexplorer 2021-06-16 16:22:16 +02:00
Claudio Atzori 74833d04f1 Merge branch 'pids_beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into stable_ids 2021-06-16 15:54:18 +02:00
Claudio Atzori 7243a40c88 code formatting 2021-06-16 15:03:03 +02:00
Sandro La Bruzzo dfcf78cf24 removed wrong code 2021-06-16 14:57:42 +02:00
Sandro La Bruzzo cc0f2b11fb Implemented mapping from pubmed baseline to OAF 2021-06-16 14:56:24 +02:00
Miriam Baglioni 95885bcf12 forces executor memory and driver memory to be 7G (trying to avoid OOM) 2021-06-16 10:17:52 +02:00
Miriam Baglioni 2550a73981 - 2021-06-16 10:04:41 +02:00
Miriam Baglioni 1c47c0d786 modified the number of executors trying to avoid OOM exception 2021-06-15 21:05:39 +02:00
Miriam Baglioni 7deac55138 added an option to resume from a given step in the wf 2021-06-15 18:38:20 +02:00
Antonis Lempesis f7c0b80e35 storing result_instance as parquet 2021-06-15 14:45:48 +03:00
Miriam Baglioni 66e7ef892f changed the parameter name 2021-06-15 11:08:54 +02:00
Miriam Baglioni 4f47ad0891 no need to rename the folders, just write in overwrite mode, so I changed the name of the output folder 2021-06-15 09:28:31 +02:00
Miriam Baglioni 9f9dd00b94 refactoring 2021-06-15 09:24:46 +02:00
Miriam Baglioni 63d74ee379 refactoring 2021-06-15 09:24:11 +02:00
Miriam Baglioni 6ebc236657 added needed property: outputPath 2021-06-15 09:23:24 +02:00
Miriam Baglioni f7379255b6 changed the workflow to extract info from the dump 2021-06-15 09:22:54 +02:00
Miriam Baglioni d6e21bb6ea creates the crossref dataset used for doiboost, together with the tar-unpacking part 2021-06-14 17:27:19 +02:00
Miriam Baglioni 4da141bd7c Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-06-14 13:41:02 +02:00
Miriam Baglioni ce0cfd79e0 creates the crossref dataset used for doiboost 2021-06-14 13:40:19 +02:00
Miriam Baglioni 93efe4de82 split the construction of crossref dataset in two parts. This one just unpacks the tar entries 2021-06-14 13:39:40 +02:00
Michele Artini ada063ce70 fixed a problem with empty mdstore list (2) 2021-06-14 12:04:47 +02:00
Michele Artini 83132ee99a fixed a problem with empty mdstore list 2021-06-14 11:57:00 +02:00
Miriam Baglioni cf360d7c97 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-06-14 10:19:49 +02:00
Miriam Baglioni 8873e6b6d1 workflow and parameter 2021-06-14 10:15:57 +02:00
Miriam Baglioni 0f1acdf6b6 workflow and parameter 2021-06-14 10:08:55 +02:00
Sandro La Bruzzo aeb8132627 Merged branch stable_ids 2021-06-14 10:07:29 +02:00
Sandro La Bruzzo efbea1e01a minor fix 2021-06-14 09:45:14 +02:00
Miriam Baglioni 75780fc636 extraction of the tar for the dump of crossref, and creation of the dataset 2021-06-14 09:45:07 +02:00
Claudio Atzori 2039bb9f5f orcid / orcid_pending cleaning backported from master branch 2021-06-14 09:40:50 +02:00
Claudio Atzori dd19c4ac5a Merge pull request 'import_new_mdstores' (#112) from import_new_mdstores into stable_ids
Reviewed-on: D-Net/dnet-hadoop#112
2021-06-14 09:23:55 +02:00
Claudio Atzori e9e86a237d Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-06-11 17:00:02 +02:00
Claudio Atzori 10bd6ca194 depending on dhp-schemas:2.5.12 (release) 2021-06-11 16:59:56 +02:00
Claudio Atzori a900bfb874 delegating the date parsing to https://github.com/sisyphsu/dateparser 2021-06-11 16:53:01 +02:00
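A sketch of that delegation, assuming the DateParserUtils.parseDate entry point shown in the dateparser README; the Option wrapper is illustrative.

```scala
import com.github.sisyphsu.dateparser.DateParserUtils

// The library accepts heterogeneous date strings and returns java.util.Date;
// unparsable values fall back to None here.
def parseDateSafely(raw: String): Option[java.util.Date] =
  try Some(DateParserUtils.parseDate(raw))
  catch { case _: Exception => None }
```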
Sandro La Bruzzo dd997c49e0 fix wrong relation id
fix Thai date, ticket #6791
2021-06-10 14:47:18 +02:00
Antonis Lempesis d413b24611 added instances, orgs for monitor, totalcost for projects, apcs 2021-06-10 02:35:46 +03:00
Claudio Atzori 741077dbca Merge pull request 'Fix in Affiliation Propagation' (#113) from miriam.baglioni/dnet-hadoop:master into stable_ids
Reviewed-on: D-Net/dnet-hadoop#113
2021-06-09 18:42:42 +02:00
Miriam Baglioni 32b0c27217 Update 'dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java'
fix in SQL query: when writing the blacklist constraint it used d.id to refer to the datasource id, but no alias for the datasource was defined, so I removed the alias
2021-06-09 18:36:11 +02:00
Sandro La Bruzzo 0d1f37302f Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_id_scholexplorer 2021-06-09 09:35:16 +02:00
Miriam Baglioni dc07f1079b added check in case the author set to be enriched is null 2021-06-08 12:06:10 +02:00
Miriam Baglioni 8d2e086e48 changes to avoid reassignment to val 2021-06-07 17:50:37 +02:00
Miriam Baglioni f33521d338 Update 'dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala'
to be able to replace the object assigned to author, the val has been replaced by a var
2021-06-07 17:27:07 +02:00
Miriam Baglioni bc12e9819e Update 'dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala'
The change fixes the issue that arises when the same work appears more than once on the same ORCID profile. It avoids replicating the association doi -> author when the orcid id is already associated with the doi.
2021-06-07 16:37:01 +02:00
Sandro La Bruzzo 0cdb7ccdaa added inverse relations to datacite mapping 2021-06-04 15:10:20 +02:00
Sandro La Bruzzo 5b724d9972 added relations to datacite mapping 2021-06-04 10:14:22 +02:00
Sandro La Bruzzo e57294ac99 implemented changes on PUBMed dataflow 2021-06-03 10:52:09 +02:00
Michele Artini ede2749822 orcid pid type 2021-06-01 12:42:43 +02:00
Michele Artini f0fbfdcfae Merge branch 'stable_ids' into import_new_mdstores 2021-06-01 12:03:00 +02:00
Michele Artini e950750262 add nodes to import hdfs mdstores 2021-06-01 10:48:50 +02:00
Michele Artini 03a510859a removed coalesce(1) 2021-05-31 14:10:51 +02:00
Michele Artini e9f2b6037c patch of mdstore records 2021-05-31 11:36:26 +02:00
Sandro La Bruzzo 02ef46535f Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_ids 2021-05-31 09:50:15 +02:00
Sandro La Bruzzo aeadc5a366 updated wf Datacite Import to retrieve the block size as parameter 2021-05-31 09:49:53 +02:00
Claudio Atzori 96238152cb added serialization for alternateIdentifiers and pids within each record instance 2021-05-28 16:57:30 +02:00
Michele Artini ad56a44fda save as gzipped sequence file 2021-05-28 14:45:39 +02:00
Claudio Atzori 83722ebc47 pull #111 replied on stable_ids 2021-05-28 14:11:46 +02:00
Claudio Atzori eb6acfbabc [cleaning] removing non-parsable relation.validationDate(s) 2021-05-28 10:50:44 +02:00
Claudio Atzori 6e3a4e9237 updated test expectations 2021-05-28 09:37:50 +02:00
Claudio Atzori ac3d090e9e bumped dhp-schemas dependency version 2021-05-27 17:31:12 +02:00
Michele Artini 4fa5671d16 first implementation of Hdfs Mdstores Importer 2021-05-27 16:22:07 +02:00
Claudio Atzori c3d92247d3 bumped dhp-schemas dependency version 2021-05-27 15:10:51 +02:00
Claudio Atzori d512062b58 integrating pull #109, H2020Classification 2021-05-27 12:22:47 +02:00
Claudio Atzori 5e4b91d9ef more pervasive use of constants from ModelConstants, especially for ORCID 2021-05-26 18:20:23 +02:00
Sandro La Bruzzo bced804151 updated wf Datacite Import to retrieve the block size as parameter 2021-05-26 17:06:50 +02:00
Claudio Atzori 4f58418184 depending on dhp-schemas:2.4.7 (release) 2021-05-24 10:32:48 +02:00
Miriam Baglioni abd88f663d changed test resource to mirror change in the input file 2021-05-21 15:20:47 +02:00
Miriam Baglioni c844877de2 changed the workflow to possibly parallelize the programme and project preparation steps as well 2021-05-21 14:41:57 +02:00
Miriam Baglioni 073d76864d refactoring 2021-05-21 14:41:03 +02:00
Miriam Baglioni 4c8b4a774c removed not needed code 2021-05-21 14:40:07 +02:00
Enrico Ottonello abdd0ade1f added temporary output folder as workflow parameter 2021-05-21 12:08:16 +02:00
Miriam Baglioni 53b9d87fec new prepareProgramme according to the new file 2021-05-21 11:49:31 +02:00
Miriam Baglioni 1ee8f13580 refactoring and added "left" as join type to be 100% sure to get the whole set of projects 2021-05-21 11:49:05 +02:00
Miriam Baglioni e07c3ba089 due to a change in the input file, the filtering step is no longer needed 2021-05-21 11:47:43 +02:00
Miriam Baglioni 54f6e2f693 changed to get the needed information to build the action set as parallel jobs 2021-05-21 11:47:00 +02:00
Miriam Baglioni 7180505519 removed non needed variable 2021-05-21 11:46:13 +02:00
Miriam Baglioni 2eb1a8b344 changed because the input file changed 2021-05-21 11:40:20 +02:00
Enrico Ottonello d0945c3c78 added temporary output folder, because folder access rights are different on beta and prod 2021-05-20 19:14:31 +02:00
Enrico Ottonello 1265dadc90 workflow aligned with stable_ids 2021-05-20 19:01:28 +02:00
Enrico Ottonello 0821d8e97d Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2021-05-20 18:33:18 +02:00
Enrico Ottonello ae7bd24d79 removed old workflows 2021-05-20 18:32:22 +02:00
Enrico Ottonello 4d6c473bf1 removed redundant classes contained now in dhp-schema 2021-05-20 18:26:42 +02:00
Claudio Atzori 9d725efdc1 reverted implementation of the mdstore client 2021-05-20 18:26:09 +02:00
Miriam Baglioni 9610224671 added param to workflow property 2021-05-20 18:21:12 +02:00
Claudio Atzori 863b56b6ce using constants from ModelConstants 2021-05-20 16:23:58 +02:00
Claudio Atzori ae5c28e54f code formatting 2021-05-20 16:13:06 +02:00
Miriam Baglioni aa45b4df9b - 2021-05-20 15:57:40 +02:00
Miriam Baglioni 052c837843 - 2021-05-20 15:54:44 +02:00
Claudio Atzori b695932ae4 integrated pull#108 2021-05-20 15:34:04 +02:00
Claudio Atzori ea9b00ce56 adjusted test 2021-05-20 15:31:42 +02:00
Claudio Atzori 2e70aa43f0 Merge pull request 'H2020Classification fix and possibility to add datasources in blacklist for propagation of result to organization' (#108) from miriam.baglioni/dnet-hadoop:master into master
Reviewed-on: D-Net/dnet-hadoop#108

The changes look ok, but please drop a comment to describe how the parameters should be changed from the workflow caller for both workflows
* H2020Classification
* propagation of result to organization
2021-05-20 15:25:05 +02:00
Claudio Atzori b572f56763 Merge branch 'master' into master 2021-05-20 15:22:35 +02:00
Claudio Atzori 2578b7fbb3 code formatting 2021-05-20 14:59:02 +02:00
Miriam Baglioni dc0ad8d2e0 fixed issue related to a change in the downloaded file name. Added the sheet name as a parameter and a check on whether the name should change 2021-05-20 14:53:53 +02:00
Claudio Atzori 232dce83db fixes #6701: xpath for titles to support both datacite and Guidelines v4 mapping 2021-05-20 14:41:15 +02:00
Claudio Atzori aef2977ad0 fixes #6701: xpath for titles to support both datacite and Guidelines v4 mapping 2021-05-20 14:40:22 +02:00
Miriam Baglioni 02b80cf24f resolved conflicts 2021-05-20 10:59:39 +02:00
Claudio Atzori c4a23c2f4d fix: preserving the old identifier among the originalIds in the doiboost construction process, trying to avoid UnsupportedOperationException while adding elements to the originalIds 2021-05-19 16:01:52 +02:00
Claudio Atzori ba03f549d7 fix: preserving the old identifier among the originalIds in the doiboost construction process 2021-05-19 15:43:26 +02:00
Claudio Atzori 239d0f0a9a ROR actionset import workflow backported from branch stable_ids 2021-05-18 16:12:11 +02:00
Antonis Lempesis 168edcbde3 added the final steps for the observatory promote wf and some cleanup 2021-05-18 15:23:20 +03:00
Michele Artini e56ccec536 Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_ids 2021-05-18 14:00:28 +02:00
Michele Artini c1e20de7cf fixed the deserialization of a json property 2021-05-18 14:00:14 +02:00
Claudio Atzori a9f512103b using constants from ModelConstants 2021-05-18 11:19:07 +02:00
Claudio Atzori eeb8bcf075 using constants from ModelConstants 2021-05-18 11:10:07 +02:00
Claudio Atzori 2cbf15f4fb using ModelConstants 2021-05-17 09:54:45 +02:00
Enrico Ottonello e13926cdd0 merged with master 2021-05-14 18:10:31 +02:00
Claudio Atzori f19feceaf0 set the old identifier before switching to the new one 2021-05-14 12:53:40 +02:00
Claudio Atzori 1bd70fa2c6 preserving the old identifier among the originalIds in the doiboost construction process 2021-05-14 11:30:41 +02:00
Claudio Atzori ca3f3a7687 using ModelConstants 2021-05-14 11:29:49 +02:00
Claudio Atzori 0358ae16ce depending on the latest dhp-schema version 2021-05-14 11:28:33 +02:00
Claudio Atzori 23b8883ab1 applied intellij code cleanup 2021-05-14 10:58:12 +02:00
Claudio Atzori 609eb711b3 IndexRecordTransformerTest for producing a record that can be manually submitted to solr 2021-05-13 16:13:28 +02:00
Claudio Atzori 1517bf7c92 IndexRecordTransformerTest for producing a record that can be manually submitted to solr 2021-05-13 16:11:22 +02:00
Sandro La Bruzzo d9a0bbda7b implemented new phase in doiboost to make the dataset Distinct by ID 2021-05-13 12:25:14 +02:00
Sandro La Bruzzo 6424cd9062 Added passing of the following parameters:
-varDataSourceId
-varOfficialName

in each transformation rule
2021-05-11 15:17:38 +02:00
Sandro La Bruzzo 073dcea2aa Added passing of the following parameters:
-varDataSourceId
-varOfficialName

in each transformation rule
2021-05-11 15:05:58 +02:00
Claudio Atzori d4c3476152 mapping datasource.journal only when an issn is available, null otherwise 2021-05-11 11:08:54 +02:00
Claudio Atzori da9d6f3887 mapping datasource.journal only when an issn is available, null otherwise 2021-05-11 10:45:30 +02:00
Sandro La Bruzzo 54217d73ff removed old parameters from oozie workflow 2021-05-11 09:59:02 +02:00
Claudio Atzori d1cbee8413 imported methods from CleaningFunctions, defined in GraphCleaningFunctions 2021-05-10 16:43:39 +02:00
Claudio Atzori 3797543600 MDStoreManager model classes moved in dhp-schemas 2021-05-10 14:32:05 +02:00
Claudio Atzori 3925eb6a79 MDStoreManager model classes moved in dhp-schemas 2021-05-10 13:58:23 +02:00
Claudio Atzori 25254885b9 [ActionManagement] reduced number of xqueries used to access ActionSet info 2021-05-07 17:32:03 +02:00
Claudio Atzori 8a0de2fc18 [ActionManagement] reduced number of xqueries used to access ActionSet info 2021-05-07 17:31:32 +02:00
Sandro La Bruzzo 7dc824fc23 imported changes in stable_id into master 2021-05-07 12:53:50 +02:00
Michele Artini d82071ba6c originalId with prefix 2021-05-06 15:34:48 +02:00
Claudio Atzori d4a30fabe3 clean up tests 2021-05-05 17:28:15 +02:00
Claudio Atzori dccaf173cf fixed mapping applied to ODF records. Added unit test to verify the mapping for OpenTrials 2021-05-05 16:36:15 +02:00
Claudio Atzori 8c96a82a03 fixed mapping applied to ODF records. Added unit test to verify the mapping for OpenTrials 2021-05-05 15:30:06 +02:00
Claudio Atzori 50fc128ff7 alternative way to set timeouts for the ISLookup client 2021-05-05 11:24:44 +02:00
Claudio Atzori 2e1eb96f9a code formatting 2021-05-05 11:23:57 +02:00
Claudio Atzori b1785ba77c alternative way to set timeouts for the ISLookup client 2021-05-05 11:23:46 +02:00
Sandro La Bruzzo 1adfc41d23 manually merged changes on stable_id for doiboost into master 2021-05-05 10:23:32 +02:00
Claudio Atzori fb930b84d3 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-05-04 18:06:30 +02:00
Claudio Atzori 923d19ea8e mdstore read lock/unlock when bulk copying records from mongodb to hdfs 2021-05-04 18:06:21 +02:00
Sandro La Bruzzo 714b71bd21 updated pubmed 2021-05-04 14:54:12 +02:00
Claudio Atzori ba86835951 using common constants from ModelConstants 2021-05-04 11:51:52 +02:00
Claudio Atzori c00be646f3 Merge pull request 'prepare_ror_actionset' (#106) from prepare_ror_actionset into stable_ids
Reviewed-on: D-Net/dnet-hadoop#106

Thanks Michele, looks good to me.
2021-05-04 11:41:58 +02:00
Michele Artini f4bd2b5619 reverted file SparkDedupTest.java 2021-05-04 10:26:14 +02:00
Michele Artini 49910aedca Merge branch 'stable_ids' into prepare_ror_actionset 2021-05-04 10:00:12 +02:00
Claudio Atzori 5cc3e6d61c bumped pace-core dependency version 2021-05-03 16:40:50 +02:00
Michele Artini b4877da363 Merge branch 'stable_ids' into prepare_ror_actionset 2021-05-03 08:13:55 +02:00
Alessia Bardi 9a20057615 fixed query for organisations' pids 2021-04-29 15:23:39 +02:00
Michele Artini 6692128234 Merge branch 'stable_ids' into prepare_ror_actionset 2021-04-29 13:24:08 +02:00
Alessia Bardi a801999e75 fixed query for organisations' pids 2021-04-29 12:18:42 +02:00
Michele Artini a278d67175 parse input file 2021-04-29 11:34:47 +02:00
Claudio Atzori f6ccd54d87 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-04-29 10:10:01 +02:00
Claudio Atzori 91e7220f20 cleaned up workflow for actionset migration, adjusted dnet|cnr* dependency versions 2021-04-29 10:09:52 +02:00
Michele Artini f77ba34126 pid types 2021-04-29 09:50:05 +02:00
Michele Artini 7c5cd86927 annotations and tests 2021-04-29 09:29:19 +02:00
Michele Artini b5cf505cc6 partial implementation of the ROR->actionset workflow 2021-04-28 16:00:24 +02:00
Enrico Ottonello c537986b7c deleted folders with merged data immediately before merge phases 2021-04-28 11:25:25 +02:00
Sandro La Bruzzo 2129e9caa7 updated pangaea transformation to parse the xml directly 2021-04-28 10:21:03 +02:00
Claudio Atzori 5afa7d3e0c core utilities in dhp-common moved in external module dhp-schemas 2021-04-27 15:44:01 +02:00
Alessia Bardi e6075bb917 updated json schema for results - added instances and accessright definition 2021-04-27 15:15:08 +02:00
Claudio Atzori ac77a245a3 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-04-27 14:05:00 +02:00
Claudio Atzori f783e60ff7 cleanup 2021-04-27 14:04:50 +02:00
Sandro La Bruzzo 63c0303137 removed unused import, add log 2021-04-27 12:17:23 +02:00
Sandro La Bruzzo 74484d2823 bug fixing 2021-04-27 12:13:44 +02:00
Claudio Atzori dd2e0a81f4 added dnet45-bootstrap-snapshot and dnet45-bootstrap-release repositories 2021-04-27 12:08:43 +02:00
Claudio Atzori 233d849f90 added dnet45-bootstrap-snapshot and dnet45-bootstrap-release repositories 2021-04-27 12:03:40 +02:00
Claudio Atzori fcd13f5350 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-04-27 11:37:45 +02:00
Claudio Atzori 4028176559 enabled snapshots from dnet45-snapshots repository 2021-04-27 11:37:32 +02:00
Sandro La Bruzzo c74b03d59c Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_ids 2021-04-27 11:31:07 +02:00
Sandro La Bruzzo 7f8848ecdd added first implementation of Pangaea Mapping 2021-04-27 11:30:37 +02:00
Claudio Atzori 27ab8a704d adjusted poms to align with the external dhp-schema module 2021-04-27 10:12:27 +02:00
Claudio Atzori a7cf449b36 cleanup 2021-04-27 10:11:26 +02:00
Claudio Atzori 82de6fb634 dhp-schema moved to dedicated module https://code-repo.d4science.org/D-Net/dhp-schemas 2021-04-27 10:10:50 +02:00
Claudio Atzori fa42026590 fixed PersonCleaner extension functions 2021-04-27 10:10:06 +02:00
Claudio Atzori ef4bfd82e2 code formatting 2021-04-27 10:09:31 +02:00
Claudio Atzori faa8f6f4e2 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-04-27 09:57:03 +02:00
miconis 6d5c14e030 assertions updated in entity merger test 2021-04-27 09:47:49 +02:00
Claudio Atzori c2bb03c8b5 depending on external dhp-schemas module 2021-04-23 17:57:35 +02:00
Claudio Atzori 7ed107be53 depending on external dhp-schemas module 2021-04-23 17:52:36 +02:00
Claudio Atzori c25238480c making ODF record parsing namespace unaware (#6629) 2021-04-23 17:34:57 +02:00
Claudio Atzori 99cfb027fa making ODF record parsing namespace unaware (#6629) 2021-04-23 17:09:36 +02:00
Miriam Baglioni 72e5aa3b42 refactoring 2021-04-23 12:10:30 +02:00
Miriam Baglioni 4ae6fba01d refactoring 2021-04-23 12:09:19 +02:00
Miriam Baglioni 7d1b8b7f64 merge upstream 2021-04-23 11:55:49 +02:00
miconis d0e3366c34 Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_ids 2021-04-22 11:45:19 +02:00
miconis 3c12eeadce bug fix in propagation of relations 2021-04-22 11:44:33 +02:00
Claudio Atzori e5abbec2ba [orcid] download of the lambda file defined in a script 2021-04-22 11:22:10 +02:00
Claudio Atzori 55964cbd81 [orcid] large oozie workflow cleanup; updated workflow for the orcidnodoi actionset creation 2021-04-22 10:18:09 +02:00
Claudio Atzori 8f309b72ff [dedup] using node names consistently across the workflow 2021-04-21 17:54:51 +02:00
Claudio Atzori 52244f813a merging from enrico.ottonello/dnet-hadoop:orcid-no-doi 2021-04-21 12:24:09 +02:00
Sandro La Bruzzo fd29307b84 updated workflow name 2021-04-21 09:21:41 +02:00
Claudio Atzori 815b9f4d56 [openorgs dedup] fixed workflow parameter declarations. Introduced support for resuming the execution from intermediate steps 2021-04-20 17:24:45 +02:00
Claudio Atzori d0d477cca3 code formatting 2021-04-20 12:50:34 +02:00
miconis 0393cdce42 addition of alternative names in export queries 2021-04-20 12:45:21 +02:00
miconis cadd0a5de8 modification of the queries for openorgs: they now consider also pending orgs 2021-04-20 12:06:56 +02:00
Sandro La Bruzzo e06c7f32f6 updated figshare id as described in #6377 2021-04-20 10:18:07 +02:00
Sandro La Bruzzo dbe0d0378e resolved ticket #6377 2021-04-20 09:44:44 +02:00
Antonis Lempesis 625d993cd9 added step for observatory db 2021-04-20 02:31:06 +03:00
Antonis Lempesis 25d0512fbd code cleanup 2021-04-20 01:43:23 +03:00
Sandro La Bruzzo 524e5f3092 Improved parallelization on transformation wf on hadoop 2021-04-19 15:17:25 +02:00
Sandro La Bruzzo cdfe01bbae improved parallelization on transformation job 2021-04-19 15:14:52 +02:00
Sandro La Bruzzo 3ae67b7a1d Merge remote-tracking branch 'origin/stable_ids' into stable_ids 2021-04-16 17:36:57 +02:00
Sandro La Bruzzo a16e5299f9 applied unique function on the final dataset 2021-04-16 17:36:48 +02:00
Claudio Atzori 45057440c1 code formatting 2021-04-16 17:28:25 +02:00
Enrico Ottonello 34ca792a55 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2021-04-16 17:18:46 +02:00
Enrico Ottonello 27068aacd1 wf to move the orcid-no-doi dataset to the folder ready for the import 2021-04-16 17:17:47 +02:00
miconis 7ad573d023 bug fix: changed join in propagaterelations without applying filter on the id 2021-04-16 16:40:42 +02:00
Sandro La Bruzzo 67085da305 fixed NPE 2021-04-16 11:05:58 +02:00
Sandro La Bruzzo 644aa8f40c Merge remote-tracking branch 'origin/stable_ids' into stable_ids 2021-04-16 09:14:26 +02:00
Sandro La Bruzzo 7d6a80e2f2 added new type on MAG mapping 2021-04-16 09:14:15 +02:00
Claudio Atzori 8704d32780 code formatting 2021-04-15 16:52:58 +02:00
Claudio Atzori ba4b4c74d8 do not make the identifier prefix depend on the Handle 2021-04-15 16:48:26 +02:00
Claudio Atzori 906d50563c Merge pull request 'properly invalidating impala metadata' (#105) from antonis.lempesis/dnet-hadoop:master into master
Reviewed-on: D-Net/dnet-hadoop#105
2021-04-15 15:06:22 +02:00
Claudio Atzori 3d58f95522 [stats update] properly invalidating impala metadata 2021-04-15 15:03:05 +02:00
Antonis Lempesis 03d36fadea properly invalidating impala metadata 2021-04-15 13:34:22 +03:00
miconis f64e57c112 refactoring of the id generation, sparkcreatemergerels collects entities to create root id after a join 2021-04-15 10:59:24 +02:00
miconis 176a5e493d Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_ids 2021-04-14 18:06:34 +02:00
miconis 3525a8f504 id generation of representative record moved to the SparkCreateMergeRel job 2021-04-14 18:06:07 +02:00
Claudio Atzori 745fa92db8 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-04-14 10:14:00 +02:00
Claudio Atzori 083c2959dc cleanup 2021-04-14 10:13:53 +02:00
Sandro La Bruzzo 3f77bfceb0 fixed test failure on jenkins 2021-04-14 10:03:01 +02:00
Claudio Atzori 3125cef545 code formatting 2021-04-14 09:11:54 +02:00
Sandro La Bruzzo 44a0064df6 Merge remote-tracking branch 'origin/stable_ids' into stable_ids 2021-04-13 17:48:12 +02:00
Sandro La Bruzzo 479abd10cb Add to the ORCID workflow a method that extracts orcid data directly from the dump generated by Enrico 2021-04-13 17:47:43 +02:00
Claudio Atzori 710cd1e8f2 Merge pull request 'add xslt, personname cleaner' (#104) from andreas.czerniak/BrStableId_dnet-hadoop:stable_ids into stable_ids
Reviewed-on: D-Net/dnet-hadoop#104

LGTM
2021-04-13 14:43:05 +02:00
Claudio Atzori d1ca025b0b [cleaning] removing authors without fullname or providing the 'deactivated' keyword. Removing test titles 2021-04-13 14:32:41 +02:00
miconis 1542196a33 bug fix: starting node of duplicate scan wf changed 2021-04-13 10:15:43 +02:00
miconis 369ed1cd8a bug fix: lookupurl parameter added to dedup record job 2021-04-13 09:08:05 +02:00
Andreas Czerniak 52fbece3b3 Merge branch 'stable_ids' of https://code-repo.d4science.org/andreas.czerniak/BrStableId_dnet-hadoop into stable_ids 2021-04-13 07:05:09 +02:00
Andreas Czerniak d7614c1f85 introduce new const 2021-04-13 07:04:27 +02:00
Andreas Czerniak 3b694074ff add xslt, personname cleaner 2021-04-13 07:04:27 +02:00
Claudio Atzori 511c0521e5 [dedup] avoiding NPEs handling OpenOrg relations 2021-04-12 17:45:11 +02:00
Claudio Atzori 72dcadd8e6 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-04-12 17:32:09 +02:00
Claudio Atzori 902d05f548 [cleaning] avoiding NPEs handling null author PIDs 2021-04-12 17:31:40 +02:00
miconis d442e25cbc bug fix: ids in self mergerels are not marked deletedbyinference=true 2021-04-12 15:56:22 +02:00
miconis dcff9cecdf bug fix: ids in self mergerels are not marked deletedbyinference=true 2021-04-12 15:55:27 +02:00
Andreas Czerniak 34df35926c add xslt, personname cleaner 2021-04-09 14:35:36 +02:00
miconis 11b22b2d23 bug fix in the query, it now exports only relations with non-hidden organizations 2021-04-08 11:51:47 +02:00
miconis 0857100fb8 implementation of the tests for the openorgs integration in the openaire provision 2021-04-07 18:42:16 +02:00
miconis bf685d849f addition of pids in the query for the export of openorgs for the provision, addition of ec_fields in the openorgs model 2021-04-07 14:27:43 +02:00
Miriam Baglioni 70e391d427 merge upstream 2021-04-07 10:38:08 +02:00
miconis eaaefb8b4c implementation of the procedure to reuse content of different dbs when creating the raw graph 2021-04-06 14:35:51 +02:00
miconis c39c82dfe9 modification of the jobs for the integration of openorgs in the provision, dedup records are no more created by merging but simply taking results of openorgs portal 2021-04-06 14:31:00 +02:00
Claudio Atzori 37b65cc3ad Merge pull request 'updates on stats-update workflow' (#100) from antonis.lempesis/dnet-hadoop:master into master
The workflow integrated in the _stable_ids_ branch ran correctly on the BETA content, thus IMO this PR can be integrated in the master branch.

Reviewed-on: D-Net/dnet-hadoop#100
2021-04-02 16:13:35 +02:00
Claudio Atzori 1e7e5180fa [Graph model] updated definition of ExternalReference: added alternateLabel, removed description (#6503) 2021-04-02 12:32:12 +02:00
Claudio Atzori e686b8de8d [ORCID-no-doi] integrating PR#98 D-Net/dnet-hadoop#98 2021-04-01 17:11:03 +02:00
Claudio Atzori ee34cc51c3 [ORCID-no-doi] integrating PR#98 D-Net/dnet-hadoop#98 2021-04-01 17:07:49 +02:00
Claudio Atzori 70e49ed53c [OpenOrgsWf] trivial refactoring 2021-04-01 10:30:51 +02:00
Claudio Atzori 7941d7be29 WIP: using common definitions from ModelConstants 2021-03-31 18:33:57 +02:00
Claudio Atzori 879e8cc7ef WIP: using common definitions from ModelConstants 2021-03-31 17:12:01 +02:00
Claudio Atzori 72ce741ea6 WIP: using common definitions from ModelConstants 2021-03-31 17:07:13 +02:00
Enrico Ottonello 59ec5137e1 improvement related to https://issue.openaire.research-infrastructures.eu/issues/6501 2021-03-31 16:25:41 +02:00
Sandro La Bruzzo 616d2ecce2 split the workflow collecting datacite into two workflows.
Released on beta
2021-03-31 15:45:58 +02:00
Miriam Baglioni 4b6e514f02 merge upstream 2021-03-30 10:27:12 +02:00
Claudio Atzori 27681b876c code formatting 2021-03-29 17:47:11 +02:00
Claudio Atzori 9237d55d7f [OpenOrgsWf] cleanup 2021-03-29 17:40:34 +02:00
Claudio Atzori 7f4e9479ec [OpenOrgsWf] graph construction wf: allow to skip the import openorgs node (importOpenorgs true|false) 2021-03-29 16:59:16 +02:00
Claudio Atzori 940556f6d3 Merge pull request 'OpenOrgs dedup and Integration with OpenAIRE Provision' (#102) from openorgswf into stable_ids
Reviewed-on: D-Net/dnet-hadoop#102
2021-03-29 16:41:09 +02:00
miconis 2709d08fc2 Merge branch 'stable_ids' into openorgswf 2021-03-29 16:39:07 +02:00
miconis f446580e9f code refactoring (useless classes and wf removed), implementation of the test for the openorgs dedup 2021-03-29 16:10:46 +02:00
Claudio Atzori 3becaa5539 [Cleaning] drop alternate identifiers with empty values 2021-03-29 16:01:35 +02:00
Claudio Atzori a0837ac357 [Stats update] integrating PR#100 for testing D-Net/dnet-hadoop#100 2021-03-29 15:59:58 +02:00
Claudio Atzori 48f2b6127e [Cleaning] drop alternate identifiers with empty values 2021-03-29 14:23:18 +02:00
miconis 2355cc4e9b minor changes and bug fix 2021-03-29 10:07:12 +02:00
Sandro La Bruzzo 1dfda3624e improved workflow importing datacite 2021-03-26 13:56:29 +01:00
Claudio Atzori b5b7dc2104 [Cleaning] drop alternate identifiers with empty values 2021-03-26 12:30:00 +01:00
Enrico Ottonello 91d8660982 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2021-03-25 11:21:20 +01:00
Enrico Ottonello ebd67b8c8f removed duplicates orcid data on authors set 2021-03-25 11:20:52 +01:00
Claudio Atzori 827e7e37db [Cleaning] drop instance.alternateIdentifier elements when they are available among instance.pid 2021-03-25 11:07:59 +01:00
miconis 28c1cdd132 merged stable_ids into openorgswf 2021-03-25 10:44:49 +01:00
miconis 5dfb66b0fa minor changes 2021-03-25 10:29:34 +01:00
miconis 348b0ef921 bug fix, implementation of the workflow for the creation of raw_organizations (openorgs dedup), addition of the pid lists to the openorgs postgres db 2021-03-24 15:51:27 +01:00
Claudio Atzori 751125fdf9 [Actionmanager] zero function considers empty entity.id as well as rel.source/rel.target 2021-03-23 17:34:32 +01:00
Claudio Atzori 1e423fdc07 [Actionmanager] remove invalid records from the input graph before groupGraphTableByIdAndMerge 2021-03-23 13:39:24 +01:00
Claudio Atzori e5ebb500cf fixed pom versions; included missing workflow modules in dhp-workflows/pom.xml 2021-03-23 12:13:53 +01:00
Claudio Atzori b75ad76f79 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-03-23 09:59:12 +01:00
Claudio Atzori 8db248aa13 avoiding error on jenkins compilations: java.net.BindException: Cannot assign requested address: Service 'sparkDriver' failed after 16 retries (on a random free port)! 2021-03-23 09:56:34 +01:00
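A common remedy for this BindException on CI hosts is pinning the Spark driver to the loopback interface; a minimal sketch assuming a local-mode test session (the class name is illustrative, the two properties are standard Spark settings):

```java
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

// Sketch: bind the test driver to loopback so the 'sparkDriver' service can
// always start, regardless of how the CI host resolves its own hostname.
public class LocalTestSession {

    public static SparkSession create() {
        SparkConf conf = new SparkConf()
            .setAppName("unit-test")
            .setMaster("local[*]")
            .set("spark.driver.host", "localhost")
            .set("spark.driver.bindAddress", "127.0.0.1");
        return SparkSession.builder().config(conf).getOrCreate();
    }
}
```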
Sandro La Bruzzo 625e4c29c4 added model constants 2021-03-23 09:39:56 +01:00
Claudio Atzori b4febed138 updated mapping tests as consequence of the special treatment reserved to Handle PIDs 2021-03-23 09:37:48 +01:00
Claudio Atzori 431cbe9955 handle missing instance.pid during bulk cleaning 2021-03-23 09:28:58 +01:00
Sandro La Bruzzo c392936b97 fixed error on best access right 2021-03-23 09:23:22 +01:00
Sandro La Bruzzo c73072079d fix conflicts 2021-03-22 16:36:31 +01:00
Sandro La Bruzzo 098914dcff fix wrong relation with source null 2021-03-22 11:35:02 +01:00
miconis 0fe40b08e4 addition of deduplication profiles for the results, double check on pids and the title with a lower threshold 2021-03-19 17:12:05 +01:00
miconis 98854b0124 minor changes 2021-03-19 16:57:40 +01:00
Claudio Atzori 5a043e95ea code formatting 2021-03-19 11:37:27 +01:00
Claudio Atzori a4e82a65aa integrated filter applied when merging BETA & PROD graphs to rule out records from Datacite 2021-03-19 11:34:44 +01:00
Claudio Atzori 3256b9c836 code formatting 2021-03-19 09:36:12 +01:00
Claudio Atzori 75144dacb3 Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids 2021-03-19 09:07:40 +01:00
Claudio Atzori 9588bfba81 [cleaning] entries available as PIDs must not appear as alternateIdentifier 2021-03-19 09:07:30 +01:00
Claudio Atzori 972d5a3d98 [dedup] Datacite should be authoritative for datasets 2021-03-19 09:04:20 +01:00
Sandro La Bruzzo 25d5663d97 added filter 2021-03-18 10:24:42 +01:00
Sandro La Bruzzo 5f98ea74a9 Added fix for pid generation in stableIds 2021-03-17 15:53:24 +01:00
Sandro La Bruzzo b4805b989d Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_ids 2021-03-17 15:14:59 +01:00
Claudio Atzori 734232d3b9 identifier factory doesn't depend on pre-existing entity.id 2021-03-17 15:14:53 +01:00
Sandro La Bruzzo 76b10090fc Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_ids 2021-03-17 15:06:46 +01:00
Claudio Atzori a3dac32f16 pidFilter a bit more permissive 2021-03-17 15:06:05 +01:00
Sandro La Bruzzo 2be0428047 Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_ids 2021-03-17 14:54:28 +01:00
Claudio Atzori 8257f9a2bc result.pid: adjusted the mapping applied to the contents from the aggregator 2021-03-17 12:45:38 +01:00
Sandro La Bruzzo 7c97a4d900 Merge branch 'stable_ids' of code-repo.d4science.org:D-Net/dnet-hadoop into stable_ids 2021-03-17 12:13:03 +01:00
Sandro La Bruzzo cc5bbafa5d some fixes to make workflows run 2021-03-17 12:12:56 +01:00
Claudio Atzori 3b2da86f0a added precondition on IdentifierFactory to check the presence of entity.id 2021-03-16 17:05:38 +01:00
Claudio Atzori 640b885706 added instance.alternativeIdentifiers to the graph model, adjusted the mapping applied to the contents from the aggregator 2021-03-16 14:19:32 +01:00
Claudio Atzori 61a2551e74 migrated last changes from svn (dnet45) 2021-03-15 17:17:55 +01:00
Claudio Atzori 9cac6da9bd added AccessRight and OpenAccessRoute classes to ModelSupport.getOafModelClasses() 2021-03-12 16:31:17 +01:00
Antonis Lempesis 0ba0a6b9da update promote wf to support monitor&production 2021-03-12 16:42:59 +02:00
Antonis Lempesis 60ebdf2dbe update promote wf to support monitor&production 2021-03-12 16:34:53 +02:00
Antonis Lempesis 236435b470 following redirects 2021-03-12 14:11:21 +02:00
Antonis Lempesis 3c75a05044 fixed a ton of typos 2021-03-12 13:47:04 +02:00
Claudio Atzori 19f3580b3d introduced java8-based date parsing 2021-03-11 16:46:23 +01:00
Claudio Atzori d3cb923f24 introduced java8-based date parsing 2021-03-11 12:37:33 +01:00
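A minimal sketch of what java8-based date parsing looks like, assuming ISO dates (format and class names are illustrative; DateTimeFormatter, unlike the legacy SimpleDateFormat, is immutable and thread-safe):

```java
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Optional;

// Sketch: one shared, thread-safe formatter replaces per-call
// SimpleDateFormat allocations; invalid dates yield an empty Optional.
public class DateParser {

    private static final DateTimeFormatter ISO = DateTimeFormatter.ISO_LOCAL_DATE;

    public static Optional<LocalDate> parse(String value) {
        try {
            return Optional.of(LocalDate.parse(value, ISO));
        } catch (DateTimeParseException e) {
            return Optional.empty();
        }
    }
}
```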
Sandro La Bruzzo 4bb3bcafa5 add author sequence number 2021-03-11 11:32:32 +01:00
Sandro La Bruzzo a8e5d0ea0d updated test and fixed assign of access right 2021-03-11 10:41:24 +01:00
Sandro La Bruzzo f5e7c57654 Fixed ticket 6282 2021-03-11 10:32:45 +01:00
Claudio Atzori f74e464942 create bestaccessright as Qualifier 2021-03-10 15:40:05 +01:00
Antonis Lempesis fa1ec5b5e9 fixed typo... 2021-03-10 14:05:58 +02:00
Claudio Atzori c801ab6c1d minor 2021-03-09 17:22:31 +01:00
Claudio Atzori 9917d7e01c PID authorities include ArXiv 2021-03-09 17:12:52 +01:00
Claudio Atzori 01630f638d IdentifierFactory implementation based on the list of datasources authoritative for a given pid type 2021-03-09 17:11:50 +01:00
Claudio Atzori b3f3b895e5 [#6282 open access status in the Graph] OAStatus renamed as openAccessRoute 2021-03-09 11:41:11 +01:00
Claudio Atzori 765f9bdee7 merged from dhp_oaf_model 2021-03-09 11:37:41 +01:00
Claudio Atzori 59532b0919 [#6281 Provenance of product PIDs] Added PIDs to the Instance type; extended mapping for OAF/ODF records 2021-03-09 11:14:45 +01:00
Claudio Atzori d525785497 [#6282 open access status in the Graph] Result.Instance.accessRight defined with dedicated data type that includes the open access color. 2021-03-09 11:12:55 +01:00
Sandro La Bruzzo bbe1a7c69a [#6281 Provenance of product PIDs] Added PIDs to the Instance type in Scholexplorer Export 2021-03-09 10:46:36 +01:00
Sandro La Bruzzo a2169ccf07 implemented Ticket #6281: added pid to Instance in doiBoost 2021-03-09 10:46:36 +01:00
Claudio Atzori f468c7f0d7 merged from master 2021-03-09 09:12:41 +01:00
Claudio Atzori 76441f4edd code formatting 2021-03-09 09:12:26 +01:00
Claudio Atzori 8d2bb24512 merged from master 2021-03-08 15:44:34 +01:00
Claudio Atzori acbe3119a4 RestCollectorPlugin imported from dnet45 2021-03-08 09:44:09 +01:00
Antonis Lempesis f40c150a0d fixed steps... 2021-03-06 00:35:57 +02:00
Claudio Atzori fa7930d2e2 merging contributions from PR#97 2021-03-05 15:45:28 +01:00
Antonis Lempesis 6147ee4950 assigning correctly hive contexts to concepts 2021-03-05 14:12:18 +02:00
Antonis Lempesis c5fbad8093 Contexts are now downloaded instead of using the stats_ext db 2021-03-04 00:42:21 +02:00
Claudio Atzori 55f6ff5f55 README.md for aggregation workflows 2021-03-03 16:18:34 +01:00
Claudio Atzori e8789b0cdb Merge pull request 'stats DB for monitor' (#99) from antonis.lempesis/dnet-hadoop:master into master
Looks good to me, just a note on the parsing of the citations: since the last version, IIS produces citations as proper relationships among results. This is what we already have in the BETA graph

```
count		r.reltype	r.subreltype	r.relclass
62.129.254	resultResult	citation	cites
62.043.309	resultResult	citation	isCitedBy
```

Thus, I suggest moving away from the current property-based implementation for the extraction of the citation links and relying on the relationships instead.
2021-03-03 10:29:09 +01:00
Claudio Atzori ec80b7ade3 code formatting 2021-03-03 10:22:53 +01:00
Claudio Atzori 36f750cd1d removed unused classes 2021-03-03 10:22:29 +01:00
Claudio Atzori b73dce3e3a more logging on the MDStore mongodb client. Forcing UTF_8 encoding on the content 2021-03-03 10:17:16 +01:00
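Forcing the charset explicitly avoids platform-default decoding; a minimal sketch (the class name is illustrative):

```java
import java.nio.charset.StandardCharsets;

// Sketch: decode record bytes with an explicit charset rather than the
// platform default, so the same content reads identically on every node.
public class Utf8Content {

    public static String decode(byte[] content) {
        return new String(content, StandardCharsets.UTF_8);
    }
}
```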
Antonis Lempesis 27796343ca crude sleep. hardcoded value 2021-03-03 01:37:47 +02:00
Enrico Ottonello 20c0438f11 reformatted code after compile step 2021-03-01 11:07:01 +01:00
Enrico Ottonello 70cb100647 added updating last orcid dataset folders after completion 2021-03-01 10:17:04 +01:00
Enrico Ottonello bd3b16402b added result typologies 2021-03-01 10:16:02 +01:00
Claudio Atzori e76c4f62c1 MetadataRecord moved in dhp-schemas 2021-02-26 10:58:48 +01:00
miconis 1a85020572 bug fix in graph-mapper, changes in the implementation of the openorgs wf to create relations and populate openorgs db 2021-02-26 10:19:28 +01:00
Enrico Ottonello ca1800510a Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2021-02-25 18:45:02 +01:00
Enrico Ottonello 53d7023460 dateOfCollection taken from orcid last_update.txt on hdfs; cleaned wf parameters 2021-02-25 18:43:29 +01:00
Claudio Atzori 7df2461ccc indent XML records collected from oai-pmh endpoints 2021-02-25 16:19:12 +01:00
Enrico Ottonello d43ea88caf aligned orcid result typologies with openaire vocabulary 2021-02-25 15:02:10 +01:00
Claudio Atzori b830e33392 mdstore collector plugin 2021-02-25 12:30:30 +01:00
Claudio Atzori dc98c39500 more logging 2021-02-25 12:29:18 +01:00
Claudio Atzori 271e88537b code formatting 2021-02-25 12:28:56 +01:00
Claudio Atzori 9c899f4433 cleanup on transformation functions and the relative tests 2021-02-24 15:07:59 +01:00
Claudio Atzori fc3fa5e343 implemented mdstore collector plugin 2021-02-24 15:07:24 +01:00
Enrico Ottonello 975823b968 data from last updated orcid 2021-02-23 15:35:04 +01:00
Miriam Baglioni 896919e735 merge upstream 2021-02-23 10:45:29 +01:00
Antonis Lempesis d90767c733 correctly invalidating metadata 2021-02-19 03:18:47 +02:00
Antonis Lempesis 3681afbe04 typo 2021-02-19 03:04:27 +02:00
Antonis Lempesis c5502eba8f actually moved stats computation in impala instead of hive... 2021-02-19 02:54:39 +02:00
Antonis Lempesis 33c85d4e66 moved stats computation in impala instead of hive 2021-02-18 17:23:34 +02:00
Antonis Lempesis b8e96c8ae7 moved cache update to the end 2021-02-18 16:42:22 +02:00
Antonis Lempesis bcbfc052b1 fixed last errors in step 21 2021-02-18 16:32:54 +02:00
Antonis Lempesis 10a29a4b9a fixes in monitor step 2021-02-18 15:05:59 +02:00
Antonis Lempesis 8ef66452d5 fixed typo 2021-02-17 22:24:44 +02:00
Antonis Lempesis a8836e2f5f fixed typo 2021-02-17 19:27:07 +02:00
Claudio Atzori e7eba9f7e7 WIP: transformation workflow error reporting; cleanup 2021-02-17 16:54:08 +01:00
Claudio Atzori 58467aaf1e WIP: transformation workflow error reporting 2021-02-17 16:14:41 +01:00
Claudio Atzori cc88701f29 retry for any Socket exception 2021-02-17 16:13:54 +01:00
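A sketch of the widened retry policy, with a hypothetical request() stub standing in for the actual HTTP call:

```java
import java.net.SocketException;

// Sketch: retry on any SocketException (connection reset, broken pipe, ...),
// not only on a specific subtype; assumes maxAttempts >= 1.
public class RetryingClient {

    public String fetchWithRetries(int maxAttempts) throws Exception {
        SocketException last = null;
        for (int i = 0; i < maxAttempts; i++) {
            try {
                return request();
            } catch (SocketException e) {
                last = e; // any socket-level failure triggers another attempt
            }
        }
        throw last;
    }

    private String request() throws SocketException {
        throw new SocketException("placeholder for the actual HTTP call");
    }
}
```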
Antonis Lempesis a445c1ac3d fixed variable names in monitor script 2021-02-17 16:45:09 +02:00
Antonis Lempesis 00d516360f added missing ; 2021-02-17 16:41:10 +02:00
Claudio Atzori 545f8f3e48 using jackson objectmapper instead of Gson to serialise the aggregation report 2021-02-17 12:15:00 +01:00
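A minimal sketch of the Jackson-based serialisation (the wrapper class is illustrative; writeValueAsString is the standard Jackson call):

```java
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

// Sketch: serialise the aggregation report with Jackson instead of Gson.
// ObjectMapper is thread-safe once configured, so a shared instance is fine.
public class ReportSerializer {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    public static String toJson(Object report) throws JsonProcessingException {
        return MAPPER.writeValueAsString(report);
    }
}
```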
Claudio Atzori b592d78bb4 WIP: collectorWorker error reporting, generalised reporting implementation 2021-02-17 10:28:01 +01:00
Antonis Lempesis cd1b794409 added the monitor db wf 2021-02-17 02:11:55 +02:00
Claudio Atzori cf27905a71 WIP: collectorWorker error reporting, added report messages 2021-02-16 16:53:14 +01:00
Alessia Bardi bf2830b981 Merge pull request 'manage merging of Relation validation attributes' (#95) from merge_rel_validation into master 2021-02-16 12:14:27 +01:00
Claudio Atzori 6f9864c564 manage merging of Relation validation attributes 2021-02-16 11:53:44 +01:00
Alessia Bardi 32e81c2d89 non validated rel has null value in validated field 2021-02-16 11:01:42 +01:00
Claudio Atzori 58288a95b8 WIP: collectorWorker error reporting, added report messages 2021-02-15 15:28:53 +01:00
Claudio Atzori 1abe6d1ad7 WIP: collectorWorker error reporting, added report messages 2021-02-15 15:08:59 +01:00
Claudio Atzori 523a6bfa97 Merge pull request 'first commit to the correct branch' (#94) from andreas.czerniak/BrAggr_dnet-hadoop:hadoop_aggregator into hadoop_aggregator
Looks good to me, thanks Andreas!
2021-02-15 12:15:31 +01:00
Antonis Lempesis 1c029b9fc0 fixed formatting 2021-02-14 03:14:24 +02:00
Antonis Lempesis 2c4dcc90ba analyzing tables to produce stats 2021-02-14 02:54:55 +02:00
Sandro La Bruzzo 7edcc87ed4 changed xslt behaviour on failure 2021-02-12 17:27:08 +01:00
Sandro La Bruzzo 6a37c7f175 merge fixed 2021-02-12 16:38:47 +01:00
Sandro La Bruzzo b3f5c2351d Merge branch 'hadoop_aggregator' of code-repo.d4science.org:D-Net/dnet-hadoop into hadoop_aggregator
 Conflicts:
	dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
2021-02-12 16:37:14 +01:00
Sandro La Bruzzo f216277219 Implemented date cleaning 2021-02-12 16:34:52 +01:00
Andreas Czerniak 5a9017cf18 clone, min. changes, test, run 2021-02-12 14:32:36 +01:00
Claudio Atzori aa55dedb8a Merge branch 'hadoop_aggregator' of https://code-repo.d4science.org/D-Net/dnet-hadoop into hadoop_aggregator 2021-02-12 12:31:05 +01:00
Claudio Atzori 29c6f7e255 classes related to the collection workflow moved into common package; implemented MongoDB collection plugins 2021-02-12 12:31:02 +01:00
Sandro La Bruzzo 17e6f1934e fixed NPE on cleaner 2021-02-12 11:48:11 +01:00
Sandro La Bruzzo ebcc3ec14f updated wrong datacite identifier in transformation 2021-02-11 16:25:51 +01:00
Michele Artini 83d815d0bc only stats 2021-02-11 10:57:23 +01:00
Michele Artini 8c836bf930 Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop 2021-02-11 10:54:41 +01:00
Michele Artini 8c1600398a added resumeFrom parameter 2021-02-11 10:54:16 +01:00
Claudio Atzori 3f8f78cbfb Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2021-02-11 09:36:10 +01:00
Claudio Atzori b34b5a39ca index field authoridtypevalue mixes up different author id-type value pairs, dropped in favour of orcidtypevalue 2021-02-11 09:36:04 +01:00
Michele Artini 7249cceb53 switch of 2 nodes 2021-02-11 09:27:08 +01:00
Claudio Atzori 73393d3c4d Merge pull request 'validatedLinksToProjects' (#93) from validatedLinksToProjects into master
LGTM
2021-02-10 12:32:35 +01:00
Alessia Bardi 986dd969d3 use the proper import for Lists 2021-02-10 12:03:54 +01:00
miconis 4b2124a18e implementation of the openorgs wfs, implementation of the raw_all wf to migrate openorgs db entities 2021-02-10 11:51:50 +01:00
Alessia Bardi c4d1feca74 mapper test with validated link to project 2021-02-10 11:22:54 +01:00
Alessia Bardi 09fc7e2f78 serialization of validated flag on relationships 2021-02-10 11:22:09 +01:00
Enrico Ottonello ee4ba7298b fix last update read/write from file on hdfs 2021-02-09 23:24:57 +01:00
Claudio Atzori bc458d1b54 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2021-02-09 16:27:30 +01:00
Claudio Atzori 82e6c50f3f updated solr fields (authoridtypevalue, resultsubject, resultresourcetypename) 2021-02-09 16:27:04 +01:00
Claudio Atzori 62bd3c53ee Merge branch 'master' into provision_indexing 2021-02-09 15:46:26 +01:00
Claudio Atzori bae029f828 collection_java_xmx allows declaring the heap size allocated for the java actions involved in the metadata collection workflow 2021-02-08 18:07:23 +01:00
Claudio Atzori bebc54d5bf seq file storing native records is now compressed 2021-02-08 18:06:25 +01:00
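A sketch of how block compression can be enabled on a Hadoop SequenceFile writer; the path, codec and record payload are illustrative and the actual workflow may differ:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

// Sketch: block-compressed SequenceFile storing native records.
public class CompressedSeqWriter {

    public static void write(Configuration conf, Path path) throws Exception {
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(path),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(Text.class),
            SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()))) {
            writer.append(new Text("recordId"), new Text("<record/>"));
        }
    }
}
```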
Claudio Atzori 50add4c61b added requestDelay to HttpConnector2 configuration; Aggregation workflow constants moved in dhp-common 2021-02-08 12:19:38 +01:00
Miriam Baglioni 2f5e6647c6 merge upstream 2021-02-08 10:33:11 +01:00
Claudio Atzori 40df0f987d better logging, WIP: collectorWorker error reporting; common functions moved in DHPUtils 2021-02-06 20:12:00 +01:00
Claudio Atzori a8a758925e better logging, WIP: collectorWorker error reporting 2021-02-05 19:18:05 +01:00
Michele Artini 2ee0c3e47e http entity as json string 2021-02-05 09:45:39 +01:00
Claudio Atzori 730973679a Merge branch 'hadoop_aggregator' of https://code-repo.d4science.org/D-Net/dnet-hadoop into hadoop_aggregator 2021-02-04 17:25:00 +01:00
Claudio Atzori deb85706db imported HttpConnector from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java as HttpConnector2 2021-02-04 17:24:52 +01:00
Sandro La Bruzzo 4dae5e605d implemented messaging between collection worker and Dnet 2021-02-04 15:51:15 +01:00
Claudio Atzori 72c57b28fa switched project version to 1.2.4-branch_hadoop_aggregator-SNAPSHOT 2021-02-04 14:08:18 +01:00
Claudio Atzori 40764cf626 better logging, WIP: collectorWorker error reporting 2021-02-04 14:06:02 +01:00
Enrico Ottonello c238561001 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2021-02-04 10:44:21 +01:00
Enrico Ottonello 465ce39f75 job execution now based on file last_update.txt on hdfs 2021-02-04 10:44:04 +01:00
Sandro La Bruzzo 69c253710b fixed test 2021-02-04 10:30:49 +01:00
Michele Artini 3ea8c328ac Merge branch 'hadoop_aggregator' of code-repo.d4science.org:D-Net/dnet-hadoop into hadoop_aggregator 2021-02-04 09:46:13 +01:00
Michele Artini 26d2eb946f messages sender 2021-02-04 09:45:46 +01:00
Claudio Atzori 4758b58aa2 Merge branch 'hadoop_aggregator' of https://code-repo.d4science.org/D-Net/dnet-hadoop into hadoop_aggregator 2021-02-03 17:58:29 +01:00
Claudio Atzori e04045089f better logging, WIP: collectorWorker error reporting 2021-02-03 17:58:22 +01:00
Alessia Bardi c67329d3ad updated test for EU Open Data portal datasets 2021-02-03 17:06:48 +01:00
Michele Artini 1b9731632b Message Sender 2021-02-03 16:42:36 +01:00
Michele Artini 820d729e99 recover of Message and MessageType class 2021-02-03 16:20:34 +01:00
Michele Artini 33f4696d6e Merge branch 'hadoop_aggregator' of code-repo.d4science.org:D-Net/dnet-hadoop into hadoop_aggregator 2021-02-03 16:08:21 +01:00
Michele Artini c286d28ad2 logs 2021-02-03 16:07:49 +01:00
Claudio Atzori 0e8a4f9f1a better logging, WIP: collectorWorker error reporting 2021-02-03 12:33:41 +01:00
Alessia Bardi fd705404a1 tests for EU Open Data portal dataset mapping 2021-02-03 10:28:17 +01:00
Miriam Baglioni 6190465851 merge upstream 2021-02-03 10:27:27 +01:00
Claudio Atzori 53884d12c2 code formatting 2021-02-02 14:38:03 +01:00
Claudio Atzori ac46c247d2 code formatting 2021-02-02 14:24:00 +01:00
Claudio Atzori bde14b149a fixed transformation target paths 2021-02-02 12:49:29 +01:00
Claudio Atzori ca4391aa1c minor changes 2021-02-02 12:44:04 +01:00
Claudio Atzori bb89b99b24 code formatting 2021-02-02 12:34:14 +01:00
Claudio Atzori 75807ea5ae factored out constants 2021-02-02 12:28:21 +01:00
Sandro La Bruzzo 4ed1e306b6 Merge branch 'hadoop_aggregator' of code-repo.d4science.org:D-Net/dnet-hadoop into hadoop_aggregator 2021-02-02 12:12:51 +01:00
Sandro La Bruzzo 0634674add implemented transformation test 2021-02-02 12:12:14 +01:00
Claudio Atzori d62ea1490d cleaned up RabbitMQ stuff 2021-02-02 10:53:19 +01:00
Claudio Atzori 73d772a4b4 added method to list the known vocabulary names 2021-02-02 10:39:47 +01:00
Claudio Atzori 8eaa1fd4b4 WIP: metadata collection in INCREMENTAL mode and relative test 2021-02-01 19:29:10 +01:00
Sandro La Bruzzo bead34d11a code refactor 2021-02-01 14:58:06 +01:00
Sandro La Bruzzo 6ff234d81b Implemented a first prototype of incremental harvesting and transformation using readlock 2021-02-01 13:56:05 +01:00
Sandro La Bruzzo b6b835ef49 update transformation Factory to get Transformation Rule by Id and not by Title 2021-02-01 08:49:42 +01:00
Sandro La Bruzzo e423634cb6 RollBack in case of error WORKS!!! 2021-01-29 17:21:42 +01:00
Sandro La Bruzzo 8ee82576c6 Collection on Refresh WORKS!!! 2021-01-29 17:02:46 +01:00
Sandro La Bruzzo 0276180039 WIP mdstore
transaction implemented on hadoop side
2021-01-29 16:42:41 +01:00
Michele Artini d942d0c77d methods toString(), hashCode() and equals() 2021-01-29 13:16:48 +01:00
Sandro La Bruzzo 0f8e2ecce6 Merged Datacite transform into this branch 2021-01-29 10:45:07 +01:00
Sandro La Bruzzo 99cf3a8ea4 Merged Datacite transform into this branch 2021-01-28 16:34:46 +01:00
Sandro La Bruzzo 2da8bf7429 Merge pull request 'aggregation_on_hadoop' (#91) from sandro.labruzzo/dnet-hadoop:aggregation_on_hadoop into hadoop_aggregator
ok
2021-01-28 10:06:49 +01:00
Sandro La Bruzzo 686e7b507c Merge branch 'hadoop_aggregator' of code-repo.d4science.org:D-Net/dnet-hadoop into aggregation_on_hadoop 2021-01-28 10:02:13 +01:00
Sandro La Bruzzo 98b9498b57 Removed the old, barely used messaging system from the collection and transformation workflows
code refactor
2021-01-28 09:51:17 +01:00
Michele Artini 38f2508c87 new fields in mdstore beans 2021-01-28 08:24:45 +01:00
Sandro La Bruzzo 184e7b3856 Implemented new Transformation using spark 2021-01-27 15:43:08 +01:00
Sandro La Bruzzo 150a617bd1 Merge pull request 'aggregation_on_hadoop' (#90) from sandro.labruzzo/dnet-hadoop:aggregation_on_hadoop into hadoop_aggregator
Wonderful code... You're the best, Sandro
2021-01-26 16:00:47 +01:00
Claudio Atzori f1a852f278 align usage-stats workflow poms with latest snapshot version 2021-01-26 15:42:42 +01:00
Claudio Atzori 9c32119dc2 Merge pull request 'usage-stats-export-wf-v2' (#89) from dimitris.pierrakos/dnet-hadoop:usage-stats-export-wf-v2 into master
Thank you Dimitris!
2021-01-26 15:01:41 +01:00
Claudio Atzori 885e0dd926 [Cleaning] filter authors not providing word characters in the fullname 2021-01-26 09:48:53 +01:00
Claudio Atzori 2890511613 [Cleaning] normalise missing Result.country 2021-01-26 09:41:44 +01:00
Claudio Atzori 4eb9ed35b1 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2021-01-25 18:12:24 +01:00
Claudio Atzori cd379eb5e3 [Cleaning] trying to avoid NPEs, this time by ruling out authors without a defined fullname 2021-01-25 18:11:49 +01:00
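A minimal sketch of the rule described here, with a stand-in Author class for the graph model:

```java
import java.util.List;
import java.util.stream.Collectors;

// Sketch: rule out authors whose fullname is null or blank before cleaning,
// so downstream steps cannot dereference a missing name.
public class AuthorCleaner {

    public static class Author {
        private final String fullname;
        public Author(String fullname) { this.fullname = fullname; }
        public String getFullname() { return fullname; }
    }

    public static List<Author> keepNamed(List<Author> authors) {
        return authors.stream()
            .filter(a -> a.getFullname() != null && !a.getFullname().trim().isEmpty())
            .collect(Collectors.toList());
    }
}
```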
Alessia Bardi 505477f36f format code 2021-01-25 18:02:49 +01:00
Alessia Bardi ded6ed8d7d no ',' author if there are no authors in ODF records 2021-01-25 17:57:51 +01:00
Claudio Atzori 3465c8ccee [Cleaning] trying to avoid NPEs 2021-01-25 16:54:53 +01:00
Sandro La Bruzzo a54848a59c Moved Vocabulary stuff to common module 2021-01-25 15:43:04 +01:00
Sandro La Bruzzo ffb092b8d3 removed duplicate code HttpConnector.java 2021-01-25 15:05:37 +01:00
Sandro La Bruzzo cda210a2ca changed documentation since it didn't reflect the current status 2021-01-25 14:17:42 +01:00
Claudio Atzori 07a0ccfc96 [Cleaning] trying to avoid NPEs 2021-01-25 13:36:01 +01:00
miconis c7e2d5a59a minor changes 2021-01-25 12:40:45 +01:00
Claudio Atzori 646dab7f68 trying to avoid NPEs 2021-01-22 18:24:34 +01:00
Claudio Atzori 34d653de41 [Cleaning] updated cleaning rule for DOIs 2021-01-22 14:16:33 +01:00
Miriam Baglioni fe36895c53 added datasource blacklist for the organization to result propagation through institutional repositories 2021-01-22 11:55:10 +01:00
miconis 8fea29177c refactoring, minor changes and implementation of the wf for openorgs with integration of organization phases into the scan wf 2021-01-18 16:48:08 +01:00
Dimitris 3e8d2a6b2d Clean workflows 2021-01-15 16:19:12 +02:00
Michele Artini f667e94a31 Merge pull request 'broker' (#88) from broker into master 2021-01-14 14:48:13 +01:00
Michele Artini cfbcdc95bc fixed a wf param 2021-01-14 14:45:23 +01:00
Michele Artini 69ba3203c0 fixed a conflict 2021-01-14 14:43:25 +01:00
Michele Artini fafb5b2e08 Merge branch 'broker' of code-repo.d4science.org:D-Net/dnet-hadoop into broker 2021-01-14 14:32:42 +01:00
Michele Artini b230d44411 fixed conflict 2021-01-14 14:32:31 +01:00
Michele Artini b9d90e95b8 Added eventId to ShortEventMessage 2021-01-14 14:32:31 +01:00
Michele Artini 64b0b0bfb3 fixed a bug with invalid subject topic 2021-01-14 14:32:31 +01:00
Michele Artini e3e0ab1de1 fixed a problem with join 2021-01-14 14:32:31 +01:00
Michele Artini 26a941315a openaireId 2021-01-14 14:32:31 +01:00
Michele Artini 6f4d1a37f0 ES wf properties 2021-01-14 14:32:31 +01:00
Michele Artini 1391341d06 mkdir of output dir 2021-01-14 14:32:31 +01:00
Michele Artini 3c9cbd19f3 whitelist of topics 2021-01-14 14:32:31 +01:00
Michele Artini 467aa77279 workingDir and outputDir 2021-01-14 14:32:31 +01:00
Michele Artini 10f3f7eca7 workingDir and outputDir 2021-01-14 14:32:31 +01:00
Michele Artini ff41a7b3a4 gzipped output 2021-01-14 14:32:31 +01:00
Michele Artini 223fa660cb fixed conflict 2021-01-14 14:23:44 +01:00
Michele Artini ac91e495fc Added eventId to ShortEventMessage 2021-01-14 13:20:35 +01:00
Claudio Atzori 80cf55ef2e [Broker] fixed partitionEventsByOpendoarIds workflow parameter names 2021-01-13 16:24:30 +01:00
Claudio Atzori 41500669e2 [BIP! Scores integration] merged missing classes from bipFinder branch 2021-01-11 14:39:47 +01:00
Claudio Atzori 2a7a10809e [BIP! Scores integration] merged missing classes from bipFinder branch 2021-01-11 10:05:02 +01:00
Claudio Atzori 5bd999efe7 Merge pull request 'bipFinder_master_test' (#84) from bipFinder_master_test into master 2021-01-08 18:16:34 +01:00
Claudio Atzori d6686dd7cf merged from master 2021-01-08 18:16:12 +01:00
Claudio Atzori 34229970e6 [BIP! Scores integration] Create updates as Result rather than subclasses; Result also considers metrics in the mergeFrom operation 2021-01-08 16:29:17 +01:00
Claudio Atzori 1361c9eb0c [BIP! Scores integration] Create updates as Result rather than subclasses; Result also considers metrics in the mergeFrom operation 2021-01-07 10:07:30 +01:00
Claudio Atzori ab2fe9266a [DOIBoost] minor fixes in workflow definition 2021-01-05 10:26:39 +01:00
Claudio Atzori 7c722f3fdc [DOIBoost] fixed typo 2021-01-05 10:25:54 +01:00
Claudio Atzori 8879704ba0 [DOIBoost] configurable ES server url and index name in crossref importer 2021-01-05 10:00:13 +01:00
Claudio Atzori 26e9d55c13 code formatting 2021-01-05 09:59:26 +01:00
Sandro La Bruzzo 7834a35768 avoid saving the intermediate dataset before generating the Sequence file 2021-01-04 17:54:57 +01:00
Sandro La Bruzzo e79445a8b4 minor fix for Claudio's complaint 2021-01-04 17:39:25 +01:00
Sandro La Bruzzo 8765020b85 minor fix 2021-01-04 17:37:08 +01:00
Sandro La Bruzzo b0dc92786f defined a single oozie workflow for the generation of doiboost 2021-01-04 17:01:35 +01:00
Claudio Atzori 7185158942 ignore missing properties 2020-12-29 11:06:28 +01:00
Claudio Atzori 28460c2cd1 using com.fasterxml.jackson.databind.ObjectMapper instead of org.codehaus.jackson.map.ObjectMapper 2020-12-23 16:59:52 +01:00
Claudio Atzori 60649ac7d2 swapped expected and actual in tests, updated expected number of authors 2020-12-23 12:26:04 +01:00
Claudio Atzori 723b01f9e9 trivial: the fewer magic numbers and values around, the better 2020-12-23 12:22:48 +01:00
Claudio Atzori 6848d0c3d7 trivial: avoid duplicated code 2020-12-23 12:21:58 +01:00
Claudio Atzori d8b5f43a7e code formatting 2020-12-22 14:59:03 +01:00
Claudio Atzori 7bfc35df5e Merge pull request 'Changed typo in script names' (#82) from antonis.lempesis/dnet-hadoop:master into master
no need to! :)
2020-12-22 12:36:21 +01:00
Antonis Lempesis be5969a8c2 Changed typo in script names 2020-12-22 13:33:32 +02:00
miconis 794e22b09c bug fix in the authormerge: now larger author lists have priority; normalization of author names fixed 2020-12-21 17:51:42 +01:00
miconis 1e1aab83e3 implementation of the raw wf for openorgs: still not complete, some functionalities are missing 2020-12-21 11:58:21 +01:00
Claudio Atzori 6cb0dc3f43 extended ORCID cleaning procedure 2020-12-21 11:40:17 +01:00
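A sketch of what an ORCID cleaning step may look like, assuming URL-prefix stripping plus a canonical-shape check; the rules actually extended by this commit may differ:

```java
import java.util.Optional;
import java.util.regex.Pattern;

// Sketch: strip a possible URL prefix and keep only values matching the
// canonical ORCID shape 0000-0000-0000-000X.
public class OrcidCleaner {

    private static final Pattern ORCID = Pattern.compile("\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]");

    public static Optional<String> clean(String raw) {
        if (raw == null) {
            return Optional.empty();
        }
        String value = raw.trim()
            .replaceAll("(?i)^https?://orcid\\.org/", "")
            .toUpperCase();
        return ORCID.matcher(value).matches() ? Optional.of(value) : Optional.empty();
    }
}
```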
Claudio Atzori 573a8a3272 Merge pull request 'Changed typo in script names' (#81) from antonis.lempesis/dnet-hadoop:master into master
ok! LGTM
2020-12-18 17:44:26 +01:00
Antonis Lempesis 2a074c3b2b Changed typo in script names 2020-12-18 18:40:48 +02:00
Claudio Atzori 47270d9af5 lenient mock can be lenient 2020-12-18 15:38:59 +01:00
Claudio Atzori 2e503ee101 code formatting 2020-12-17 13:47:38 +01:00
Claudio Atzori 5a3e2199b2 Merge pull request 'Creation of the action set to include the bipFinder! score' (#80) from miriam.baglioni/dnet-hadoop:bipFinder into bipFinder_master_test 2020-12-17 12:26:38 +01:00
Claudio Atzori 03319d3bd9 Revert "Merge pull request 'Creation of the action set to include the bipFinder! score' (#62) from miriam.baglioni/dnet-hadoop:bipFinder into master"
This reverts commit add7e1693b, reversing
changes made to f9a8fd8bbd.
2020-12-17 12:23:58 +01:00
Claudio Atzori add7e1693b Merge pull request 'Creation of the action set to include the bipFinder! score' (#62) from miriam.baglioni/dnet-hadoop:bipFinder into master 2020-12-17 12:09:03 +01:00
Alessia Bardi f9a8fd8bbd updated test record for textgrid 2020-12-17 11:59:45 +01:00
Claudio Atzori 4766495f5b [orcid_to_result_from_semrel_propagation] fixed typo in SQL 2020-12-17 09:15:50 +01:00
Claudio Atzori de00094ebc Merge pull request 'FIX on the creation of subject based broker enrichments' (#79) from broker into master 2020-12-15 14:58:31 +01:00
Michele Artini f9dc1e45fd fixed a bug with invalid subject topic 2020-12-15 14:54:11 +01:00
Sandro La Bruzzo f92bd56f56 Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop 2020-12-15 11:47:29 +01:00
Sandro La Bruzzo 1f6c8a9e83 added orcid_pending type to records coming from Crossref 2020-12-15 11:47:15 +01:00
Enrico Ottonello b2de598c1a merged all actions, from downloading the lambda file to merging updated data, into one wf 2020-12-15 10:42:55 +01:00
Claudio Atzori 9f1181290e Merge pull request 'broker' (#78) from broker into master
The changes look good to me.
2020-12-15 10:03:45 +01:00
Claudio Atzori 6299f75807 Merge pull request 'validation in claim rels' (#77) from claims_validation into master
LGTM
2020-12-15 09:28:24 +01:00
Michele Artini 0a0f62bd01 Merge branch 'master' into broker 2020-12-15 08:30:52 +01:00
Michele Artini 12fa5d122a fixed a problem with join 2020-12-15 08:30:26 +01:00
Michele Artini 991e675dc6 validation in claim rels 2020-12-14 15:41:25 +01:00
Michele Artini 3e19cf7b4a openaireId 2020-12-14 15:24:33 +01:00
Claudio Atzori b6f08ce226 re-adding the old junit:junit dep as solr-test-framework needs it 2020-12-14 15:07:31 +01:00
Claudio Atzori e8ef8c63d4 delegate merging of OafEntity.dataInfo to the implementation of subclasses 2020-12-14 15:04:44 +01:00
Claudio Atzori 7d325e2c57 using actual result subclasses instead of their parent class 2020-12-14 14:40:54 +01:00
Claudio Atzori 152916890f renamed test name 2020-12-14 14:40:05 +01:00
Michele Artini a203aee32a ES wf properties 2020-12-14 12:02:33 +01:00
Claudio Atzori 1506f49052 Xml record serialization for author PIDs: 1) only one value per PID type is allowed; 2) orcid prevails over orcid_pending 2020-12-14 11:14:03 +01:00
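A minimal sketch of the two rules, with a stand-in Pid class for the graph model's structured property:

```java
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Sketch of the serialization rules: one value per PID type, and orcid
// prevails over orcid_pending when both are present.
public class AuthorPidFilter {

    public static class Pid {
        final String type;
        final String value;
        Pid(String type, String value) { this.type = type; this.value = value; }
    }

    public static Collection<Pid> filter(List<Pid> pids) {
        Map<String, Pid> byType = new LinkedHashMap<>();
        for (Pid p : pids) {
            byType.putIfAbsent(p.type, p); // rule 1: one value per PID type
        }
        if (byType.containsKey("orcid")) {
            byType.remove("orcid_pending"); // rule 2: orcid prevails
        }
        return byType.values();
    }
}
```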
Michele Artini d03756c962 mkdir of output dir 2020-12-14 11:11:41 +01:00
Michele Artini 399548f221 whitelist of topics 2020-12-14 11:03:55 +01:00
Michele Artini 38da1c282a Merge branch 'master' into broker 2020-12-14 09:14:02 +01:00
Dimitris dc9c2f3272 Commit 12122020 2020-12-12 12:00:14 +02:00
Enrico Ottonello efe4c2a9c5 authors and works are now updated in two separate spark actions of the wf 2020-12-12 02:06:21 +01:00
Enrico Ottonello 858efbfad1 fix dataset creation for downloaded works 2020-12-11 16:49:54 +01:00
Claudio Atzori 61cd129ded XML serialisation test 2020-12-11 12:44:53 +01:00
Claudio Atzori ce7a319e01 using the correct assertion import 2020-12-11 12:44:17 +01:00
Claudio Atzori 7fe2433137 excluded transitive older junit dependencies, they can compromise the unit test executions 2020-12-11 12:42:55 +01:00
Claudio Atzori d9532446eb imported more diffs from master branch; code formatting 2020-12-10 16:14:16 +01:00
Claudio Atzori 1eaad89a3c do not fail on unknown properties when grouping entities by ID 2020-12-10 15:56:11 +01:00
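With Jackson this is a one-flag configuration; a minimal sketch:

```java
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

// Sketch: tolerate JSON fields that the target class does not declare,
// instead of failing the whole grouping job on the first unknown property.
public class LenientMapper {

    public static final ObjectMapper MAPPER = new ObjectMapper()
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
}
```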
Michele Artini 933b4c1ada workingDir and outputDir 2020-12-10 14:47:51 +01:00
Michele Artini 2e7df07328 workingDir and outputDir 2020-12-10 14:47:22 +01:00
Michele Artini 94bfed1c84 gzipped output 2020-12-10 11:59:28 +01:00
Claudio Atzori 3c10941376 Merge pull request 'bipFinder_resolve_conflicts' (#73) from bipFinder_resolve_conflicts into stable_ids 2020-12-10 11:00:46 +01:00
Claudio Atzori 12e2f930c8 resolved conflicts 2020-12-10 10:57:39 +01:00
Miriam Baglioni b7adbc7c3e merge branch with master 2020-12-10 10:35:27 +01:00
Alessia Bardi 112da6d76a in theory, just auto-formatting after mvn compile 2020-12-09 20:00:27 +01:00
Alessia Bardi bece04b330 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2020-12-09 19:54:43 +01:00
Alessia Bardi 426b76ee8e more asserts for TextGrid record 2020-12-09 19:46:11 +01:00
Claudio Atzori ff72fcd91a allow orcid_pending to percolate to the XML graph serialization 2020-12-09 19:04:50 +01:00
Claudio Atzori 4705144918 Merge pull request 'rel_project_validation' (#69) from rel_project_validation into master
LGTM
2020-12-09 19:01:20 +01:00
Claudio Atzori 211aa04726 allow orcid_pending to percolate to the XML graph serialization 2020-12-09 18:08:51 +01:00
Claudio Atzori db4e400a0b introduced Oaf.mergeFrom method to allow merging of dataInfo(s), with prevalence based on datainfo.trust 2020-12-09 18:01:45 +01:00
Claudio Atzori ada21ad920 Merge pull request 'dump of the results related to at least one project' (#61) from miriam.baglioni/dnet-hadoop:dump into master
LGTM
2020-12-09 17:22:56 +01:00
Miriam Baglioni 6fbc67a959 using ModelConstants.ORCID and removing unused constants 2020-12-09 17:10:20 +01:00
Claudio Atzori 3c5ce1dada code formatting 2020-12-09 17:07:20 +01:00
Miriam Baglioni 212b52614f added mapper from graph result to community result, without context and project, to be used for the doiboost mapping 2020-12-09 16:59:02 +01:00
Michele Artini 1bc9adc10d default trust for validated rels 2020-12-09 16:18:37 +01:00
Claudio Atzori fcd7689b50 promote actions: shouldGroupById parameter marked as optional (default is true) 2020-12-09 13:10:16 +01:00
Michele Artini 5f21a356fd reindent 2020-12-09 11:24:30 +01:00
Michele Artini 370a5e650b validation attributes in resultProject relations 2020-12-09 11:18:26 +01:00
Antonis Lempesis aead9efd24 added the new parameter (stats_tool_api_url) in the workflow parameters 2020-12-09 10:45:24 +01:00
Antonis Lempesis 77a3a6d82e added the new parameter (stats_tool_api_url) in the workflow parameters 2020-12-09 10:45:24 +01:00
Antonis Lempesis 91226117b3 ignoring deletedbyinference relations 2020-12-09 10:45:24 +01:00
Antonis Lempesis b7f29db126 finished first implementation of wf 2020-12-09 10:45:24 +01:00
Antonis Lempesis ded2392275 initial implementation of the promote wf 2020-12-09 10:45:24 +01:00
Antonis Lempesis 1a87a1effd added last step to update cache 2020-12-09 10:45:24 +01:00
Michele Artini 75bf708351 Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop 2020-12-09 10:31:33 +01:00
Michele Artini 620e1307a3 indentation 2020-12-09 10:30:47 +01:00
Enrico Ottonello 2233750a37 original orcid xml data are stored in a field of the class that models orcid data 2020-12-09 09:45:19 +01:00
Claudio Atzori 491ad24750 introduced filtering for DOIs in graph cleaning workflow 2020-12-09 09:10:33 +01:00
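A sketch of a DOI validity filter following the common "10.&lt;registrant&gt;/&lt;suffix&gt;" shape; the rule actually adopted may be stricter or looser:

```java
import java.util.regex.Pattern;

// Sketch: accept only pids shaped like a DOI (10.<registrant>/<suffix>).
public class DoiFilter {

    private static final Pattern DOI = Pattern.compile("^10\\.\\d{4,9}/\\S+$");

    public static boolean isValid(String pid) {
        return pid != null && DOI.matcher(pid.trim().toLowerCase()).matches();
    }
}
```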
Claudio Atzori 27e96767e0 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2020-12-07 21:53:22 +01:00
Claudio Atzori fba11eef2a cleanup 2020-12-07 21:53:13 +01:00
Claudio Atzori 2fcc24b36e code formatting 2020-12-07 21:52:32 +01:00
Claudio Atzori 197f286fa4 removed duplicated dependency (org.apache.httpcomponents:httpclient) 2020-12-07 21:52:17 +01:00
Sandro La Bruzzo 7f8b93de72 Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop 2020-12-07 19:59:39 +01:00
Sandro La Bruzzo 302baab67b fixed doiboost mapping and workflows 2020-12-07 19:59:33 +01:00
Enrico Ottonello 5c65e602d3 wf doi_authors generates one json record for each row 2020-12-07 15:28:10 +01:00
Michele Artini d6934f370e Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop 2020-12-07 14:56:23 +01:00
Michele Artini 5de8a7276f wf to partition opendoar events 2020-12-07 14:56:06 +01:00
Claudio Atzori 5e8509bef7 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2020-12-07 13:50:08 +01:00
Claudio Atzori 026ad40633 disabled test 2020-12-07 13:50:01 +01:00
Claudio Atzori 21ddcf3a73 actions promotion can optionally avoid grouping objects by id (configured via shouldGroupById parameter) 2020-12-07 13:45:18 +01:00
Enrico Ottonello fa1855a4b8 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-12-07 11:02:59 +01:00
Enrico Ottonello b1b589ada1 wf to generate orcid dataset 2020-12-07 11:02:32 +01:00
Sandro La Bruzzo 620e585b63 Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop 2020-12-07 10:42:53 +01:00
Sandro La Bruzzo b31dd126fb fixed crossref workflow; added common ORCID class 2020-12-07 10:42:38 +01:00
Enrico Ottonello 8812ab65e1 completed download function in the wf; added accumulators 2020-12-04 21:13:49 +01:00
Claudio Atzori a104a632df cleanup 2020-12-04 16:32:47 +01:00
Claudio Atzori 5b4e1142a8 Merge pull request 'added last step to update cache' (#64) from antonis.lempesis/dnet-hadoop:master into master
Looks good to me, thanks!
2020-12-04 14:42:31 +01:00
Antonis Lempesis b1ed1afdcc added the new parameter (stats_tool_api_url) in the workflow parameters 2020-12-04 13:07:18 +02:00
Antonis Lempesis 7cb113e088 added the new parameter (stats_tool_api_url) in the workflow parameters 2020-12-04 13:04:25 +02:00
Antonis Lempesis d23ccae0d5 ignoring deletedbyinference relations 2020-12-04 12:42:17 +02:00
Miriam Baglioni 5fb65ffc4a merge branch with master 2020-12-03 11:24:35 +01:00
Miriam Baglioni ea88dc3401 fixed issue in property name 2020-12-03 11:24:23 +01:00
Miriam Baglioni 4c58bd1c93 merge with upstream 2020-12-03 11:24:00 +01:00
Miriam Baglioni 05c452f58d merge with upstream 2020-12-03 10:26:45 +01:00
Enrico Ottonello 53b22c1937 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-12-02 23:21:27 +01:00
Enrico Ottonello 1b1e9ea67c wf to generate doi_author_list for doiboost; wf to download updated works 2020-12-02 23:20:16 +01:00
Antonis Lempesis 413afcfed5 finished first implementation of wf 2020-12-02 15:57:17 +02:00
Antonis Lempesis 0948536614 initial implementation of the promote wf 2020-12-02 15:41:56 +02:00
Sandro La Bruzzo 7da679542f fixed wrong projectId 2020-12-02 14:28:09 +01:00
Sandro La Bruzzo 6ba8037cc7 fixed test failure due to changed input 2020-12-02 11:34:46 +01:00
Claudio Atzori cfb55effd9 code formatting 2020-12-02 11:23:49 +01:00
Claudio Atzori 74242e450e using constants from ModelConstants 2020-12-02 11:23:35 +01:00
Miriam Baglioni d5efa6963a using constants in ModelCOnstants 2020-12-02 11:20:26 +01:00
Claudio Atzori 873c358d1d Merge pull request 'added extension for new author pid (orcid_pending)' (#63) from miriam.baglioni/dnet-hadoop:master into master
LGTM
2020-12-02 11:15:00 +01:00
Miriam Baglioni cd285e98bc using the constants defined in the ModelConstants class 2020-12-02 11:13:23 +01:00
Miriam Baglioni 51c582c08c added orcid class name among the constants set 2020-12-02 11:12:54 +01:00
Miriam Baglioni 4b0d1530a2 merge upstream 2020-12-02 11:05:00 +01:00
Claudio Atzori faa977df7e Merge pull request 'orcid-no-doi' (#43) from enrico.ottonello/dnet-hadoop:orcid-no-doi into master
The dataset was generated and is now part of the actionsets available in BETA
2020-12-02 10:55:12 +01:00
Claudio Atzori 57f448b7a4 graph cleaning workflow separate orcid_pending from orcid, depending on the author pid provenance 2020-12-02 10:44:05 +01:00
Alessia Bardi 2d15667b4a testing XML generation from json object (case AMS ACTA) 2020-12-02 10:16:26 +01:00
Alessia Bardi a417624670 tests for raw graph mapping 2020-12-02 10:15:26 +01:00
Claudio Atzori 943b961cf6 introduced PidBlacklist 2020-12-02 09:30:34 +01:00
Claudio Atzori 893ac4a77b GenerateEntitiesApplication can be configured to hash the id value or not 2020-12-02 09:30:06 +01:00
Miriam Baglioni f8468c9c22 added extension for new author pid (orcid_pending) 2020-12-01 20:09:35 +01:00
Miriam Baglioni 888175baf7 added java doc 2020-12-01 18:36:29 +01:00
Miriam Baglioni 3d62d99d5d fixed issue in workflow variable 2020-12-01 15:02:49 +01:00
Miriam Baglioni 17680296b9 removed unnecessary variable and unused method 2020-12-01 15:02:31 +01:00
Miriam Baglioni 5b3ed70808 refactoring 2020-12-01 14:31:34 +01:00
Miriam Baglioni 62ff4999e3 added workflow and last step of collection and save 2020-12-01 14:30:56 +01:00
Miriam Baglioni 45d06c45c7 collecting all the atomic actions for result type and saving them all in the AS path 2020-12-01 14:29:18 +01:00
Miriam Baglioni 0051ebede5 extending test 2020-12-01 12:43:03 +01:00
Miriam Baglioni 719da15f04 added test resources 2020-12-01 12:42:30 +01:00
Miriam Baglioni e819155eb2 added implements Serializable 2020-12-01 09:51:58 +01:00
Miriam Baglioni db36e11912 classes test classes and resources for production of the actionset to include bipFinder score in results 2020-11-30 20:14:23 +01:00
Claudio Atzori 349e7246aa do not consider NCID, GBIF as candidate PIDs for the ID creation 2020-11-30 16:52:40 +01:00
Enrico Ottonello f2df3ead74 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-11-30 14:22:46 +01:00
Enrico Ottonello 40c4559e92 added datainfo on author pids with "sysimport:crosswalk:entityregistry" 2020-11-30 14:19:22 +01:00
Claudio Atzori 2c407e775e GenerateEntitiesApplication can be configured to hash the id value or not 2020-11-30 12:00:38 +01:00
Antonis Lempesis 815d6b25d9 added last step to update cache 2020-11-30 00:48:10 +02:00
Claudio Atzori 758d27745d cleaning tab characters from text fields 2020-11-27 16:07:24 +01:00
Claudio Atzori 596a2a459d added testing class for OafMapperUtils 2020-11-27 12:01:11 +01:00
Claudio Atzori e731a7658d cleaning texts to remove tab characters too 2020-11-27 09:00:04 +01:00
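Together with the newline cleaning introduced in the commits further down this list, a minimal regex-based sketch of the whitespace normalisation:

```java
// Sketch: collapse tab and newline characters found in text fields
// (titles, descriptions, subjects) into single spaces.
public class TextCleaning {

    public static String cleanControlWhitespace(String s) {
        return s == null ? null : s.replaceAll("[\\n\\r\\t]+", " ").trim();
    }
}
```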
Claudio Atzori fa66e5b6b8 ResultTypeComparator gives priority to Records collectedfrom Crossref 2020-11-26 13:09:19 +01:00
Claudio Atzori 5151850a19 CROSSREF and DATACITE constants moved in common ModelConstants 2020-11-26 13:08:36 +01:00
Claudio Atzori a104d2b6ad cleanup 2020-11-26 11:12:00 +01:00
Claudio Atzori d0d5525d40 minor changes 2020-11-26 11:04:17 +01:00
Claudio Atzori 13eae4b31e GroupEntitiesSparkJob must read all graph paths but relations 2020-11-26 11:04:01 +01:00
Claudio Atzori 76363a8512 SimpleDateFormat is not thread safe; improved error reporting in case of invalid dates 2020-11-26 11:03:12 +01:00
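SimpleDateFormat keeps mutable parse state, so sharing one instance across threads corrupts results; a sketch of the classic ThreadLocal workaround, with the invalid value included in the error (the format string is illustrative):

```java
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

// Sketch: each thread gets its own SimpleDateFormat copy, and parse
// failures report the offending input instead of a bare exception.
public class SafeDateParser {

    private static final ThreadLocal<SimpleDateFormat> FORMAT =
        ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd"));

    public static Date parse(String s) {
        try {
            return FORMAT.get().parse(s);
        } catch (ParseException e) {
            throw new IllegalArgumentException("invalid date: '" + s + "'", e);
        }
    }
}
```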
Claudio Atzori c1b9a4045a grouping of records will be performed by the dedup workflow 2020-11-26 10:59:10 +01:00
Miriam Baglioni 124591a7f3 refactoring 2020-11-25 18:23:28 +01:00
Miriam Baglioni 1a89f8211c D-Net/dnet-hadoop#61 (comment) 2020-11-25 18:12:40 +01:00
Miriam Baglioni 5fbe54ef54 D-Net/dnet-hadoop#61 (comment) 2020-11-25 18:10:28 +01:00
Miriam Baglioni ed01e5a5e1 D-Net/dnet-hadoop#61 (comment) 2020-11-25 18:09:34 +01:00
Miriam Baglioni d4ddde2ef2 changed because of D-Net/dnet-hadoop#61 (comment) 2020-11-25 18:01:01 +01:00
Miriam Baglioni f5e5e92a10 changed because of D-Net/dnet-hadoop#61 (comment) 2020-11-25 17:58:53 +01:00
Miriam Baglioni 1df94b85b4 changed because of D-Net/dnet-hadoop#61 (comment) 2020-11-25 17:57:43 +01:00
Miriam Baglioni 66c0e3e574 changed because of D-Net/dnet-hadoop#61 (comment) 2020-11-25 17:52:17 +01:00
Claudio Atzori db0181b8af Merge pull request 'added bidirectionality to relations from project and result coming from crossref' (#60) from miriam.baglioni/dnet-hadoop:sxBidirectionality into master 2020-11-25 17:17:40 +01:00
Sandro La Bruzzo ec3e238de6 Fixed problem on duplicated identifier 2020-11-25 17:15:54 +01:00
Claudio Atzori 1372a4d1bf fixed merging method 2020-11-25 16:05:51 +01:00
Claudio Atzori e208b03755 renamed workflow 2020-11-25 14:55:50 +01:00
Claudio Atzori dfd6205b95 Consistency graph workflow merges all the entities by ID 2020-11-25 14:55:32 +01:00
Miriam Baglioni 90d4369fd2 added test to verify the compression in writing community info on hdfs 2020-11-25 14:34:58 +01:00
Miriam Baglioni 6750e33d69 merge branch with master 2020-11-25 14:09:01 +01:00
Miriam Baglioni b2c455f883 added java doc 2020-11-25 14:08:09 +01:00
Miriam Baglioni 1f130cdf92 changed the relation (produces -> isProducedBy) due to the change in the code 2020-11-25 14:04:26 +01:00
Miriam Baglioni e758d5d9b4 refactoring 2020-11-25 13:46:39 +01:00
Miriam Baglioni 87a9f616ae refactoring and addition of the funder nsp first part as name for the dump instead of the whole nsp 2020-11-25 13:45:41 +01:00
Miriam Baglioni e7e418e444 added decision node to verify if to upload in Zenodo 2020-11-25 13:44:10 +01:00
Miriam Baglioni 305e3d0c9c added resource file for relation with relClass = isProducedBy 2020-11-25 13:43:41 +01:00
Miriam Baglioni 21ce175d17 added FilterFunction specification in filter operation 2020-11-25 13:42:31 +01:00
Miriam Baglioni bde6d337dd test classes for dump of results related to funders 2020-11-25 13:42:01 +01:00
Miriam Baglioni b37b9352d7 added constant value for semantic relationship between projects and results 2020-11-25 13:41:08 +01:00
Sandro La Bruzzo 264723ffd8 updated stuff for zenodo upload 2020-11-25 11:56:07 +01:00
Claudio Atzori 36173c13a5 reverted filters in the cleaning process 2020-11-25 10:24:42 +01:00
Claudio Atzori eeebd5a920 Cleaning workflow: remove newlines from titles, descriptions, subjects 2020-11-24 18:40:25 +01:00
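A sketch of the text normalization these cleaning commits describe; the class and method names are illustrative stand-ins for the repository's CleaningFunctions:

public class TextCleaning {
    // Collapse newlines, carriage returns and tab characters in free-text
    // fields (titles, descriptions, subjects) into single spaces.
    public static String cleanText(String s) {
        return s == null ? null : s.replaceAll("[\\n\\r\\t]+", " ").trim();
    }
}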
Claudio Atzori e1a1bb3ee4 moved class CleaningFunctions in the correct package. Remove newlines from titles, descriptions, subjects 2020-11-24 18:34:03 +01:00
Enrico Ottonello 99a086f0c6 max concurrent executors set to 10, according to ORCID Director of Technology mail request 2020-11-24 17:49:32 +01:00
Miriam Baglioni 72bb0fe360 changed directory name 2020-11-24 16:47:07 +01:00
Miriam Baglioni 00874a8ce6 added bidirectionality to relations from project and result 2020-11-24 15:17:23 +01:00
Miriam Baglioni 39f4a20873 changed the path and the name for saving the communities_infrastructures dump file 2020-11-24 14:47:32 +01:00
Miriam Baglioni 7e14452a87 final version of the wf to get the dump of results associated with at least one funder, per funder 2020-11-24 14:46:34 +01:00
Miriam Baglioni c167a18057 added new parameter for the dumpType 2020-11-24 14:45:50 +01:00
Miriam Baglioni 54a309bb6b refactoring 2020-11-24 14:45:30 +01:00
Miriam Baglioni 35ecea8842 changed to consider the modification for the specification of the type of dump 2020-11-24 14:45:15 +01:00
Miriam Baglioni b9b6bdb2e6 fixing issue on previous implementation 2020-11-24 14:44:53 +01:00
Miriam Baglioni 7e940f1991 changed to consider the modification for the specification of the type of dump 2020-11-24 14:43:34 +01:00
Miriam Baglioni 62928ef7a5 changed to save the communities_infrastructures information as the other entity dumps: in a json.gz file 2020-11-24 14:42:41 +01:00
Claudio Atzori 33bae02451 reverted behaviour of the cleaning workflow: grouping entities by ID will be managed differently 2020-11-24 14:42:33 +01:00
Claudio Atzori e43ab07af6 code formatting 2020-11-24 14:41:39 +01:00
Miriam Baglioni 3319440c53 changed the direction of the relation between projects and result considered to select the results linked to projects 2020-11-24 14:41:09 +01:00
Miriam Baglioni 00c377dac2 added specification of MapFunction types in map 2020-11-24 14:40:22 +01:00
Miriam Baglioni 44db258dc4 added enumerated for the dump type 2020-11-24 14:38:06 +01:00
Miriam Baglioni 1832708c42 replaced boolean variable with a string one which specifies the type of dump we are performing: complete, community or funder 2020-11-24 14:37:36 +01:00
Miriam Baglioni 73dbb79602 removed the check for the community name in the common version of MakeTar 2020-11-24 14:36:15 +01:00
Claudio Atzori c016cc050a IdentifierFactory: in case a record provides more than one pid of the same type, the lexicographically lower value is chosen as best pick 2020-11-23 19:16:40 +01:00
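The selection rule stated in the commit above, sketched in isolation; the (type, value) pairs are a stand-in for the actual pid model:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class BestPidPick {
    // For each pid type, keep the lexicographically lower value as best pick.
    public static Map<String, String> bestPerType(List<String[]> pids) {
        return pids.stream()
            .collect(Collectors.toMap(
                p -> p[0],                               // pid type, e.g. "doi"
                p -> p[1],                               // pid value
                (a, b) -> a.compareTo(b) <= 0 ? a : b)); // lexicographic min on clash
    }
}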
Enrico Ottonello 5c17e768b2 set wf configuration with spark.dynamicAllocation.maxExecutors 20 over 20 input partitions 2020-11-23 16:01:23 +01:00
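The capping described above, expressed as plain Spark configuration; illustrative only, since the workflow sets the property through its Oozie configuration:

import org.apache.spark.SparkConf;

public class ExecutorCap {
    // Cap dynamic allocation at 20 executors, matching the 20 input partitions.
    public static SparkConf conf() {
        return new SparkConf()
            .set("spark.dynamicAllocation.enabled", "true")
            .set("spark.dynamicAllocation.maxExecutors", "20");
    }
}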
Enrico Ottonello 5c9a727895 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-11-23 09:49:53 +01:00
Enrico Ottonello 97c8111847 action to convert the lambda file into a seq file; spark action to download updated authors 2020-11-23 09:49:22 +01:00
Miriam Baglioni 259c67ce36 fixed issue in path name 2020-11-20 12:32:23 +01:00
Miriam Baglioni 0a9db67eec - 2020-11-20 12:21:33 +01:00
Miriam Baglioni d362f2637d merge branch with master 2020-11-19 19:17:20 +01:00
Miriam Baglioni cf3f47563f new parameter files 2020-11-19 19:16:05 +01:00
Miriam Baglioni 24c56fa7a3 new logic and workflow for dump of results with link to projects. In this implementation the results match the model of the communityresult. 2020-11-19 19:15:39 +01:00
Claudio Atzori d48f388fb2 Merge branch 'provision_indexing' 2020-11-19 15:59:55 +01:00
Claudio Atzori 46bde9c13f Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2020-11-19 15:26:27 +01:00
Claudio Atzori 7c9feaf9e7 project attributes removed from the XML record serialization: contactfullname, contactfax, contactphone, contactemail 2020-11-19 15:26:20 +01:00
Claudio Atzori fcbb05eb21 cleanup 2020-11-19 15:14:33 +01:00
Claudio Atzori 3f34757c63 merged from master 2020-11-19 14:34:54 +01:00
Michele Artini 293da47ad9 Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop 2020-11-19 10:42:31 +01:00
Michele Artini ab08d12c46 considering abstract > MIN_LENGTH in ENRICH_MISSING_ABSTRACT 2020-11-19 10:42:10 +01:00
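The guard referenced above, sketched with an assumed threshold (the actual MIN_LENGTH value lives in the broker enrichment code):

public class AbstractGuard {
    private static final int MIN_LENGTH = 100; // assumed value, for illustration only

    // Treat an abstract as enrichment material for ENRICH_MISSING_ABSTRACT
    // only when it is long enough to carry information.
    public static boolean worthEnriching(String abstractText) {
        return abstractText != null && abstractText.trim().length() > MIN_LENGTH;
    }
}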
Claudio Atzori e503271abe fixed notification workflow name 2020-11-19 10:41:38 +01:00
Claudio Atzori 0374d34c3e introduced configuration param outputFormat: HDFS | SOLR 2020-11-19 10:34:28 +01:00
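A sketch of the dispatch the outputFormat parameter implies; both helper names here are hypothetical:

public class OutputDispatch {
    enum OutputFormat { HDFS, SOLR }

    public static void write(OutputFormat format, String outputPath) {
        switch (format) {
            case HDFS:
                saveAsText(outputPath); // hypothetical: persist the records for inspection
                break;
            case SOLR:
                indexOnSolr();          // hypothetical: feed the records to the index
                break;
        }
    }

    private static void saveAsText(String path) { /* elided */ }
    private static void indexOnSolr() { /* elided */ }
}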
Miriam Baglioni fafb688887 - 2020-11-18 18:56:48 +01:00
Miriam Baglioni 906db690d2 - 2020-11-18 17:43:08 +01:00
Claudio Atzori ede7fae6c8 Merge pull request 'XML record indexing test' (#58) from provision_indexing into master 2020-11-18 17:04:34 +01:00
Miriam Baglioni 5402062ff5 changed parameter file with the one associated with the job 2020-11-18 16:58:20 +01:00
Miriam Baglioni a172a37ad1 fixed typo 2020-11-18 16:55:07 +01:00
Miriam Baglioni 46ba3793f6 code, workflow and parameters for the dump of the results associated to funders 2020-11-18 16:47:31 +01:00
Claudio Atzori 5218718e8b updated set of fields from the MDFormatDSResourceType on PROD 2020-11-18 15:00:41 +01:00
Claudio Atzori d9e07a242b extended XmlIndexingJob to accept an optional parameter: outputPath. When present, forces the job to write its output on the specified HDFS location 2020-11-18 14:34:55 +01:00
Claudio Atzori 29dcff0f34 spark complains about missing classes, so here they are again 2020-11-18 14:32:32 +01:00
Miriam Baglioni 57cac36898 changed the workflow name 2020-11-18 13:38:03 +01:00
Claudio Atzori 12acf25519 Merge pull request 'starting from first step...' (#57) from antonis.lempesis/dnet-hadoop:master into master
No judging. Just re-deploying...
2020-11-18 11:01:49 +01:00
Claudio Atzori 8177ce7939 test for XmlIndexingJob based on a local miniSolrCluster 2020-11-18 10:58:05 +01:00
Alessia Bardi 10e673660f Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2020-11-18 10:01:23 +01:00
Alessia Bardi be7b310cef rel semantics ignore case 2020-11-18 10:01:20 +01:00
Michele Artini 33da2e3d6c xpaths for dateOfCollection and dateOfTransformation 2020-11-18 09:26:20 +01:00
Antonis Lempesis 01a6e03989 starting from first step... 2020-11-17 23:26:47 +02:00
Alessia Bardi 8f87020a50 #56: map relevantDates from aggregated ODF records 2020-11-17 18:42:09 +01:00
Alessia Bardi 7e0a76a8ac test for TextGrid 2020-11-17 18:39:25 +01:00
Enrico Ottonello 2b0c9bbb7e Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-11-17 18:24:34 +01:00
Enrico Ottonello c0c2e05eae added wf to extract authors and works xml data from orcid dump to hdfs; added wf to download the lambda file (containing the last orcid update information) from orcid to hdfs 2020-11-17 18:23:12 +01:00
Claudio Atzori cfc01f136e PID filtering based on a blacklist 2020-11-17 12:27:06 +01:00
Claudio Atzori 628ca54dd3 disable old maven repository URLs 2020-11-17 12:26:16 +01:00
Dimitris bbcf6b7c8b Commit 17112020 2020-11-17 08:36:51 +02:00
Enrico Ottonello c796adae24 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-11-16 11:57:19 +01:00
Claudio Atzori 6ab1ce53c9 fixed condition in result pid cleaning; cleanup 2020-11-16 10:09:17 +01:00
Claudio Atzori 4de8c8b237 fixed workflow variable name 2020-11-16 10:03:11 +01:00
Dimitris 3e24c9b176 Changes 14112020 2020-11-14 18:42:07 +02:00
Claudio Atzori 331d621800 added test resource 2020-11-14 12:16:15 +01:00
Claudio Atzori 5d4e34e26a fixed typo in variable name 2020-11-14 10:32:26 +01:00
Claudio Atzori 768bc5304c Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2020-11-13 15:40:34 +01:00
Claudio Atzori 93f7b7974f Merge pull request 'trust truncated to 3 decimals' (#24) from trunc_trust into master
LGTM
2020-11-13 15:40:02 +01:00
Claudio Atzori 2facfefc19 updated maven repository URL 2020-11-13 15:38:40 +01:00
Claudio Atzori 528231a287 grouping graph entities by id turned out to be an easy extension for the already existing cleaning workflow 2020-11-13 15:37:48 +01:00
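The group-by-id pattern the cleaning workflow is extended with, as a self-contained Spark sketch; the Entity bean and its merge are placeholders for the Oaf types:

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.ReduceFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;

import scala.Tuple2;

public class GroupById {
    // Placeholder bean with an id and a merge operation.
    public static class Entity implements java.io.Serializable {
        private String id;
        public String getId() { return id; }
        public void setId(String id) { this.id = id; }
        public Entity mergeFrom(Entity other) { return this; } // merge logic elided
    }

    // Group records sharing the same id and collapse each group into one entity.
    public static Dataset<Entity> groupAndMerge(Dataset<Entity> entities) {
        return entities
            .groupByKey((MapFunction<Entity, String>) Entity::getId, Encoders.STRING())
            .reduceGroups((ReduceFunction<Entity>) Entity::mergeFrom)
            .map((MapFunction<Tuple2<String, Entity>, Entity>) Tuple2::_2,
                Encoders.bean(Entity.class));
    }
}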
Enrico Ottonello 005f849674 added compression to output dataset 2020-11-13 12:45:31 +01:00
Enrico Ottonello 9a2fa9dc2f added test for other names parsing from summaries dump 2020-11-13 10:25:34 +01:00
Claudio Atzori 2bed29eb09 WIP: added oozie workflow for grouping graph entities by id 2020-11-13 10:05:12 +01:00
Claudio Atzori 13e36a4da0 WIP: added oozie workflow for grouping graph entities by id 2020-11-13 10:05:02 +01:00
Enrico Ottonello 13f28fa225 moved AuthorData to dhp-schemas; added other names to author data 2020-11-12 17:43:32 +01:00
Enrico Ottonello 2af21150c5 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-11-12 09:58:33 +01:00
Claudio Atzori 9b0fb9e958 merged from master 2020-11-12 09:27:12 +01:00
Claudio Atzori 75324ae58a Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop 2020-11-12 09:23:37 +01:00
Claudio Atzori 822971f54f no need to filter relations in CreateRelatedEntitiesJob_phase1; replaced 'left outer' join with 'left' join in CreateRelatedEntitiesJob_phase2; cleanup; 2020-11-12 09:22:59 +01:00
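On the join change above: in Spark SQL "left" and "left_outer" name the same join type, so the swap is cosmetic. An illustrative shape of the phase2 join, with assumed column names:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

public class RelatedEntitiesJoin {
    // "left" keeps every entity and attaches related rows where present.
    public static Dataset<Row> relate(Dataset<Row> entities, Dataset<Row> rels) {
        return entities.join(rels, entities.col("id").equalTo(rels.col("source")), "left");
    }
}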
Enrico Ottonello 1f861f2b0d now wf output is a sequence file with the format seq("eu.dnetlib.dhp.schema.oaf.Publication",eu.dnetlib.dhp.schema.action.AtomicActions) 2020-11-11 17:38:50 +01:00
Claudio Atzori 9841488482 Merge pull request 'latest changes in stats wf' (#54) from antonis.lempesis/dnet-hadoop:master into master
LGTM, thanks!
2020-11-11 16:01:51 +01:00
Antonis Lempesis 99ebaee347 fixed #5913 2020-11-11 16:56:46 +02:00
Claudio Atzori e3d3481fb9 Merge pull request 'organizations pids' (#53) from organization_pids into master
LGTM
2020-11-11 14:08:25 +01:00
Antonis Lempesis f14e65f6a3 reverted wrong change 2020-11-10 17:23:04 +02:00
Antonis Lempesis c02c7741c9 fixes in db creation 2020-11-10 17:11:30 +02:00
Antonis Lempesis e603fa5847 fixes in db creation 2020-11-10 17:11:12 +02:00
Enrico Ottonello fea2451658 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-11-10 11:49:43 +01:00
Claudio Atzori 18d9aad70c improved documentation in dhp-graph-provision 2020-11-10 11:48:55 +01:00
Enrico Ottonello 1513174d7e added further test case 2020-11-10 11:44:55 +01:00
Michele Artini 40160d171f organizations pids 2020-11-09 12:58:36 +01:00
Sandro La Bruzzo 8e1d43aab2 Implemented ID generation using IdentifierRecordFactory on DOIBoost 2020-11-09 11:53:55 +01:00
Sandro La Bruzzo 027ef2326c Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop 2020-11-06 17:12:42 +01:00
Sandro La Bruzzo cd27df91a1 fixed bug on missing relation in ANDS 2020-11-06 17:12:31 +01:00
Enrico Ottonello 6bc7dbeca7 first version of dataset successfully generated from orcid dump 2020 2020-11-06 13:47:50 +01:00
Claudio Atzori d10447e747 re-packaged graph dump workflow sources 2020-11-05 17:38:18 +01:00
Claudio Atzori 2d76497488 cleanup 2020-11-05 17:10:24 +01:00
Claudio Atzori 144216fb88 Merge pull request 'OpenAIRE graph dump' (#51) from miriam.baglioni/dnet-hadoop:dump into master
LGTM
2020-11-05 17:09:52 +01:00
Miriam Baglioni f8e9bda24c merge branch with master 2020-11-05 16:31:18 +01:00
Miriam Baglioni afa0b1489b merge upstream 2020-11-05 16:31:09 +01:00
Miriam Baglioni 7ebdfacee9 removed commented code and added documentation to new method 2020-11-05 16:30:36 +01:00
Miriam Baglioni be5ed8f554 added check to avoid sending empty metadata. 2020-11-05 16:10:17 +01:00
Claudio Atzori 2148a51fae minor changes 2020-11-05 11:24:12 +01:00
Claudio Atzori 4625b7486e code formatting 2020-11-04 18:12:43 +01:00
Claudio Atzori f5f346dd2b Merge pull request 'dump' (#50) from miriam.baglioni/dnet-hadoop:dump into master
LGTM
2020-11-04 18:07:01 +01:00
Miriam Baglioni e9ac471ae9 removed dependency from classes for the pid graph dump 2020-11-04 18:04:42 +01:00
Miriam Baglioni f45c23316f removed entities added for the pid graph dump 2020-11-04 17:31:24 +01:00
Miriam Baglioni e9d948786d removed commented code 2020-11-04 17:30:51 +01:00
Miriam Baglioni b90a945c49 removed property files for pid graph dump 2020-11-04 17:28:33 +01:00
Miriam Baglioni bac307155a removed properties specific for pid graph dump 2020-11-04 17:28:04 +01:00
Miriam Baglioni 9c9d50f486 removed code specific for pid graph dump 2020-11-04 17:26:22 +01:00
Miriam Baglioni 5669890934 removed commented lines 2020-11-04 17:15:21 +01:00
Miriam Baglioni 6a89f59be9 removed commented lines 2020-11-04 17:13:59 +01:00
Miriam Baglioni 56150d7e5e removed all code related to the dump of pids graph 2020-11-04 17:13:12 +01:00
Miriam Baglioni 16c54a96f8 removed pid dump 2020-11-04 17:11:32 +01:00
Miriam Baglioni d9d8de63cc merge upstream 2020-11-04 13:36:38 +01:00
Miriam Baglioni 0cac5436ff Merge branch 'dump' of code-repo.d4science.org:miriam.baglioni/dnet-hadoop into dump 2020-11-04 13:21:11 +01:00
Alessia Bardi 51808b5afd Updated descriptions 2020-11-04 12:29:48 +01:00
Alessia Bardi e6becf8659 Updated descriptions 2020-11-04 12:17:57 +01:00
Alessia Bardi 0abe0eee33 Updated descriptions 2020-11-04 12:15:30 +01:00
Alessia Bardi f6ab238f5d Updated descriptions 2020-11-04 11:50:47 +01:00
Sandro La Bruzzo 3581244daf Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop 2020-11-04 09:04:22 +01:00
Sandro La Bruzzo 66efb39634 implemented merge scholix 2020-11-04 09:04:01 +01:00
Miriam Baglioni c010a8442f fixed issue on test code 2020-11-03 17:26:51 +01:00
Miriam Baglioni 8ec7a61188 merge branch with master 2020-11-03 16:59:08 +01:00
Miriam Baglioni 8b4f7bf492 merge upstream 2020-11-03 16:58:59 +01:00
Miriam Baglioni c209284ca7 new schemas for the entities in the dump with added descriptions 2020-11-03 16:58:08 +01:00
Miriam Baglioni 08806deddf added the splitSize non mandatory parameter. Default size 10G 2020-11-03 16:57:34 +01:00
Miriam Baglioni 7d2eda43ca added new non mandatory property publish to determine whether to publish the upload or leave it pending. Default value false 2020-11-03 16:57:01 +01:00
Miriam Baglioni cbbb1bdc54 moved business logic to new class in common for handling the zip of the archives 2020-11-03 16:55:50 +01:00
Miriam Baglioni 7d95a5e2b4 refactoring 2020-11-03 16:55:13 +01:00
Miriam Baglioni d4382b54df moved the tar archive with max size to the common module 2020-11-03 16:54:50 +01:00
Miriam Baglioni 1124ac29fc merge upstream 2020-11-02 10:22:51 +01:00
Dimitris 32bf943979 Changes to download only updates 2020-11-02 09:08:25 +02:00
Miriam Baglioni dabb33e018 changed the discriminant used to split the file 2020-10-30 17:52:22 +01:00
Miriam Baglioni 0fba08eae4 max allowed size per file 10 GB 2020-10-30 16:05:55 +01:00
Miriam Baglioni b828587252 prevent the code from cycling indefinitely 2020-10-30 15:01:25 +01:00
Miriam Baglioni f747e303ac classes for dumping of the graph as ttl file 2020-10-30 14:13:45 +01:00
Miriam Baglioni 16baf5b69e formatting 2020-10-30 14:13:14 +01:00
Miriam Baglioni a9eef9c852 added check for possible Optional value in relation dataInfo 2020-10-30 14:12:28 +01:00
Miriam Baglioni 5f4de9a962 formatting 2020-10-30 14:11:40 +01:00
Miriam Baglioni 10d8bbada8 replaced deprecated method with its non-deprecated version 2020-10-30 14:10:10 +01:00
Miriam Baglioni 14bf2e7238 added option to split dumps bigger than 40 GB across different files 2020-10-30 14:09:04 +01:00
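A minimal sketch of the size-capped splitting added here: roll over to a new part file once the running byte count passes the cap. File naming and the record source are illustrative:

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;

public class SplitBySize {
    static final long MAX_BYTES = 40L * 1024 * 1024 * 1024; // 40 GB cap per part

    public static void split(List<String> records, String prefix) throws IOException {
        int part = 0;
        long written = 0;
        BufferedWriter out = new BufferedWriter(new FileWriter(prefix + "_" + part));
        for (String record : records) {
            if (written >= MAX_BYTES) { // cap reached: close and roll to a new part
                out.close();
                out = new BufferedWriter(new FileWriter(prefix + "_" + (++part)));
                written = 0;
            }
            out.write(record);
            out.newLine();
            written += record.getBytes().length + 1;
        }
        out.close();
    }
}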
Dimitris b8a3392b59 Commit 30102020 2020-10-30 14:07:21 +02:00
Miriam Baglioni 78fdb11c3f merge branch with master 2020-10-29 12:55:22 +01:00
Miriam Baglioni d6e8dc0313 merge upstream 2020-10-29 12:55:06 +01:00
Miriam Baglioni 4cf4454341 changed from deprecated method to new one 2020-10-27 17:46:19 +01:00
Miriam Baglioni c8f32dd109 - 2020-10-27 17:45:58 +01:00
Miriam Baglioni 3582eba565 - 2020-10-27 17:31:33 +01:00
Miriam Baglioni d2374e3b9e added code to handle cases where the funding tree is not existing 2020-10-27 16:15:21 +01:00
Miriam Baglioni 5d3012eeb4 changed code to dump only the programme list and not the classification list 2020-10-27 16:14:18 +01:00
Miriam Baglioni 1bd638d291 removed h2020classification from dump information. Added back the programme info 2020-10-27 16:13:36 +01:00
Miriam Baglioni 3241ec1777 added connection timeout and socket timeout 600 sec 2020-10-27 16:12:11 +01:00
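The two timeouts mentioned here, set on a plain HttpURLConnection; illustrative only, since the repository wires them into its own HTTP client:

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;

public class Timeouts {
    public static HttpURLConnection open(String url) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        conn.setConnectTimeout(600_000); // connection timeout: 600 s, in milliseconds
        conn.setReadTimeout(600_000);    // socket/read timeout: 600 s
        return conn;
    }
}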
Miriam Baglioni cc68855a1e merge upstream 2020-10-27 15:54:16 +01:00
Miriam Baglioni 1cb60aede4 added connection timeout and socket timeout 600 sec 2020-10-27 15:53:02 +01:00
Enrico Ottonello 9818e74a70 added dependency version in main pom.xml for orcid no doi 2020-10-22 16:38:00 +02:00
Enrico Ottonello 210a50e4f4 replaced null value 2020-10-22 16:24:42 +02:00
Enrico Ottonello b0290dbcb7 moved all dependencies version to main pom.xml 2020-10-22 16:20:46 +02:00
Enrico Ottonello a38ab57062 let test methods run 2020-10-22 15:43:50 +02:00
Enrico Ottonello 1139d6568d replaced null value with a more safe empty string as return value 2020-10-22 15:32:26 +02:00
Enrico Ottonello c58db1c8ea added filter on null value after map function 2020-10-22 15:11:02 +02:00
Enrico Ottonello 846ba30873 if typologies mapping fails, an exception will be propagated 2020-10-22 14:36:18 +02:00
Enrico Ottonello c3114ba0ae replaced null as return value with a more safe empty string 2020-10-22 14:21:31 +02:00
Enrico Ottonello c295c71ca0 added comment 2020-10-22 14:07:26 +02:00
Enrico Ottonello ab083f9946 propagate exception on parsing work (PR request) 2020-10-22 14:02:32 +02:00
Miriam Baglioni 959f30811e added connection timeout and socket timeout 600 sec 2020-10-16 10:52:30 +02:00
Miriam Baglioni 11b7eaae09 changed the name of the folder where to store the context entity from context to communities_infrastructures 2020-10-05 11:24:54 +02:00
Miriam Baglioni 32bffb0134 changed the name from communities_infrastructures to communities_infrastuctures.json 2020-10-05 11:24:17 +02:00
Miriam Baglioni 25cbcf6114 changed to solve naming issues: context renamed communities_infrastructure.json and removed the double json.gz extension from the name of the part in the tar 2020-10-02 12:17:46 +02:00
Miriam Baglioni 01117a46e1 whole workflow activated 2020-10-01 17:19:21 +02:00
Miriam Baglioni cfb5766c6b removed double json.gz from names of files in the tar 2020-10-01 17:18:34 +02:00
Miriam Baglioni fcaedac980 merge branch with master 2020-10-01 16:46:59 +02:00
Miriam Baglioni 983a12ed15 temporary modification to allow the upload of files in the sandbox without the need to recreate the mapping from scratch 2020-09-25 16:41:51 +02:00
Miriam Baglioni 8b36d19182 added property depositionId and changed property newVersion from boolean to string to handle the three possible distinct values 2020-09-25 16:41:15 +02:00
Miriam Baglioni ed5239f9ec added new code to handle the new possibility to upload files to an already open deposition 2020-09-25 16:34:32 +02:00
Miriam Baglioni 3a8c524fce refactor 2020-09-25 16:34:02 +02:00
Miriam Baglioni ccd48dd78a added new test for new method 2020-09-25 16:33:43 +02:00
Miriam Baglioni 3e5497b336 added new method to handle an open deposition to which data can be uploaded 2020-09-25 16:33:15 +02:00
Miriam Baglioni 2ac2b537b6 merge branch with master 2020-09-25 14:40:47 +02:00
Miriam Baglioni 54800fb9b0 enabled only the step to upload in zenodo 2020-09-25 14:40:22 +02:00
Miriam Baglioni de6c4d46d8 fixed conflicts 2020-09-24 15:35:01 +02:00
Enrico Ottonello a97ad20c7b exception is now propagated (PR review) 2020-09-22 10:46:34 +02:00
Enrico Ottonello fefbcfb106 dependency version moved to main pom (PR review) 2020-09-22 10:20:25 +02:00
Enrico Ottonello 7cffd14fb0 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-09-15 16:05:55 +02:00
Enrico Ottonello 9e8e7fe6ef add comments 2020-09-15 11:32:49 +02:00
Miriam Baglioni c2b5c780ff - 2020-09-14 14:34:03 +02:00
Miriam Baglioni e2ceefe9be - 2020-09-14 14:33:28 +02:00
Miriam Baglioni 1f893e63dc - 2020-09-14 14:33:10 +02:00
Enrico Ottonello 538f299767 merged 2020-09-14 12:35:16 +02:00
Enrico Ottonello eb8c9b2348 Merge remote-tracking branch 'upstream/master' into orcid-no-doi 2020-09-14 12:00:56 +02:00
Miriam Baglioni b72a7dad46 resource for pid graph dump 2020-08-24 17:09:01 +02:00
Miriam Baglioni 8694bb9b31 refactoring due to compilation 2020-08-24 17:07:34 +02:00
Miriam Baglioni 428f1022fd refactoring due to compilation 2020-08-24 17:05:50 +02:00
Miriam Baglioni c90a0d39dd refactoring due to compilation 2020-08-24 17:04:59 +02:00
Miriam Baglioni bd5a72929b refactoring due to compilation 2020-08-24 17:03:06 +02:00
Miriam Baglioni 8a069a4fea - 2020-08-24 17:01:30 +02:00
Miriam Baglioni 34fa96f3b1 - 2020-08-24 17:00:20 +02:00
Miriam Baglioni 5fb2949cb8 added utils methods 2020-08-24 17:00:09 +02:00
Miriam Baglioni 2a540b6c01 added constants for the pid graph dump 2020-08-24 16:55:35 +02:00
Miriam Baglioni da103c399a resources for the pid graph dump test 2020-08-24 16:52:07 +02:00
Miriam Baglioni 630a6a1fe7 first tests for the pid graph dump 2020-08-24 16:51:26 +02:00
Miriam Baglioni 40c8d2de7b test resources for the dump of the pids graph 2020-08-24 16:50:39 +02:00
Miriam Baglioni bef79d3bdf first attempt at the dump of the pids graph 2020-08-24 16:49:38 +02:00
Miriam Baglioni 85203c16e3 merge branch with master 2020-08-19 11:49:03 +02:00
Miriam Baglioni 2c783793ba removed the affiliation from the author to mirror the changes in the model 2020-08-19 11:48:12 +02:00
Miriam Baglioni c325acef3f changed to be extensions of the classes defining the common parameters 2020-08-19 11:47:17 +02:00
Miriam Baglioni 6b8c5034fc extends the result with parameters specific to the community dump. 2020-08-19 11:46:20 +02:00
Miriam Baglioni f26382fa51 extends the instance with parameters collectedfrom and hostedby to be dumped only for communities 2020-08-19 11:45:23 +02:00
Miriam Baglioni 4584cf6334 contains the specialized instance parameter for the dump of the result in the whole graph 2020-08-19 11:44:46 +02:00
Miriam Baglioni f5bae426f7 modified to store information concerning instance and result common to both the dumps, for community and the whole graph 2020-08-19 11:43:44 +02:00
Miriam Baglioni 11b80899d7 added to store information concerning project common to both the dumps, for community and the whole graph 2020-08-19 11:43:18 +02:00
Miriam Baglioni f6bf888016 removed affiliation from author to mirror the changes in the model 2020-08-19 11:41:41 +02:00
Miriam Baglioni 66d0e0d3f2 - 2020-08-19 11:31:50 +02:00
Miriam Baglioni 1c593a9cfe - 2020-08-19 11:29:51 +02:00
Miriam Baglioni e42b2f5ae2 - 2020-08-19 11:29:09 +02:00
Miriam Baglioni f81ee22418 changed to mirror the changes in the model (Instance, CommunityInstance, GraphResult) 2020-08-19 11:28:26 +02:00
Miriam Baglioni 387be43fd4 changed to discriminate whether to dump all the result types together or each one in its own archive 2020-08-19 11:25:27 +02:00
Miriam Baglioni c5858afb88 added parameter to guide the dump for the result (resultAggregation). true if all the result types should be dumped together, false otherwise. 2020-08-19 11:24:14 +02:00
Miriam Baglioni d407852ac2 changed to reflect the changes in the model 2020-08-19 11:15:05 +02:00
Miriam Baglioni 47c21a8961 refactoring due to compilation 2020-08-19 11:11:57 +02:00
Miriam Baglioni 5570678c65 changed parameter name from hfdsNameNode to nameNode 2020-08-19 10:59:26 +02:00
Miriam Baglioni dc5096a327 refactoring due to compilation 2020-08-19 10:57:36 +02:00
Miriam Baglioni 1791cf2e78 refactoring due to compilation 2020-08-19 10:14:41 +02:00
Miriam Baglioni c7f944a533 refactoring due to compilation 2020-08-19 10:01:26 +02:00
Enrico Ottonello 0377b40fba output to one parquet file 2020-07-30 18:38:07 +02:00
Enrico Ottonello 196f36c6ed fix publication dataset creation 2020-07-30 13:38:33 +02:00
Enrico Ottonello c82b15b5f4 migrate configuration to ocean, fix publication dataset creation 2020-07-28 15:23:52 +02:00
Enrico Ottonello a6acb37689 Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-07-28 08:07:40 +02:00
Michele Artini 3adedd0a68 trust truncated to 3 decimals 2020-07-17 11:58:11 +02:00
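The truncation rule from this commit in isolation; note it truncates rather than rounds, and the helper name is illustrative:

public class TrustTruncation {
    // 0.85769 -> 0.857 (truncated, not rounded)
    public static double truncateTrust(double trust) {
        return Math.floor(trust * 1000) / 1000d;
    }
}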
Enrico Ottonello ca37d3427b separate workflow to parse orcid summaries, activities and generate dataset with no doi publications; test 2020-07-03 23:30:31 +02:00
Enrico Ottonello 1729cc5cf3 publication conversion from json to oaf test 2020-07-02 18:46:20 +02:00
Enrico Ottonello 5525f57ec8 converter from orcid work json to oaf 2020-07-01 18:36:14 +02:00
Enrico Ottonello b7b6be12a5 fixed enriched works generation 2020-06-29 18:03:16 +02:00
Enrico Ottonello b2213b6435 merged with dnet version 2020-06-26 17:27:34 +02:00
Enrico Ottonello c5e149c46e Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi 2020-06-26 16:15:38 +02:00
Enrico Ottonello d6498278ed added workflow to generate seq(orcidId,work) and seq(orcidId,enrichedWork) 2020-06-25 18:43:29 +02:00
Enrico Ottonello fcbb4c1489 parser of orcid publication data from xml original dump 2020-06-24 16:29:32 +02:00
1500 changed files with 126702 additions and 48443 deletions

.gitignore
View File

@ -3,10 +3,10 @@
*.iws
*.ipr
*.iml
*.ipr
*.iws
*~
.vscode
.metals
.bloop
.classpath
/*/.classpath
/*/*/.classpath
@ -24,4 +24,5 @@
spark-warehouse
/**/job-override.properties
/**/*.log
/**/.factorypath
/**/.scalafmt.conf

View File

@ -1,2 +1,2 @@
# dnet-hadoop
Dnet-hadoop is a tool for
Dnet-hadoop is the project that defined all the OOZIE workflows for the OpenAIRE Graph construction, processing, provisioning.

View File

@ -8,8 +8,6 @@ import java.util.List;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
/**
* Generates oozie properties which were not provided from commandline.
@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
};
@Override
public void execute() throws MojoExecutionException, MojoFailureException {
public void execute() {
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
String generatedSandboxName = generateSandboxName(
@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
/**
* Generates sandbox name from workflow source directory.
*
* @param wfSourceDir
* @param wfSourceDir workflow source directory
* @return generated sandbox name
*/
private String generateSandboxName(String wfSourceDir) {
// utilize all dir names until finding one of the limiters
List<String> sandboxNameParts = new ArrayList<String>();
List<String> sandboxNameParts = new ArrayList<>();
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
ArrayUtils.reverse(tokens);
if (tokens.length > 0) {
for (String token : tokens) {
for (String limiter : limiters) {
if (limiter.equals(token)) {
return sandboxNameParts.size() > 0
return !sandboxNameParts.isEmpty()
? StringUtils.join(sandboxNameParts.toArray())
: null;
}
}
if (sandboxNameParts.size() > 0) {
if (!sandboxNameParts.isEmpty()) {
sandboxNameParts.add(0, File.separator);
}
sandboxNameParts.add(0, token);

View File

@ -16,6 +16,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
*/
protected List<String> getEscapeChars(String escapeChars) {
List<String> tokens = getListFromCSV(escapeChars);
List<String> realTokens = new ArrayList<String>();
List<String> realTokens = new ArrayList<>();
for (String token : tokens) {
String realToken = getRealToken(token);
realTokens.add(realToken);
@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
* @return content
*/
protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
List<String> names = new ArrayList<String>(properties.stringPropertyNames());
List<String> names = new ArrayList<>(properties.stringPropertyNames());
Collections.sort(names);
StringBuilder sb = new StringBuilder();
if (!StringUtils.isBlank(comment)) {
@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
throws MojoExecutionException {
try {
String content = getContent(comment, properties, escapeTokens);
FileUtils.writeStringToFile(file, content, ENCODING_UTF8);
FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8);
} catch (IOException e) {
throw new MojoExecutionException("Error creating properties file", e);
}
@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
*/
protected static final List<String> getListFromCSV(String csv) {
if (StringUtils.isBlank(csv)) {
return new ArrayList<String>();
return new ArrayList<>();
}
List<String> list = new ArrayList<String>();
List<String> list = new ArrayList<>();
String[] tokens = StringUtils.split(csv, ",");
for (String token : tokens) {
list.add(token.trim());

View File

@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
/** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest {
class GenerateOoziePropertiesMojoTest {
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
@BeforeEach
public void clearSystemProperties() {
void clearSystemProperties() {
System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
}
@Test
public void testExecuteEmpty() throws Exception {
void testExecuteEmpty() throws Exception {
// execute
mojo.execute();
@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest {
}
@Test
public void testExecuteSandboxNameAlreadySet() throws Exception {
void testExecuteSandboxNameAlreadySet() throws Exception {
// given
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
String sandboxName = "originalSandboxName";
@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest {
}
@Test
public void testExecuteEmptyWorkflowSourceDir() throws Exception {
void testExecuteEmptyWorkflowSourceDir() throws Exception {
// given
String workflowSourceDir = "";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest {
}
@Test
public void testExecuteNullSandboxNameGenerated() throws Exception {
void testExecuteNullSandboxNameGenerated() throws Exception {
// given
String workflowSourceDir = "eu/dnetlib/dhp/";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest {
}
@Test
public void testExecute() throws Exception {
void testExecute() throws Exception {
// given
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest {
}
@Test
public void testExecuteWithoutRoot() throws Exception {
void testExecuteWithoutRoot() throws Exception {
// given
String workflowSourceDir = "wf/transformers";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

View File

@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
/** @author mhorst, claudio.atzori */
@ExtendWith(MockitoExtension.class)
public class WritePredefinedProjectPropertiesTest {
class WritePredefinedProjectPropertiesTest {
@Mock
private MavenProject mavenProject;
@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest {
// ----------------------------------- TESTS ---------------------------------------------
@Test
public void testExecuteEmpty() throws Exception {
void testExecuteEmpty() throws Exception {
// execute
mojo.execute();
@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithProjectProperties() throws Exception {
void testExecuteWithProjectProperties() throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test()
public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromBlankLocation() {
void testExecuteIncludingPropertyKeysFromBlankLocation() {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
// given
mojo.setQuiet(true);
mojo.setIncludePropertyKeysFromFiles(new String[] {
@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromInvalidFile() {
void testExecuteIncludingPropertyKeysFromInvalidFile() {
// given
mojo.setIncludePropertyKeysFromFiles(new String[] {
"invalid location"
@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
// given
mojo.setIncludeEnvironmentVariables(true);
@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
// given
String key = "systemPropertyKey";
String value = "systemPropertyValue";
@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
throws Exception {
// given
String key = "systemPropertyKey ";

View File

@ -15,16 +15,27 @@
<snapshotRepository>
<id>dnet45-snapshots</id>
<name>DNet45 Snapshots</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots</url>
<layout>default</layout>
</snapshotRepository>
<repository>
<id>dnet45-releases</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
</repository>
<site>
<id>DHPSite</id>
<url>${dhp.site.stage.path}/dhp-build/dhp-code-style</url>
</site>
</distributionManagement>
<build>
<extensions>
<extension>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-ssh</artifactId>
<version>2.10</version>
</extension>
</extensions>
<pluginManagement>
<plugins>
<plugin>
@ -35,7 +46,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
<version>3.9.1</version>
</plugin>
</plugins>
</pluginManagement>
@ -43,6 +54,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<dhp.site.stage.path>sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop</dhp.site.stage.path>
</properties>
</project>

View File

@ -0,0 +1,21 @@
style = defaultWithAlign
align.openParenCallSite = false
align.openParenDefnSite = false
align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}]
continuationIndent.callSite = 2
continuationIndent.defnSite = 2
danglingParentheses = true
indentOperator = spray
maxColumn = 120
newlines.alwaysBeforeTopLevelStatements = true
project.excludeFilters = [".*\\.sbt"]
rewrite.rules = [AvoidInfix]
rewrite.rules = [ExpandImportSelectors]
rewrite.rules = [RedundantBraces]
rewrite.rules = [RedundantParens]
rewrite.rules = [SortImports]
rewrite.rules = [SortModifiers]
rewrite.rules = [PreferCurlyFors]
spaces.inImportCurlyBraces = false
unindentTopLevelOperators = true

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<project xmlns="http://maven.apache.org/DECORATION/1.8.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/DECORATION/1.8.0 https://maven.apache.org/xsd/decoration-1.8.0.xsd"
name="DHP-Aggregation">
<skin>
<groupId>org.apache.maven.skins</groupId>
<artifactId>maven-fluido-skin</artifactId>
<version>1.8</version>
</skin>
<poweredBy>
<logo name="OpenAIRE Research Graph" href="https://graph.openaire.eu/"
img="https://graph.openaire.eu/assets/common-assets/logo-large-graph.png"/>
</poweredBy>
<body>
<links>
<item name="Code" href="https://code-repo.d4science.org/" />
</links>
<menu ref="modules" />
<menu ref="reports"/>
</body>
</project>

View File

@ -10,6 +10,9 @@
<packaging>pom</packaging>
<description>This module is a container for the build tools used in dnet-hadoop</description>
<properties>
<maven.javadoc.skip>true</maven.javadoc.skip>
</properties>
<modules>
<module>dhp-code-style</module>
@ -17,4 +20,12 @@
<module>dhp-build-properties-maven-plugin</module>
</modules>
<distributionManagement>
<site>
<id>DHPSite</id>
<url>${dhp.site.stage.path}/dhp-build/</url>
</site>
</distributionManagement>
</project>

View File

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<project xmlns="http://maven.apache.org/DECORATION/1.8.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/DECORATION/1.8.0 https://maven.apache.org/xsd/decoration-1.8.0.xsd"
name="DHP-Aggregation">
<skin>
<groupId>org.apache.maven.skins</groupId>
<artifactId>maven-fluido-skin</artifactId>
<version>1.8</version>
</skin>
<poweredBy>
<logo name="OpenAIRE Research Graph" href="https://graph.openaire.eu/"
img="https://graph.openaire.eu/assets/common-assets/logo-large-graph.png"/>
</poweredBy>
<body>
<links>
<item name="Code" href="https://code-repo.d4science.org/" />
</links>
<menu ref="modules" />
<menu ref="reports"/>
</body>
</project>

View File

@ -6,13 +6,58 @@
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version>
<relativePath>../</relativePath>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>dhp-common</artifactId>
<packaging>jar</packaging>
<distributionManagement>
<site>
<id>DHPSite</id>
<url>${dhp.site.stage.path}/dhp-common</url>
</site>
</distributionManagement>
<description>This module contains common utilities meant to be used across the dnet-hadoop submodules</description>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>${net.alchim31.maven.version}</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>initialize</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
<execution>
<id>scala-doc</id>
<phase>process-resources</phase> <!-- or wherever -->
<goals>
<goal>doc</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
@ -20,6 +65,15 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>com.github.sisyphsu</groupId>
<artifactId>dateparser</artifactId>
</dependency>
<dependency>
<groupId>me.xuender</groupId>
<artifactId>unidecode</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
@ -29,12 +83,6 @@
<artifactId>spark-sql_2.11</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
@ -59,11 +107,6 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/com.rabbitmq/amqp-client -->
<dependency>
<groupId>com.rabbitmq</groupId>
<artifactId>amqp-client</artifactId>
</dependency>
<dependency>
<groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId>
@ -104,10 +147,24 @@
<artifactId>dnet-pace-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongo-java-driver</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
</dependency>
</dependencies>

View File

@ -1,48 +0,0 @@
package eu.dnetlib.collector.worker.model;
import java.util.HashMap;
import java.util.Map;
public class ApiDescriptor {
private String id;
private String baseUrl;
private String protocol;
private Map<String, String> params = new HashMap<>();
public String getBaseUrl() {
return baseUrl;
}
public void setBaseUrl(final String baseUrl) {
this.baseUrl = baseUrl;
}
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public Map<String, String> getParams() {
return params;
}
public void setParams(final HashMap<String, String> params) {
this.params = params;
}
public String getProtocol() {
return protocol;
}
public void setProtocol(final String protocol) {
this.protocol = protocol;
}
}

View File

@ -1,119 +0,0 @@
package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import java.util.UUID;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Table;
@Entity
@Table(name = "mdstores")
public class MDStore implements Serializable {
/** */
private static final long serialVersionUID = 3160530489149700055L;
@Id
@Column(name = "id")
private String id;
@Column(name = "format")
private String format;
@Column(name = "layout")
private String layout;
@Column(name = "interpretation")
private String interpretation;
@Column(name = "datasource_name")
private String datasourceName;
@Column(name = "datasource_id")
private String datasourceId;
@Column(name = "api_id")
private String apiId;
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public String getFormat() {
return format;
}
public void setFormat(final String format) {
this.format = format;
}
public String getLayout() {
return layout;
}
public void setLayout(final String layout) {
this.layout = layout;
}
public String getInterpretation() {
return interpretation;
}
public void setInterpretation(final String interpretation) {
this.interpretation = interpretation;
}
public String getDatasourceName() {
return datasourceName;
}
public void setDatasourceName(final String datasourceName) {
this.datasourceName = datasourceName;
}
public String getDatasourceId() {
return datasourceId;
}
public void setDatasourceId(final String datasourceId) {
this.datasourceId = datasourceId;
}
public String getApiId() {
return apiId;
}
public void setApiId(final String apiId) {
this.apiId = apiId;
}
public static MDStore newInstance(
final String format, final String layout, final String interpretation) {
return newInstance(format, layout, interpretation, null, null, null);
}
public static MDStore newInstance(
final String format,
final String layout,
final String interpretation,
final String dsName,
final String dsId,
final String apiId) {
final MDStore md = new MDStore();
md.setId("md-" + UUID.randomUUID());
md.setFormat(format);
md.setLayout(layout);
md.setInterpretation(interpretation);
md.setDatasourceName(dsName);
md.setDatasourceId(dsId);
md.setApiId(apiId);
return md;
}
}

View File

@ -1,51 +0,0 @@
package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Table;
@Entity
@Table(name = "mdstore_current_versions")
public class MDStoreCurrentVersion implements Serializable {
/** */
private static final long serialVersionUID = -4757725888593745773L;
@Id
@Column(name = "mdstore")
private String mdstore;
@Column(name = "current_version")
private String currentVersion;
public String getMdstore() {
return mdstore;
}
public void setMdstore(final String mdstore) {
this.mdstore = mdstore;
}
public String getCurrentVersion() {
return currentVersion;
}
public void setCurrentVersion(final String currentVersion) {
this.currentVersion = currentVersion;
}
public static MDStoreCurrentVersion newInstance(final String mdId, final String versionId) {
final MDStoreCurrentVersion cv = new MDStoreCurrentVersion();
cv.setMdstore(mdId);
cv.setCurrentVersion(versionId);
return cv;
}
public static MDStoreCurrentVersion newInstance(final MDStoreVersion v) {
return newInstance(v.getMdstore(), v.getId());
}
}

View File

@ -1,99 +0,0 @@
package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
@Entity
@Table(name = "mdstore_versions")
public class MDStoreVersion implements Serializable {
/** */
private static final long serialVersionUID = -4763494442274298339L;
@Id
@Column(name = "id")
private String id;
@Column(name = "mdstore")
private String mdstore;
@Column(name = "writing")
private boolean writing;
@Column(name = "readcount")
private int readCount = 0;
@Column(name = "lastupdate")
@Temporal(TemporalType.TIMESTAMP)
private Date lastUpdate;
@Column(name = "size")
private long size = 0;
public static MDStoreVersion newInstance(final String mdId, final boolean writing) {
final MDStoreVersion t = new MDStoreVersion();
t.setId(mdId + "-" + new Date().getTime());
t.setMdstore(mdId);
t.setLastUpdate(null);
t.setWriting(writing);
t.setReadCount(0);
t.setSize(0);
return t;
}
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public String getMdstore() {
return mdstore;
}
public void setMdstore(final String mdstore) {
this.mdstore = mdstore;
}
public boolean isWriting() {
return writing;
}
public void setWriting(final boolean writing) {
this.writing = writing;
}
public int getReadCount() {
return readCount;
}
public void setReadCount(final int readCount) {
this.readCount = readCount;
}
public Date getLastUpdate() {
return lastUpdate;
}
public void setLastUpdate(final Date lastUpdate) {
this.lastUpdate = lastUpdate;
}
public long getSize() {
return size;
}
public void setSize(final long size) {
this.size = size;
}
}

View File

@ -1,143 +0,0 @@
package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
@Entity
@Table(name = "mdstores_with_info")
public class MDStoreWithInfo implements Serializable {
/** */
private static final long serialVersionUID = -8445784770687571492L;
@Id
@Column(name = "id")
private String id;
@Column(name = "format")
private String format;
@Column(name = "layout")
private String layout;
@Column(name = "interpretation")
private String interpretation;
@Column(name = "datasource_name")
private String datasourceName;
@Column(name = "datasource_id")
private String datasourceId;
@Column(name = "api_id")
private String apiId;
@Column(name = "current_version")
private String currentVersion;
@Column(name = "lastupdate")
@Temporal(TemporalType.TIMESTAMP)
private Date lastUpdate;
@Column(name = "size")
private long size = 0;
@Column(name = "n_versions")
private long numberOfVersions = 0;
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public String getFormat() {
return format;
}
public void setFormat(final String format) {
this.format = format;
}
public String getLayout() {
return layout;
}
public void setLayout(final String layout) {
this.layout = layout;
}
public String getInterpretation() {
return interpretation;
}
public void setInterpretation(final String interpretation) {
this.interpretation = interpretation;
}
public String getDatasourceName() {
return datasourceName;
}
public void setDatasourceName(final String datasourceName) {
this.datasourceName = datasourceName;
}
public String getDatasourceId() {
return datasourceId;
}
public void setDatasourceId(final String datasourceId) {
this.datasourceId = datasourceId;
}
public String getApiId() {
return apiId;
}
public void setApiId(final String apiId) {
this.apiId = apiId;
}
public String getCurrentVersion() {
return currentVersion;
}
public void setCurrentVersion(final String currentVersion) {
this.currentVersion = currentVersion;
}
public Date getLastUpdate() {
return lastUpdate;
}
public void setLastUpdate(final Date lastUpdate) {
this.lastUpdate = lastUpdate;
}
public long getSize() {
return size;
}
public void setSize(final long size) {
this.size = size;
}
public long getNumberOfVersions() {
return numberOfVersions;
}
public void setNumberOfVersions(final long numberOfVersions) {
this.numberOfVersions = numberOfVersions;
}
}

View File

@ -1,10 +1,7 @@
package eu.dnetlib.dhp.application;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Serializable;
import java.io.StringWriter;
import java.io.*;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
@ -12,17 +9,21 @@ import java.util.zip.GZIPOutputStream;
import org.apache.commons.cli.*;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
public class ArgumentApplicationParser implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ArgumentApplicationParser.class);
private final Options options = new Options();
private final Map<String, String> objectMap = new HashMap<>();
private final List<String> compressedValues = new ArrayList<>();
public ArgumentApplicationParser(final String json_configuration) throws Exception {
public ArgumentApplicationParser(final String json_configuration) throws IOException {
final ObjectMapper mapper = new ObjectMapper();
final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
createOptionMap(configuration);
@ -33,7 +34,6 @@ public class ArgumentApplicationParser implements Serializable {
}
private void createOptionMap(final OptionsParameter[] configuration) {
Arrays
.stream(configuration)
.map(
@ -47,10 +47,6 @@ public class ArgumentApplicationParser implements Serializable {
return o;
})
.forEach(options::addOption);
// HelpFormatter formatter = new HelpFormatter();
// formatter.printHelp("myapp", null, options, null, true);
}
public static String decompressValue(final String abstractCompressed) {
@ -60,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable {
final StringWriter stringWriter = new StringWriter();
IOUtils.copy(gis, stringWriter);
return stringWriter.toString();
} catch (Throwable e) {
System.out.println("Wrong value to decompress:" + abstractCompressed);
throw new RuntimeException(e);
} catch (IOException e) {
log.error("Wrong value to decompress: {}", abstractCompressed);
throw new IllegalArgumentException(e);
}
}
public static String compressArgument(final String value) throws Exception {
public static String compressArgument(final String value) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(value.getBytes());
@ -74,7 +70,7 @@ public class ArgumentApplicationParser implements Serializable {
return java.util.Base64.getEncoder().encodeToString(out.toByteArray());
}
public void parseArgument(final String[] args) throws Exception {
public void parseArgument(final String[] args) throws ParseException {
CommandLineParser parser = new BasicParser();
CommandLine cmd = parser.parse(options, args);
Arrays
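A round-trip sketch for the compressed-argument helpers in the file above: gzip + Base64 on the way in, Base64 + gunzip on the way out. The JSON payload is arbitrary, and the demo class is assumed to sit alongside ArgumentApplicationParser:

public class ParserRoundTrip {
    public static void main(String[] args) throws Exception {
        String packed = ArgumentApplicationParser.compressArgument("{\"k\":\"v\"}");
        String unpacked = ArgumentApplicationParser.decompressValue(packed);
        System.out.println(unpacked); // prints {"k":"v"}
    }
}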

View File

@ -9,9 +9,6 @@ public class OptionsParameter {
private boolean paramRequired;
private boolean compressed;
public OptionsParameter() {
}
public String getParamName() {
return paramName;
}

View File

@ -0,0 +1,48 @@
package eu.dnetlib.dhp.collection;
import java.util.HashMap;
import java.util.Map;
public class ApiDescriptor {
private String id;
private String baseUrl;
private String protocol;
private Map<String, String> params = new HashMap<>();
public String getBaseUrl() {
return baseUrl;
}
public void setBaseUrl(final String baseUrl) {
this.baseUrl = baseUrl;
}
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public Map<String, String> getParams() {
return params;
}
public void setParams(final Map<String, String> params) {
this.params = params;
}
public String getProtocol() {
return protocol;
}
public void setProtocol(final String protocol) {
this.protocol = protocol;
}
}

View File

@ -0,0 +1,61 @@
package eu.dnetlib.dhp.common;
import java.util.Map;
import com.google.common.collect.Maps;
public class Constants {
public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
private Constants() {
}
static {
accessRightsCoarMap.put("OPEN", "c_abf2");
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
accessRightsCoarMap.put("OPEN SOURCE", "c_abf2");
accessRightsCoarMap.put("CLOSED", "c_14cb");
accessRightsCoarMap.put("EMBARGO", "c_f1cf");
}
static {
coarCodeLabelMap.put("c_abf2", "OPEN");
coarCodeLabelMap.put("c_16ec", "RESTRICTED");
coarCodeLabelMap.put("c_14cb", "CLOSED");
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
}
public static final String SEQUENCE_FILE_NAME = "/sequence_file";
public static final String REPORT_FILE_NAME = "/report";
public static final String MDSTORE_DATA_PATH = "/store";
public static final String MDSTORE_SIZE_PATH = "/size";
public static final String COLLECTION_MODE = "collectionMode";
public static final String METADATA_ENCODING = "metadataEncoding";
public static final String OOZIE_WF_PATH = "oozieWfPath";
public static final String DNET_MESSAGE_MGR_URL = "dnetMessageManagerURL";
public static final String MAX_NUMBER_OF_RETRY = "maxNumberOfRetry";
public static final String REQUEST_DELAY = "requestDelay";
public static final String RETRY_DELAY = "retryDelay";
public static final String CONNECT_TIMEOUT = "connectTimeOut";
public static final String READ_TIMEOUT = "readTimeOut";
public static final String FROM_DATE_OVERRIDE = "fromDateOverride";
public static final String UNTIL_DATE_OVERRIDE = "untilDateOverride";
public static final String CONTENT_TOTALITEMS = "TotalItems";
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
// IETF draft rate-limit headers, used by repositories like ZENODO; not included in the Apache HTTP java packages
// see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit";
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining";
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset";
}

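As a quick illustration of how the two maps above work together (a sketch, not repository code): a D-Net access right label translates to its COAR code and back. Note that the mapping is not injective, since both OPEN and OPEN SOURCE point to c_abf2, so a round trip normalizes OPEN SOURCE to OPEN.

import eu.dnetlib.dhp.common.Constants;

public class CoarMappingExample {
    public static void main(String[] args) {
        String coarCode = Constants.accessRightsCoarMap.get("EMBARGO"); // c_f1cf
        String label = Constants.coarCodeLabelMap.get(coarCode); // EMBARGO
        System.out.println(coarCode + " -> " + label);
    }
}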
View File

@@ -14,7 +14,7 @@ public class DbClient implements Closeable {
private static final Log log = LogFactory.getLog(DbClient.class);
private Connection connection;
private final Connection connection;
public DbClient(final String address, final String login, final String password) {

View File

@@ -28,7 +28,7 @@ public class HdfsSupport {
* @param configuration Configuration of hadoop env
*/
public static boolean exists(String path, Configuration configuration) {
logger.info("Removing path: {}", path);
logger.info("Checking existence for path: {}", path);
return rethrowAsRuntimeException(
() -> {
Path f = new Path(path);

View File

@@ -0,0 +1,116 @@
package eu.dnetlib.dhp.common;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.hadoop.fs.*;
public class MakeTarArchive implements Serializable {
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
Path hdfsWritePath = new Path(outputPath);
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, true);
}
return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
}
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
throws IOException {
Path hdfsWritePath = new Path(outputPath);
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, true);
}
try (TarArchiveOutputStream ar = new TarArchiveOutputStream(
fileSystem.create(hdfsWritePath).getWrappedStream())) {
RemoteIterator<LocatedFileStatus> iterator = fileSystem
.listFiles(
new Path(inputPath), true);
while (iterator.hasNext()) {
writeCurrentFile(fileSystem, dir_name, iterator, ar, 0);
}
}
}
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
int gBperSplit) throws IOException {
final long bytesPerSplit = 1024L * 1024L * 1024L * gBperSplit;
long sourceSize = fileSystem.getContentSummary(new Path(inputPath)).getSpaceConsumed();
if (sourceSize < bytesPerSplit) {
write(fileSystem, inputPath, outputPath + ".tar", dir_name);
} else {
int partNum = 0;
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
.listFiles(
new Path(inputPath), true);
boolean next = fileStatusListIterator.hasNext();
while (next) {
TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar");
long current_size = 0;
while (next && current_size < bytesPerSplit) {
current_size = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, current_size);
next = fileStatusListIterator.hasNext();
}
partNum += 1;
ar.close();
}
}
}
private static long writeCurrentFile(FileSystem fileSystem, String dir_name,
RemoteIterator<LocatedFileStatus> fileStatusListIterator,
TarArchiveOutputStream ar, long current_size) throws IOException {
LocatedFileStatus fileStatus = fileStatusListIterator.next();
Path p = fileStatus.getPath();
String p_string = p.toString();
if (!p_string.endsWith("_SUCCESS")) {
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
if (name.startsWith("part-") && name.length() > 10) {
String tmp = name.substring(0, 10);
if (name.contains(".")) {
tmp += name.substring(name.indexOf("."));
}
name = tmp;
}
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
entry.setSize(fileStatus.getLen());
current_size += fileStatus.getLen();
ar.putArchiveEntry(entry);
InputStream is = fileSystem.open(fileStatus.getPath());
BufferedInputStream bis = new BufferedInputStream(is);
int count;
byte[] data = new byte[1024];
while ((count = bis.read(data, 0, data.length)) != -1) {
ar.write(data, 0, count);
}
bis.close();
ar.closeArchiveEntry();
}
return current_size;
}
}

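A sketch of how tarMaxSize could be driven, splitting an HDFS directory into roughly 1 GB tar parts; the cluster address and paths are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import eu.dnetlib.dhp.common.MakeTarArchive;

public class TarExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:8020"); // hypothetical cluster address
        FileSystem fs = FileSystem.get(conf);
        // produces /out/dump.tar, or /out/dump_1.tar, /out/dump_2.tar, ... when the input exceeds 1 GB
        MakeTarArchive.tarMaxSize(fs, "/data/graph-dump", "/out/dump", "dump", 1);
    }
}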
View File

@@ -0,0 +1,122 @@
package eu.dnetlib.dhp.common;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
import org.bson.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Iterables;
import com.mongodb.BasicDBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.QueryBuilder;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
public class MdstoreClient implements Closeable {
private static final Logger log = LoggerFactory.getLogger(MdstoreClient.class);
private final MongoClient client;
private final MongoDatabase db;
private static final String COLL_METADATA = "metadata";
private static final String COLL_METADATA_MANAGER = "metadataManager";
public MdstoreClient(final String baseUrl, final String dbName) {
this.client = new MongoClient(new MongoClientURI(baseUrl));
this.db = getDb(client, dbName);
}
public MongoCollection<Document> mdStore(final String mdId) {
BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
log.info("querying current mdId: {}", query.toJson());
final String currentId = Optional
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
.map(FindIterable::first)
.map(d -> d.getString("currentId"))
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
log.info("currentId: {}", currentId);
return getColl(db, currentId, true);
}
public Map<String, String> validCollections(
final String mdFormat, final String mdLayout, final String mdInterpretation) {
final Map<String, String> transactions = new HashMap<>();
for (final Document entry : getColl(db, COLL_METADATA_MANAGER, true).find()) {
final String mdId = entry.getString("mdId");
final String currentId = entry.getString("currentId");
if (StringUtils.isNoneBlank(mdId, currentId)) {
transactions.put(mdId, currentId);
}
}
final Map<String, String> res = new HashMap<>();
for (final Document entry : getColl(db, COLL_METADATA, true).find()) {
if (entry.getString("format").equals(mdFormat)
&& entry.getString("layout").equals(mdLayout)
&& entry.getString("interpretation").equals(mdInterpretation)
&& transactions.containsKey(entry.getString("mdId"))) {
res.put(entry.getString("mdId"), transactions.get(entry.getString("mdId")));
}
}
return res;
}
private MongoDatabase getDb(final MongoClient client, final String dbName) {
if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
log.warn(err);
throw new IllegalArgumentException(err);
}
return client.getDatabase(dbName);
}
private MongoCollection<Document> getColl(
final MongoDatabase db, final String collName, final boolean abortIfMissing) {
if (!Iterables.contains(db.listCollectionNames(), collName)) {
final String err = String
.format("Missing collection '%s' in database '%s'", collName, db.getName());
log.warn(err);
if (abortIfMissing) {
throw new IllegalArgumentException(err);
} else {
return null;
}
}
return db.getCollection(collName);
}
public Iterable<String> listRecords(final String collName) {
final MongoCollection<Document> coll = getColl(db, collName, false);
return coll == null
? new ArrayList<>()
: () -> StreamSupport
.stream(coll.find().spliterator(), false)
.filter(e -> e.containsKey("body"))
.map(e -> e.getString("body"))
.iterator();
}
@Override
public void close() throws IOException {
client.close();
}
}

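A usage sketch for the client above, assuming a reachable MongoDB instance and an existing collection name (both placeholders); listRecords streams only the documents that carry a body field.

import eu.dnetlib.dhp.common.MdstoreClient;

public class MdstoreExample {
    public static void main(String[] args) throws Exception {
        try (MdstoreClient client = new MdstoreClient("mongodb://localhost:27017", "mdstore")) {
            long count = 0;
            for (String body : client.listRecords("md-store-collection")) {
                count++;
            }
            System.out.println("records with a body: " + count);
        }
    }
}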
View File

@@ -24,7 +24,6 @@ import com.google.common.hash.Hashing;
*/
public class PacePerson {
private static final String UTF8 = "UTF-8";
private List<String> name = Lists.newArrayList();
private List<String> surname = Lists.newArrayList();
private List<String> fullname = Lists.newArrayList();

View File

@@ -0,0 +1,45 @@
package eu.dnetlib.dhp.common.aggregation;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.utils.DHPUtils;
public class AggregatorReport extends LinkedHashMap<String, String> implements Closeable {
private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
private transient MessageSender messageSender;
public AggregatorReport() {
}
public AggregatorReport(MessageSender messageSender) {
this.messageSender = messageSender;
}
public void ongoing(Long current, Long total) {
messageSender.sendMessage(current, total);
}
@Override
public void close() throws IOException {
if (Objects.nonNull(messageSender)) {
log.info("closing report: ");
this.forEach((k, v) -> log.info("{} - {}", k, v));
Map<String, String> m = new HashMap<>();
m.put(getClass().getSimpleName().toLowerCase(), DHPUtils.MAPPER.writeValueAsString(values()));
messageSender.sendReport(m);
}
}
}

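Because AggregatorReport is both a LinkedHashMap and Closeable, a collection job can accumulate counters and have them shipped automatically on close; a sketch, with a hypothetical message endpoint and workflow id.

import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.message.MessageSender;

public class ReportExample {
    public static void main(String[] args) throws Exception {
        MessageSender sender = new MessageSender("http://dnet-mm.example.org/messages", "wf-1234");
        try (AggregatorReport report = new AggregatorReport(sender)) {
            report.ongoing(10L, 100L); // progress notification
            report.put("TotalItems", "100"); // counters are sent as a REPORT message on close
        }
    }
}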
View File

@@ -13,9 +13,9 @@ import okio.Source;
public class InputStreamRequestBody extends RequestBody {
private InputStream inputStream;
private MediaType mediaType;
private long lenght;
private final InputStream inputStream;
private final MediaType mediaType;
private final long lenght;
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {

View File

@@ -3,6 +3,10 @@ package eu.dnetlib.dhp.common.api;
import java.io.*;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders;
import org.apache.http.entity.ContentType;
import com.google.gson.Gson;
@@ -42,7 +46,7 @@ public class ZenodoAPIClient implements Serializable {
this.deposition_id = deposition_id;
}
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
public ZenodoAPIClient(String urlString, String access_token) {
this.urlString = urlString;
this.access_token = access_token;
@@ -50,19 +54,20 @@ public class ZenodoAPIClient implements Serializable {
/**
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket in which to store the files to upload
*
* @return response code
* @throws IOException
*/
public int newDeposition() throws IOException {
String json = "{}";
OkHttpClient httpClient = new OkHttpClient();
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, json);
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.post(body)
.build();
@@ -86,18 +91,23 @@ public class ZenodoAPIClient implements Serializable {
/**
* Upload files in Zenodo.
*
* @param is the inputStream for the file to upload
* @param file_name the name of the file as it will appear on Zenodo
* @param len the size of the file
* @return the response code
*/
public int uploadIS(InputStream is, String file_name, long len) throws IOException {
OkHttpClient httpClient = new OkHttpClient();
OkHttpClient httpClient = new OkHttpClient.Builder()
.writeTimeout(600, TimeUnit.SECONDS)
.readTimeout(600, TimeUnit.SECONDS)
.connectTimeout(600, TimeUnit.SECONDS)
.build();
Request request = new Request.Builder()
.url(bucket + "/" + file_name)
.addHeader("Content-Type", "application/zip") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
.build();
@@ -110,20 +120,21 @@ public class ZenodoAPIClient implements Serializable {
/**
* Associates metadata information with the current deposition
*
* @param metadata the metadata
* @return response code
* @throws IOException
*/
public int sendMretadata(String metadata) throws IOException {
OkHttpClient httpClient = new OkHttpClient();
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, metadata);
RequestBody body = RequestBody.create(metadata, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.put(body)
.build();
@@ -140,6 +151,7 @@ public class ZenodoAPIClient implements Serializable {
/**
* To publish the current deposition. It works both for a new deposition and for a new version of an old deposition
*
* @return response code
* @throws IOException
*/
@@ -147,12 +159,14 @@ public class ZenodoAPIClient implements Serializable {
String json = "{}";
OkHttpClient httpClient = new OkHttpClient();
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/publish")
.addHeader("Authorization", "Bearer " + access_token)
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
@@ -166,11 +180,12 @@ public class ZenodoAPIClient implements Serializable {
}
/**
* To create a new version of an already published deposition.
* It sets the deposition_id and the bucket to be used for the new version.
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is
* the last part of the url for the DOI Zenodo suggests to use to cite all versions:
* DOI: 10.xxx/zenodo.656930 concept_rec_id = 656930
* To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
* for the new version.
*
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
* part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
* concept_rec_id = 656930
* @return response code
* @throws IOException
* @throws MissingConceptDoiException
@@ -179,12 +194,14 @@ public class ZenodoAPIClient implements Serializable {
setDepositionId(concept_rec_id);
String json = "{}";
OkHttpClient httpClient = new OkHttpClient();
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/newversion")
.addHeader("Authorization", "Bearer " + access_token)
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
@@ -201,6 +218,41 @@ public class ZenodoAPIClient implements Serializable {
}
}
/**
* To finish uploading a not yet published version or deposition. It sets the deposition_id and the bucket to be
* used.
*
* @param deposition_id the deposition id of the not yet published upload
* @return response code
* @throws IOException
* @throws MissingConceptDoiException
*/
public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
this.deposition_id = deposition_id;
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id)
.addHeader("Authorization", "Bearer " + access_token)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
bucket = zenodoModel.getLinks().getBucket();
return response.code();
}
}
private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
@@ -217,12 +269,12 @@ public class ZenodoAPIClient implements Serializable {
}
private String getPrevDepositions() throws IOException {
OkHttpClient httpClient = new OkHttpClient();
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
Request request = new Request.Builder()
.url(urlString)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get()
.build();
@@ -238,12 +290,14 @@ public class ZenodoAPIClient implements Serializable {
}
private String getBucket(String url) throws IOException {
OkHttpClient httpClient = new OkHttpClient();
OkHttpClient httpClient = new OkHttpClient.Builder()
.connectTimeout(600, TimeUnit.SECONDS)
.build();
Request request = new Request.Builder()
.url(url)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get()
.build();

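Putting the methods above together, a deposition is typically created, filled, described, and published in sequence. A sketch with a placeholder URL, token, and file; it assumes the publish action shown above is exposed as publish(), and note that the metadata setter really is named sendMretadata in this class.

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import eu.dnetlib.dhp.common.api.ZenodoAPIClient;

public class ZenodoUploadExample {
    public static void main(String[] args) throws Exception {
        ZenodoAPIClient client = new ZenodoAPIClient(
            "https://sandbox.zenodo.org/api/deposit/depositions", "ACCESS_TOKEN");
        client.newDeposition(); // sets deposition_id and the upload bucket
        File dump = new File("/tmp/dump.tar");
        try (InputStream is = new FileInputStream(dump)) {
            client.uploadIS(is, dump.getName(), dump.length());
        }
        client.sendMretadata("{\"metadata\": {\"title\": \"Example upload\"}}");
        client.publish();
    }
}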
View File

@@ -32,13 +32,13 @@ public class Creator {
public static Creator newInstance(String name, String affiliation, String orcid) {
Creator c = new Creator();
if (!(name == null)) {
if (name != null) {
c.name = name;
}
if (!(affiliation == null)) {
if (affiliation != null) {
c.affiliation = affiliation;
}
if (!(orcid == null)) {
if (orcid != null) {
c.orcid = orcid;
}

View File

@@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
import net.minidev.json.annotate.JsonIgnore;
public class File implements Serializable {
private String checksum;
private String filename;
private long filesize;
private String id;
@JsonIgnore
// private Links links;
public String getChecksum() {
return checksum;
}
@@ -46,13 +41,4 @@ public class File implements Serializable {
this.id = id;
}
// @JsonIgnore
// public Links getLinks() {
// return links;
// }
//
// @JsonIgnore
// public void setLinks(Links links) {
// this.links = links;
// }
}

View File

@@ -0,0 +1,32 @@
package eu.dnetlib.dhp.common.collection;
public class CollectorException extends Exception {
/** */
private static final long serialVersionUID = -290723075076039757L;
public CollectorException() {
super();
}
public CollectorException(
final String message,
final Throwable cause,
final boolean enableSuppression,
final boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
}
public CollectorException(final String message, final Throwable cause) {
super(message, cause);
}
public CollectorException(final String message) {
super(message);
}
public CollectorException(final Throwable cause) {
super(cause);
}
}

View File

@@ -0,0 +1,56 @@
package eu.dnetlib.dhp.common.collection;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.opencsv.bean.CsvToBeanBuilder;
public class GetCSV {
public static final char DEFAULT_DELIMITER = ',';
private GetCSV() {
}
public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
String modelClass) throws IOException, ClassNotFoundException {
getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER);
}
public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath,
String modelClass, char delimiter) throws IOException, ClassNotFoundException {
Path hdfsWritePath = new Path(hdfsPath);
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false);
}
final FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath);
try (BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) {
final ObjectMapper mapper = new ObjectMapper();
@SuppressWarnings("unchecked")
final List lines = new CsvToBeanBuilder(reader)
.withType(Class.forName(modelClass))
.withSeparator(delimiter)
.build()
.parse();
for (Object line : lines) {
writer.write(mapper.writeValueAsString(line));
writer.newLine();
}
}
}
}

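A sketch of loading a CSV into HDFS as JSON lines with the helper above; the bean class name and paths are placeholders, and the model class is expected to be an opencsv-bindable bean.

import java.io.BufferedReader;
import java.io.FileReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import eu.dnetlib.dhp.common.collection.GetCSV;

public class GetCsvExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        try (BufferedReader reader = new BufferedReader(new FileReader("/tmp/projects.csv"))) {
            // each row is bound to the (hypothetical) bean class and written as one JSON object per line
            GetCSV.getCsv(fs, reader, "/data/projects.json", "eu.dnetlib.dhp.example.ProjectModel");
        }
    }
}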
View File

@@ -0,0 +1,94 @@
package eu.dnetlib.dhp.common.collection;
/**
* Bundles the http connection parameters driving the client behaviour.
*/
public class HttpClientParams {
// Defaults
public static int _maxNumberOfRetry = 3;
public static int _requestDelay = 0; // milliseconds
public static int _retryDelay = 10; // seconds
public static int _connectTimeOut = 10; // seconds
public static int _readTimeOut = 30; // seconds
/**
* Maximum number of allowed retries before failing
*/
private int maxNumberOfRetry;
/**
* Delay between requests (Milliseconds)
*/
private int requestDelay;
/**
* Time to wait after a failure before retrying (Seconds)
*/
private int retryDelay;
/**
* Connect timeout (Seconds)
*/
private int connectTimeOut;
/**
* Read timeout (Seconds)
*/
private int readTimeOut;
public HttpClientParams() {
this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut);
}
public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
int readTimeOut) {
this.maxNumberOfRetry = maxNumberOfRetry;
this.requestDelay = requestDelay;
this.retryDelay = retryDelay;
this.connectTimeOut = connectTimeOut;
this.readTimeOut = readTimeOut;
}
public int getMaxNumberOfRetry() {
return maxNumberOfRetry;
}
public void setMaxNumberOfRetry(int maxNumberOfRetry) {
this.maxNumberOfRetry = maxNumberOfRetry;
}
public int getRequestDelay() {
return requestDelay;
}
public void setRequestDelay(int requestDelay) {
this.requestDelay = requestDelay;
}
public int getRetryDelay() {
return retryDelay;
}
public void setRetryDelay(int retryDelay) {
this.retryDelay = retryDelay;
}
public void setConnectTimeOut(int connectTimeOut) {
this.connectTimeOut = connectTimeOut;
}
public int getConnectTimeOut() {
return connectTimeOut;
}
public int getReadTimeOut() {
return readTimeOut;
}
public void setReadTimeOut(int readTimeOut) {
this.readTimeOut = readTimeOut;
}
}

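The defaults above can be tuned per collection plugin; for instance, a sketch of a more patient configuration.

import eu.dnetlib.dhp.common.collection.HttpClientParams;

public class ClientParamsExample {
    public static void main(String[] args) {
        HttpClientParams params = new HttpClientParams();
        params.setMaxNumberOfRetry(5);
        params.setRequestDelay(200); // milliseconds between requests
        params.setRetryDelay(30); // seconds to wait after a failure
        params.setReadTimeOut(120); // seconds
        System.out.println("max retries: " + params.getMaxNumberOfRetry());
    }
}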
View File

@@ -0,0 +1,263 @@
package eu.dnetlib.dhp.common.collection;
import static eu.dnetlib.dhp.utils.DHPUtils.*;
import java.io.IOException;
import java.io.InputStream;
import java.net.*;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.http.HttpHeaders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.common.Constants;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
/**
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
*
* @author jochen, michele, andrea, alessia, claudio, andreas
*/
public class HttpConnector2 {
private static final Logger log = LoggerFactory.getLogger(HttpConnector2.class);
private static final String REPORT_PREFIX = "http:";
private HttpClientParams clientParams;
private String responseType = null;
private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
public HttpConnector2() {
this(new HttpClientParams());
}
public HttpConnector2(HttpClientParams clientParams) {
this.clientParams = clientParams;
CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
}
/**
* @see HttpConnector2#getInputSource(java.lang.String, AggregatorReport)
*/
public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorException {
return IOUtils.toInputStream(getInputSource(requestUrl));
}
/**
* @see HttpConnector2#getInputSource(java.lang.String, AggregatorReport)
*/
public String getInputSource(final String requestUrl) throws CollectorException {
return attemptDownloadAsString(requestUrl, 1, new AggregatorReport());
}
/**
* Given the URL returns the content via HTTP GET
*
* @param requestUrl the URL
* @param report the list of errors
* @return the content of the downloaded resource
* @throws CollectorException when retrying more than maxNumberOfRetry times
*/
public String getInputSource(final String requestUrl, AggregatorReport report)
throws CollectorException {
return attemptDownloadAsString(requestUrl, 1, report);
}
private String attemptDownloadAsString(final String requestUrl, final int retryNumber,
final AggregatorReport report) throws CollectorException {
try (InputStream s = attemptDownload(requestUrl, retryNumber, report)) {
return IOUtils.toString(s);
} catch (IOException e) {
log.error(e.getMessage(), e);
throw new CollectorException(e);
}
}
private InputStream attemptDownload(final String requestUrl, final int retryNumber,
final AggregatorReport report) throws CollectorException, IOException {
if (retryNumber > getClientParams().getMaxNumberOfRetry()) {
final String msg = String
.format(
"Max number of retries (%s/%s) exceeded, failing.",
retryNumber, getClientParams().getMaxNumberOfRetry());
log.error(msg);
throw new CollectorException(msg);
}
log.info("Request attempt {} [{}]", retryNumber, requestUrl);
InputStream input = null;
try {
if (getClientParams().getRequestDelay() > 0) {
backoffAndSleep(getClientParams().getRequestDelay());
}
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
urlConn.setInstanceFollowRedirects(false);
urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000);
urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
if (log.isDebugEnabled()) {
logHeaderFields(urlConn);
}
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 9)) {
if (retryAfter > 0) {
backoffAndSleep(retryAfter);
} else {
backoffAndSleep(1000);
}
}
if (is2xx(urlConn.getResponseCode())) {
input = urlConn.getInputStream();
responseType = urlConn.getContentType();
return input;
}
if (is3xx(urlConn.getResponseCode())) {
// REDIRECTS
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
log.info("The requested url has been moved to {}", newUrl);
report
.put(
REPORT_PREFIX + urlConn.getResponseCode(),
String.format("Moved to: %s", newUrl));
urlConn.disconnect();
if (retryAfter > 0) {
backoffAndSleep(retryAfter);
}
return attemptDownload(newUrl, retryNumber + 1, report);
}
if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) {
switch (urlConn.getResponseCode()) {
case HttpURLConnection.HTTP_NOT_FOUND:
case HttpURLConnection.HTTP_BAD_GATEWAY:
case HttpURLConnection.HTTP_UNAVAILABLE:
case HttpURLConnection.HTTP_GATEWAY_TIMEOUT:
if (retryAfter > 0) {
log
.warn(
"{} - waiting and repeating request after suggested retry-after {} sec.",
requestUrl, retryAfter);
backoffAndSleep(retryAfter * 1000);
} else {
log
.warn(
"{} - waiting and repeating request after default delay of {} sec.",
requestUrl, getClientParams().getRetryDelay());
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
}
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
urlConn.disconnect();
return attemptDownload(requestUrl, retryNumber + 1, report);
default:
report
.put(
REPORT_PREFIX + urlConn.getResponseCode(),
String
.format(
"%s Error: %s", requestUrl, urlConn.getResponseMessage()));
throw new CollectorException(urlConn.getResponseCode() + " error " + report);
}
}
throw new CollectorException(
String
.format(
"Unexpected status code: %s errors: %s", urlConn.getResponseCode(),
MAPPER.writeValueAsString(report)));
} catch (MalformedURLException | UnknownHostException e) {
log.error(e.getMessage(), e);
report.put(e.getClass().getName(), e.getMessage());
throw new CollectorException(e.getMessage(), e);
} catch (SocketTimeoutException | SocketException e) {
log.error(e.getMessage(), e);
report.put(e.getClass().getName(), e.getMessage());
backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000);
return attemptDownload(requestUrl, retryNumber + 1, report);
}
}
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
log.debug("StatusCode: {}", urlConn.getResponseMessage());
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
if (e.getKey() != null) {
for (String v : e.getValue()) {
log.debug(" key: {} - value: {}", e.getKey(), v);
}
}
}
}
private void backoffAndSleep(int sleepTimeMs) throws CollectorException {
log.info("I'm going to sleep for {}ms", sleepTimeMs);
try {
Thread.sleep(sleepTimeMs);
} catch (InterruptedException e) {
log.error(e.getMessage(), e);
throw new CollectorException(e);
}
}
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
for (String key : headerMap.keySet()) {
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
}
}
return -1;
}
private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorException {
for (String key : headerMap.keySet()) {
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.LOCATION) && (headerMap.get(key).size() > 0)) {
return headerMap.get(key).get(0);
}
}
throw new CollectorException("The requested url has been MOVED, but 'location' param is MISSING");
}
private boolean is2xx(final int statusCode) {
return statusCode >= 200 && statusCode <= 299;
}
private boolean is4xx(final int statusCode) {
return statusCode >= 400 && statusCode <= 499;
}
private boolean is3xx(final int statusCode) {
return statusCode >= 300 && statusCode <= 399;
}
private boolean is5xx(final int statusCode) {
return statusCode >= 500 && statusCode <= 599;
}
public String getResponseType() {
return responseType;
}
public HttpClientParams getClientParams() {
return clientParams;
}
public void setClientParams(HttpClientParams clientParams) {
this.clientParams = clientParams;
}
}

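A sketch tying HttpConnector2 to the parameters above; the URL is a placeholder, and failures surface as CollectorException once redirects and retries are exhausted.

import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.common.collection.HttpConnector2;

public class ConnectorExample {
    public static void main(String[] args) {
        HttpClientParams params = new HttpClientParams();
        params.setMaxNumberOfRetry(5);
        HttpConnector2 connector = new HttpConnector2(params);
        try {
            String xml = connector.getInputSource("https://example.org/oai?verb=Identify");
            System.out.println(xml);
        } catch (CollectorException e) {
            e.printStackTrace(); // raised after the retry budget is spent
        }
    }
}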
View File

@@ -0,0 +1,75 @@
package eu.dnetlib.dhp.common.rest;
import java.io.IOException;
import java.util.Arrays;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
public class DNetRestClient {
private static final Logger log = LoggerFactory.getLogger(DNetRestClient.class);
private static final ObjectMapper mapper = new ObjectMapper();
private DNetRestClient() {
}
public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
final HttpGet httpGet = new HttpGet(url);
return doHTTPRequest(httpGet, clazz);
}
public static String doGET(final String url) throws IOException {
final HttpGet httpGet = new HttpGet(url);
return doHTTPRequest(httpGet);
}
public static <V> String doPOST(final String url, V objParam) throws IOException {
final HttpPost httpPost = new HttpPost(url);
if (objParam != null) {
final StringEntity entity = new StringEntity(mapper.writeValueAsString(objParam));
httpPost.setEntity(entity);
httpPost.setHeader("Accept", "application/json");
httpPost.setHeader("Content-type", "application/json");
}
return doHTTPRequest(httpPost);
}
public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws IOException {
return mapper.readValue(doPOST(url, objParam), clazz);
}
private static String doHTTPRequest(final HttpUriRequest r) throws IOException {
try (CloseableHttpClient client = HttpClients.createDefault()) {
log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
log
.info(
"request headers: {}",
Arrays
.asList(r.getAllHeaders())
.stream()
.map(h -> h.getName() + ":" + h.getValue())
.collect(Collectors.joining(",")));
return IOUtils.toString(client.execute(r).getEntity().getContent());
}
}
private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {
return mapper.readValue(doHTTPRequest(r), clazz);
}
}

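A minimal sketch of the helpers above; the service URL is a placeholder, and the typed variants simply run the same request and bind the JSON response with Jackson.

import eu.dnetlib.dhp.common.rest.DNetRestClient;

public class RestClientExample {
    public static void main(String[] args) throws Exception {
        // plain GET returning the raw response body
        String body = DNetRestClient.doGET("http://services.example.org/api/status");
        System.out.println(body);
    }
}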
View File

@@ -0,0 +1,86 @@
package eu.dnetlib.dhp.common.vocabulary;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class Vocabulary implements Serializable {
private final String id;
private final String name;
/**
* Code to Term mappings for this Vocabulary.
*/
private final Map<String, VocabularyTerm> terms = new HashMap<>();
/**
* Synonym to Code mappings for this Vocabulary.
*/
private final Map<String, String> synonyms = Maps.newHashMap();
public Vocabulary(final String id, final String name) {
this.id = id;
this.name = name;
}
public String getId() {
return id;
}
public String getName() {
return name;
}
protected Map<String, VocabularyTerm> getTerms() {
return terms;
}
public VocabularyTerm getTerm(final String id) {
return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null);
}
protected void addTerm(final String id, final String name) {
terms.put(id.toLowerCase(), new VocabularyTerm(id, name));
}
protected boolean termExists(final String id) {
return terms.containsKey(id.toLowerCase());
}
protected void addSynonym(final String syn, final String termCode) {
synonyms.put(syn, termCode.toLowerCase());
}
public VocabularyTerm getTermBySynonym(final String syn) {
return getTerm(synonyms.get(syn.toLowerCase()));
}
public Qualifier getTermAsQualifier(final String termId) {
if (StringUtils.isBlank(termId)) {
return OafMapperUtils.unknown(getId(), getName());
} else if (termExists(termId)) {
final VocabularyTerm t = getTerm(termId);
return OafMapperUtils.qualifier(t.getId(), t.getName(), getId(), getName());
} else {
return OafMapperUtils.qualifier(termId, termId, getId(), getName());
}
}
public Qualifier getSynonymAsQualifier(final String syn) {
return Optional
.ofNullable(getTermBySynonym(syn))
.map(term -> getTermAsQualifier(term.getId()))
.orElse(null);
}
}

View File

@@ -0,0 +1,182 @@
package eu.dnetlib.dhp.common.vocabulary;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class VocabularyGroup implements Serializable {
public static final String VOCABULARIES_XQUERY = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') \n"
+
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
"for $term in ($x//TERM)\n" +
"return concat($vocid,' @=@ ',$vocname,' @=@ ',$term/@code,' @=@ ',$term/@english_name)";
public static final String VOCABULARY_SYNONYMS_XQUERY = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')\n"
+
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
"for $term in ($x//TERM)\n" +
"for $syn in ($term//SYNONYM/@term)\n" +
"return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)\n";
public static VocabularyGroup loadVocsFromIS(ISLookUpService isLookUpService) throws ISLookUpException {
final VocabularyGroup vocs = new VocabularyGroup();
for (final String s : isLookUpService.quickSearchProfile(VOCABULARIES_XQUERY)) {
final String[] arr = s.split("@=@");
if (arr.length == 4) {
final String vocId = arr[0].trim();
final String vocName = arr[1].trim();
final String termId = arr[2].trim();
final String termName = arr[3].trim();
if (!vocs.vocabularyExists(vocId)) {
vocs.addVocabulary(vocId, vocName);
}
vocs.addTerm(vocId, termId, termName);
}
}
for (final String s : isLookUpService.quickSearchProfile(VOCABULARY_SYNONYMS_XQUERY)) {
final String[] arr = s.split("@=@");
if (arr.length == 3) {
final String vocId = arr[0].trim();
final String termId = arr[1].trim();
final String syn = arr[2].trim();
vocs.addSynonyms(vocId, termId, syn);
}
}
// add the term names as synonyms
vocs.vocs.values().forEach(voc -> {
voc.getTerms().values().forEach(term -> {
voc.addSynonym(term.getName().toLowerCase(), term.getId());
});
});
return vocs;
}
private final Map<String, Vocabulary> vocs = new HashMap<>();
public Set<String> vocabularyNames() {
return vocs.keySet();
}
public void addVocabulary(final String id, final String name) {
vocs.put(id.toLowerCase(), new Vocabulary(id, name));
}
public void addTerm(final String vocId, final String id, final String name) {
if (vocabularyExists(vocId)) {
vocs.get(vocId.toLowerCase()).addTerm(id, name);
}
}
public VocabularyTerm getTerm(final String vocId, final String id) {
if (termExists(vocId, id)) {
return vocs.get(vocId.toLowerCase()).getTerm(id);
} else {
return new VocabularyTerm(id, id);
}
}
public Set<String> getTerms(String vocId) {
if (!vocabularyExists(vocId)) {
return new HashSet<>();
}
return vocs
.get(vocId.toLowerCase())
.getTerms()
.values()
.stream()
.map(VocabularyTerm::getId)
.collect(Collectors.toCollection(HashSet::new));
}
public Qualifier lookup(String vocId, String id) {
return Optional
.ofNullable(getSynonymAsQualifier(vocId, id))
.orElse(getTermAsQualifier(vocId, id));
}
public Qualifier getTermAsQualifier(final String vocId, final String id) {
if (vocabularyExists(vocId)) {
return vocs.get(vocId.toLowerCase()).getTermAsQualifier(id);
}
return OafMapperUtils.qualifier(id, id, "", "");
}
public Qualifier getSynonymAsQualifier(final String vocId, final String syn) {
if (StringUtils.isBlank(vocId)) {
return OafMapperUtils.unknown("", "");
}
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
}
/**
* getSynonymAsQualifierCaseSensitive
*
* reflects the situation where a case sensitive vocabulary must be checked
*/
public Qualifier getSynonymAsQualifierCaseSensitive(final String vocId, final String syn) {
if (StringUtils.isBlank(vocId)) {
return OafMapperUtils.unknown("", "");
}
return vocs.get(vocId).getSynonymAsQualifier(syn);
}
/**
* termExists
*
* two variants: without and with a case sensitive check
*/
public boolean termExists(final String vocId, final String id) {
return termExists(vocId, id, Boolean.FALSE);
}
public boolean termExists(final String vocId, final String id, final Boolean caseSensitive) {
if (Boolean.TRUE.equals(caseSensitive)) {
return vocabularyExists(vocId) && vocs.get(vocId).termExists(id);
}
return vocabularyExists(vocId) && vocs.get(vocId.toLowerCase()).termExists(id);
}
public boolean vocabularyExists(final String vocId) {
return Optional
.ofNullable(vocId)
.map(String::toLowerCase)
.map(vocs::containsKey)
.orElse(false);
}
private void addSynonyms(final String vocId, final String termId, final String syn) {
String id = Optional
.ofNullable(vocId)
.map(String::toLowerCase)
.orElseThrow(
() -> new IllegalArgumentException(
String
.format(
"empty vocabulary id for [term:%s, synonym:%s]", termId, syn)));
Optional
.ofNullable(vocs.get(id))
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
.addSynonym(syn.toLowerCase(), termId);
}
}

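A sketch of the lookup flow, assuming a VocabularyGroup loaded from the information service via loadVocsFromIS; the vocabulary id below is a placeholder. lookup tries the synonym table first and falls back to the term code itself.

import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

public class VocabularyLookupExample {
    public static Qualifier resolveLanguage(ISLookUpService isLookUp, String value) throws Exception {
        VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookUp);
        return vocs.lookup("dnet:languages", value); // resolves either a synonym or a term code
    }
}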
View File

@@ -0,0 +1,24 @@
package eu.dnetlib.dhp.common.vocabulary;
import java.io.Serializable;
public class VocabularyTerm implements Serializable {
private final String id;
private final String name;
public VocabularyTerm(final String id, final String name) {
this.id = id;
this.name = name;
}
public String getId() {
return id;
}
public String getName() {
return name;
}
}

View File

@@ -0,0 +1,63 @@
package eu.dnetlib.dhp.message;
import java.io.Serializable;
import java.util.LinkedHashMap;
import java.util.Map;
public class Message implements Serializable {
private static final long serialVersionUID = 401753881204524893L;
public static final String CURRENT_PARAM = "current";
public static final String TOTAL_PARAM = "total";
private MessageType messageType;
private String workflowId;
private Map<String, String> body;
public Message() {
}
public Message(final MessageType messageType, final String workflowId) {
this(messageType, workflowId, new LinkedHashMap<>());
}
public Message(final MessageType messageType, final String workflowId, final Map<String, String> body) {
this.messageType = messageType;
this.workflowId = workflowId;
this.body = body;
}
public MessageType getMessageType() {
return messageType;
}
public void setMessageType(MessageType messageType) {
this.messageType = messageType;
}
public String getWorkflowId() {
return workflowId;
}
public void setWorkflowId(final String workflowId) {
this.workflowId = workflowId;
}
public Map<String, String> getBody() {
return body;
}
public void setBody(final Map<String, String> body) {
this.body = body;
}
@Override
public String toString() {
return String.format("Message [type=%s, workflowId=%s, body=%s]", messageType, workflowId, body);
}
}

View File

@@ -0,0 +1,94 @@
package eu.dnetlib.dhp.message;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
public class MessageSender {
private static final Logger log = LoggerFactory.getLogger(MessageSender.class);
private static final int SOCKET_TIMEOUT_MS = 2000;
private static final int CONNECTION_REQUEST_TIMEOUT_MS = 2000;
private static final int CONNTECTION_TIMEOUT_MS = 2000;
private final ObjectMapper objectMapper = new ObjectMapper();
private final String dnetMessageEndpoint;
private final String workflowId;
private final ExecutorService executorService = Executors.newCachedThreadPool();
public MessageSender(final String dnetMessageEndpoint, final String workflowId) {
this.workflowId = workflowId;
this.dnetMessageEndpoint = dnetMessageEndpoint;
}
public void sendMessage(final Message message) {
executorService.submit(() -> _sendMessage(message));
}
public void sendMessage(final Long current, final Long total) {
sendMessage(createOngoingMessage(current, total));
}
public void sendReport(final Map<String, String> report) {
sendMessage(new Message(MessageType.REPORT, workflowId, report));
}
private Message createOngoingMessage(final Long current, final Long total) {
final Message m = new Message(MessageType.ONGOING, workflowId);
m.getBody().put(Message.CURRENT_PARAM, current.toString());
if (total != null) {
m.getBody().put(Message.TOTAL_PARAM, total.toString());
}
return m;
}
private void _sendMessage(final Message message) {
try {
final String json = objectMapper.writeValueAsString(message);
final HttpPut req = new HttpPut(dnetMessageEndpoint);
req.setEntity(new StringEntity(json, ContentType.APPLICATION_JSON));
final RequestConfig requestConfig = RequestConfig
.custom()
.setConnectTimeout(CONNTECTION_TIMEOUT_MS)
.setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT_MS)
.setSocketTimeout(SOCKET_TIMEOUT_MS)
.build();
try (final CloseableHttpClient client = HttpClients
.custom()
.setDefaultRequestConfig(requestConfig)
.build();
final CloseableHttpResponse response = client.execute(req)) {
log.debug("Sent Message to " + dnetMessageEndpoint);
log.debug("MESSAGE:" + message);
} catch (final Throwable e) {
log.error("Error sending message to " + dnetMessageEndpoint + ", message content: " + message, e);
}
} catch (final JsonProcessingException e) {
log.error("Error sending message to " + dnetMessageEndpoint + ", message content: " + message, e);
}
}
}

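A direct-use sketch of the sender above (endpoint and workflow id are placeholders); messages are posted asynchronously on the cached thread pool.

import java.util.HashMap;
import java.util.Map;

import eu.dnetlib.dhp.message.MessageSender;

public class SenderExample {
    public static void main(String[] args) {
        MessageSender sender = new MessageSender("http://dnet-mm.example.org/messages", "wf-1234");
        sender.sendMessage(50L, 100L); // ONGOING message with current=50, total=100
        Map<String, String> report = new HashMap<>();
        report.put("TotalItems", "100");
        sender.sendReport(report); // REPORT message
    }
}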
View File

@@ -0,0 +1,21 @@
package eu.dnetlib.dhp.message;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
public enum MessageType implements Serializable {
ONGOING, REPORT;
public MessageType from(String value) {
return Optional
.ofNullable(value)
.map(StringUtils::upperCase)
.map(MessageType::valueOf)
.orElseThrow(() -> new IllegalArgumentException("unknown message type: " + value));
}
}

View File

@@ -1,121 +0,0 @@
package eu.dnetlib.dhp.model.mdstore;
import java.io.Serializable;
import eu.dnetlib.dhp.utils.DHPUtils;
/** This class models a record inside the new Metadata store collection on HDFS */
public class MetadataRecord implements Serializable {
/** The D-Net Identifier associated to the record */
private String id;
/** The original Identifier of the record */
private String originalId;
/** The encoding of the record, should be JSON or XML */
private String encoding;
/**
* The information about the provenance of the record; see {@link Provenance} for the model of this information
*/
private Provenance provenance;
/** The content of the metadata */
private String body;
/** the date when the record has been stored */
private long dateOfCollection;
/** the date when the record has been transformed */
private long dateOfTransformation;
public MetadataRecord() {
this.dateOfCollection = System.currentTimeMillis();
}
public MetadataRecord(
String originalId,
String encoding,
Provenance provenance,
String body,
long dateOfCollection) {
this.originalId = originalId;
this.encoding = encoding;
this.provenance = provenance;
this.body = body;
this.dateOfCollection = dateOfCollection;
this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix());
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getOriginalId() {
return originalId;
}
public void setOriginalId(String originalId) {
this.originalId = originalId;
}
public String getEncoding() {
return encoding;
}
public void setEncoding(String encoding) {
this.encoding = encoding;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public String getBody() {
return body;
}
public void setBody(String body) {
this.body = body;
}
public long getDateOfCollection() {
return dateOfCollection;
}
public void setDateOfCollection(long dateOfCollection) {
this.dateOfCollection = dateOfCollection;
}
public long getDateOfTransformation() {
return dateOfTransformation;
}
public void setDateOfTransformation(long dateOfTransformation) {
this.dateOfTransformation = dateOfTransformation;
}
@Override
public boolean equals(Object o) {
if (!(o instanceof MetadataRecord)) {
return false;
}
return ((MetadataRecord) o).getId().equalsIgnoreCase(id);
}
@Override
public int hashCode() {
return id.hashCode();
}
}

View File

@@ -1,52 +0,0 @@
package eu.dnetlib.dhp.model.mdstore;
import java.io.Serializable;
/**
* @author Sandro La Bruzzo
* <p>
* The Provenance class models the provenance of the record in the metadataStore. It contains the identifier and the
* name of the datasource that provides the record.
*/
public class Provenance implements Serializable {
private String datasourceId;
private String datasourceName;
private String nsPrefix;
public Provenance() {
}
public Provenance(String datasourceId, String datasourceName, String nsPrefix) {
this.datasourceId = datasourceId;
this.datasourceName = datasourceName;
this.nsPrefix = nsPrefix;
}
public String getDatasourceId() {
return datasourceId;
}
public void setDatasourceId(String datasourceId) {
this.datasourceId = datasourceId;
}
public String getDatasourceName() {
return datasourceName;
}
public void setDatasourceName(String datasourceName) {
this.datasourceName = datasourceName;
}
public String getNsPrefix() {
return nsPrefix;
}
public void setNsPrefix(String nsPrefix) {
this.nsPrefix = nsPrefix;
}
}

View File

@@ -18,6 +18,9 @@ public class AuthorMerger {
private static final Double THRESHOLD = 0.95;
private AuthorMerger() {
}
public static List<Author> merge(List<List<Author>> authors) {
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
@@ -32,44 +35,54 @@
}
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
int pa = countAuthorsPids(a);
int pb = countAuthorsPids(b);
List<Author> base, enrich;
List<Author> base;
List<Author> enrich;
int sa = authorsSize(a);
int sb = authorsSize(b);
if (pa == pb) {
base = sa > sb ? a : b;
enrich = sa > sb ? b : a;
} else {
if (sa == sb) {
base = pa > pb ? a : b;
enrich = pa > pb ? b : a;
} else {
base = sa > sb ? a : b;
enrich = sa > sb ? b : a;
}
enrichPidFromList(base, enrich);
enrichPidFromList(base, enrich, threshold);
return base;
}
private static void enrichPidFromList(List<Author> base, List<Author> enrich) {
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
return mergeAuthor(a, b, THRESHOLD);
}
private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
if (base == null || enrich == null)
return;
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
final Map<String, Author> basePidAuthorMap = base
.stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.map(p -> new Tuple2<>(pidToComparableString(p), a)))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
// <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
.map(p -> new Tuple2<>(p, a)))
.collect(Collectors.toList());
@@ -83,10 +96,10 @@ public class AuthorMerger {
.max(Comparator.comparing(Tuple2::_1));
if (simAuthor.isPresent()) {
double th = THRESHOLD;
double th = threshold;
// increase the threshold if the surname is too short
if (simAuthor.get()._2().getSurname() != null
&& simAuthor.get()._2().getSurname().length() <= 3)
&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
th = 0.99;
if (simAuthor.get()._1() > th) {
@@ -107,9 +120,9 @@ public class AuthorMerger {
}
public static String pidToComparableString(StructuredProperty pid) {
return (pid.getQualifier() != null
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
: "")
final String classid = pid.getQualifier() != null && pid.getQualifier().getClassid() != null
? pid.getQualifier().getClassid().toLowerCase()
: "";
return classid
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
}
@@ -142,7 +155,7 @@ public class AuthorMerger {
}
private static boolean hasPid(Author a) {
if (a == null || a.getPid() == null || a.getPid().size() == 0)
if (a == null || a.getPid() == null || a.getPid().isEmpty())
return false;
return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
}
@@ -151,12 +164,15 @@ public class AuthorMerger {
if (StringUtils.isNotBlank(author.getSurname())) {
return new Person(author.getSurname() + ", " + author.getName(), false);
} else {
return new Person(author.getFullname(), false);
if (StringUtils.isNotBlank(author.getFullname()))
return new Person(author.getFullname(), false);
else
return new Person("", false);
}
}
private static String normalize(final String s) {
return nfd(s)
String[] normalized = nfd(s)
.toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError
// in case
@@ -166,7 +182,12 @@ public class AuthorMerger {
.replaceAll("(\\p{Punct})+", " ")
.replaceAll("(\\d)+", " ")
.replaceAll("(\\n)+", " ")
.trim();
.trim()
.split(" ");
Arrays.sort(normalized);
return String.join(" ", normalized);
}
private static String nfd(final String s) {

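A sketch of the merge entry point above (the package name is the one used in dhp-common; the author lists are placeholders): the richer of the two lists is kept as base and enriched with pids found only in the other, using the default 0.95 similarity threshold unless the three-argument overload is used.

import java.util.List;

import eu.dnetlib.dhp.oa.merge.AuthorMerger;
import eu.dnetlib.dhp.schema.oaf.Author;

public class AuthorMergeExample {
    public static List<Author> enrich(List<Author> fromCrossref, List<Author> fromOrcid) {
        // keeps the base list and copies over pids matched by name similarity
        return AuthorMerger.mergeAuthor(fromCrossref, fromOrcid);
    }
}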
View File

@@ -12,6 +12,9 @@ import com.ximpleware.VTDNav;
/** Created by sandro on 9/29/16. */
public class VtdUtilityParser {
private VtdUtilityParser() {
}
public static List<Node> getTextValuesWithAttributes(
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
throws VtdException {

View File

@@ -1,238 +0,0 @@
package eu.dnetlib.dhp.schema.oaf;
import java.util.LinkedHashMap;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import com.clearspring.analytics.util.Lists;
import eu.dnetlib.dhp.schema.common.ModelConstants;
public class CleaningFunctions {
public static final String DOI_URL_PREFIX_REGEX = "(^http(s?):\\/\\/)(((dx\\.)?doi\\.org)|(handle\\.test\\.datacite\\.org))\\/";
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
public static final String NONE = "none";
public static <T extends Oaf> T fixVocabularyNames(T value) {
if (value instanceof Datasource) {
// nothing to clean here
} else if (value instanceof Project) {
// nothing to clean here
} else if (value instanceof Organization) {
Organization o = (Organization) value;
if (Objects.nonNull(o.getCountry())) {
fixVocabName(o.getCountry(), ModelConstants.DNET_COUNTRY_TYPE);
}
} else if (value instanceof Relation) {
// nothing to clean here
} else if (value instanceof Result) {
Result r = (Result) value;
fixVocabName(r.getLanguage(), ModelConstants.DNET_LANGUAGES);
fixVocabName(r.getResourcetype(), ModelConstants.DNET_DATA_CITE_RESOURCE);
fixVocabName(r.getBestaccessright(), ModelConstants.DNET_ACCESS_MODES);
if (Objects.nonNull(r.getSubject())) {
r.getSubject().forEach(s -> fixVocabName(s.getQualifier(), ModelConstants.DNET_SUBJECT_TYPOLOGIES));
}
if (Objects.nonNull(r.getInstance())) {
for (Instance i : r.getInstance()) {
fixVocabName(i.getAccessright(), ModelConstants.DNET_ACCESS_MODES);
fixVocabName(i.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS);
}
}
if (Objects.nonNull(r.getAuthor())) {
r.getAuthor().forEach(a -> {
if (Objects.nonNull(a.getPid())) {
a.getPid().forEach(p -> {
fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES);
});
}
});
}
if (value instanceof Publication) {
} else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) {
} else if (value instanceof OtherResearchProduct) {
} else if (value instanceof Software) {
}
}
return value;
}
public static <T extends Oaf> T fixDefaults(T value) {
if (value instanceof Datasource) {
// nothing to clean here
} else if (value instanceof Project) {
// nothing to clean here
} else if (value instanceof Organization) {
Organization o = (Organization) value;
if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) {
o.setCountry(qualifier("UNKNOWN", "Unknown", ModelConstants.DNET_COUNTRY_TYPE));
}
} else if (value instanceof Relation) {
// nothing to clean here
} else if (value instanceof Result) {
Result r = (Result) value;
if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) {
r.setPublisher(null);
}
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
r
.setLanguage(
qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
}
if (Objects.nonNull(r.getSubject())) {
r
.setSubject(
r
.getSubject()
.stream()
.filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
.filter(sp -> Objects.nonNull(sp.getQualifier()))
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
.collect(Collectors.toList()));
}
if (Objects.nonNull(r.getPid())) {
r
.setPid(
r
.getPid()
.stream()
.filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
.filter(sp -> !NONE.equalsIgnoreCase(sp.getValue()))
.filter(sp -> Objects.nonNull(sp.getQualifier()))
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
.map(CleaningFunctions::normalizePidValue)
.collect(Collectors.toList()));
}
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
r
.setResourcetype(
qualifier("UNKNOWN", "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
}
if (Objects.nonNull(r.getInstance())) {
for (Instance i : r.getInstance()) {
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
i.setAccessright(qualifier("UNKNOWN", "not available", ModelConstants.DNET_ACCESS_MODES));
}
if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
}
if (Objects.isNull(i.getRefereed())) {
i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
}
}
}
if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
if (Objects.isNull(bestaccessrights)) {
r
.setBestaccessright(
qualifier("UNKNOWN", "not available", ModelConstants.DNET_ACCESS_MODES));
} else {
r.setBestaccessright(bestaccessrights);
}
}
if (Objects.nonNull(r.getAuthor())) {
boolean nullRank = r
.getAuthor()
.stream()
.anyMatch(a -> Objects.isNull(a.getRank()));
if (nullRank) {
int i = 1;
for (Author author : r.getAuthor()) {
author.setRank(i++);
}
}
for (Author a : r.getAuthor()) {
if (Objects.isNull(a.getPid())) {
a.setPid(Lists.newArrayList());
} else {
a
.setPid(
a
.getPid()
.stream()
.filter(p -> Objects.nonNull(p.getQualifier()))
.filter(p -> StringUtils.isNotBlank(p.getValue()))
.map(p -> {
p.setValue(p.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, ""));
return p;
})
.collect(
Collectors
.toMap(
StructuredProperty::getValue, Function.identity(), (p1, p2) -> p1,
LinkedHashMap::new))
.values()
.stream()
.collect(Collectors.toList()));
}
}
}
if (value instanceof Publication) {
} else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) {
} else if (value instanceof OtherResearchProduct) {
} else if (value instanceof Software) {
}
}
return value;
}
// HELPERS
private static void fixVocabName(Qualifier q, String vocabularyName) {
if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) {
q.setSchemeid(vocabularyName);
q.setSchemename(vocabularyName);
}
}
private static Qualifier qualifier(String classid, String classname, String scheme) {
return OafMapperUtils
.qualifier(
classid, classname, scheme, scheme);
}
/**
* Utility method that normalises PID values on a per-type basis.
* @param pid the PID whose value will be normalised.
* @return the PID containing the normalised value.
*/
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
String value = Optional
.ofNullable(pid.getValue())
.map(String::trim)
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
switch (pid.getQualifier().getClassid()) {
// TODO add cleaning for more PID types as needed
case "doi":
pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX_REGEX, ""));
break;
}
return pid;
}
}
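This class is removed by this changeset (a CleaningFunctions lives on under eu.dnetlib.dhp.schema.oaf.utils, as the imports further down show). For reference, a minimal sketch of the DOI branch of normalizePidValue as defined above:

    import eu.dnetlib.dhp.schema.oaf.CleaningFunctions;
    import eu.dnetlib.dhp.schema.oaf.Qualifier;
    import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

    public class NormalizePidSketch {
        public static void main(String[] args) {
            final Qualifier doi = new Qualifier();
            doi.setClassid("doi");
            final StructuredProperty pid = new StructuredProperty();
            pid.setQualifier(doi);
            pid.setValue(" https://doi.org/10.1093/NAR/GKAB1016 ");
            // the resolver prefix is stripped and the value lower-cased:
            // prints "10.1093/nar/gkab1016"
            System.out.println(CleaningFunctions.normalizePidValue(pid).getValue());
        }
    }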

View File

@@ -1,14 +0,0 @@
package eu.dnetlib.dhp.schema.oaf;
public class ModelHardLimits {
public static final int MAX_EXTERNAL_ENTITIES = 50;
public static final int MAX_AUTHORS = 200;
public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
public static final int MAX_TITLE_LENGTH = 5000;
public static final int MAX_TITLES = 10;
public static final int MAX_ABSTRACT_LENGTH = 150000;
public static final int MAX_INSTANCES = 10;
}

View File

@@ -1,296 +0,0 @@
package eu.dnetlib.dhp.schema.oaf;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.LicenseComparator;
import eu.dnetlib.dhp.utils.DHPUtils;
public class OafMapperUtils {
public static KeyValue keyValue(final String k, final String v) {
final KeyValue kv = new KeyValue();
kv.setKey(k);
kv.setValue(v);
return kv;
}
public static List<KeyValue> listKeyValues(final String... s) {
if (s.length % 2 > 0) {
throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)");
}
final List<KeyValue> list = new ArrayList<>();
for (int i = 0; i < s.length; i += 2) {
list.add(keyValue(s[i], s[i + 1]));
}
return list;
}
public static <T> Field<T> field(final T value, final DataInfo info) {
if (value == null || StringUtils.isBlank(value.toString())) {
return null;
}
final Field<T> field = new Field<>();
field.setValue(value);
field.setDataInfo(info);
return field;
}
public static List<Field<String>> listFields(final DataInfo info, final String... values) {
return Arrays
.stream(values)
.map(v -> field(v, info))
.filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue()))
.collect(Collectors.toList());
}
public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
return values
.stream()
.map(v -> field(v, info))
.filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue()))
.collect(Collectors.toList());
}
public static Qualifier unknown(final String schemeid, final String schemename) {
return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
}
public static Qualifier qualifier(
final String classid,
final String classname,
final String schemeid,
final String schemename) {
final Qualifier q = new Qualifier();
q.setClassid(classid);
q.setClassname(classname);
q.setSchemeid(schemeid);
q.setSchemename(schemename);
return q;
}
public static StructuredProperty structuredProperty(
final String value,
final String classid,
final String classname,
final String schemeid,
final String schemename,
final DataInfo dataInfo) {
return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
}
public static StructuredProperty structuredProperty(
final String value,
final Qualifier qualifier,
final DataInfo dataInfo) {
if (value == null) {
return null;
}
final StructuredProperty sp = new StructuredProperty();
sp.setValue(value);
sp.setQualifier(qualifier);
sp.setDataInfo(dataInfo);
return sp;
}
public static ExtraInfo extraInfo(
final String name,
final String value,
final String typology,
final String provenance,
final String trust) {
final ExtraInfo info = new ExtraInfo();
info.setName(name);
info.setValue(value);
info.setTypology(typology);
info.setProvenance(provenance);
info.setTrust(trust);
return info;
}
public static OAIProvenance oaiIProvenance(
final String identifier,
final String baseURL,
final String metadataNamespace,
final Boolean altered,
final String datestamp,
final String harvestDate) {
final OriginDescription desc = new OriginDescription();
desc.setIdentifier(identifier);
desc.setBaseURL(baseURL);
desc.setMetadataNamespace(metadataNamespace);
desc.setAltered(altered);
desc.setDatestamp(datestamp);
desc.setHarvestDate(harvestDate);
final OAIProvenance p = new OAIProvenance();
p.setOriginDescription(desc);
return p;
}
public static Journal journal(
final String name,
final String issnPrinted,
final String issnOnline,
final String issnLinking,
final DataInfo dataInfo) {
return journal(
name,
issnPrinted,
issnOnline,
issnLinking,
null,
null,
null,
null,
null,
null,
null,
dataInfo);
}
public static Journal journal(
final String name,
final String issnPrinted,
final String issnOnline,
final String issnLinking,
final String ep,
final String iss,
final String sp,
final String vol,
final String edition,
final String conferenceplace,
final String conferencedate,
final DataInfo dataInfo) {
if (StringUtils.isNotBlank(name)
|| StringUtils.isNotBlank(issnPrinted)
|| StringUtils.isNotBlank(issnOnline)
|| StringUtils.isNotBlank(issnLinking)) {
final Journal j = new Journal();
j.setName(name);
j.setIssnPrinted(issnPrinted);
j.setIssnOnline(issnOnline);
j.setIssnLinking(issnLinking);
j.setEp(ep);
j.setIss(iss);
j.setSp(sp);
j.setVol(vol);
j.setEdition(edition);
j.setConferenceplace(conferenceplace);
j.setConferencedate(conferencedate);
j.setDataInfo(dataInfo);
return j;
} else {
return null;
}
}
public static DataInfo dataInfo(
final Boolean deletedbyinference,
final String inferenceprovenance,
final Boolean inferred,
final Boolean invisible,
final Qualifier provenanceaction,
final String trust) {
final DataInfo d = new DataInfo();
d.setDeletedbyinference(deletedbyinference);
d.setInferenceprovenance(inferenceprovenance);
d.setInferred(inferred);
d.setInvisible(invisible);
d.setProvenanceaction(provenanceaction);
d.setTrust(trust);
return d;
}
public static String createOpenaireId(
final int prefix,
final String originalId,
final boolean to_md5) {
if (StringUtils.isBlank(originalId)) {
return null;
} else if (to_md5) {
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
final String rest = StringUtils.substringAfter(originalId, "::");
return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
} else {
return String.format("%s|%s", prefix, originalId);
}
}
public static String createOpenaireId(
final String type,
final String originalId,
final boolean to_md5) {
switch (type) {
case "datasource":
return createOpenaireId(10, originalId, to_md5);
case "organization":
return createOpenaireId(20, originalId, to_md5);
case "person":
return createOpenaireId(30, originalId, to_md5);
case "project":
return createOpenaireId(40, originalId, to_md5);
default:
return createOpenaireId(50, originalId, to_md5);
}
}
public static String asString(final Object o) {
return o == null ? "" : o.toString();
}
public static <T> Predicate<T> distinctByKey(
final Function<? super T, ?> keyExtractor) {
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
}
public static Qualifier createBestAccessRights(final List<Instance> instanceList) {
return getBestAccessRights(instanceList);
}
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
if (instanceList != null) {
final Optional<Qualifier> min = instanceList
.stream()
.map(i -> i.getAccessright())
.min(new LicenseComparator());
final Qualifier rights = min.isPresent() ? min.get() : new Qualifier();
if (StringUtils.isBlank(rights.getClassid())) {
rights.setClassid(UNKNOWN);
}
if (StringUtils.isBlank(rights.getClassname())
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
rights.setClassname(NOT_AVAILABLE);
}
if (StringUtils.isBlank(rights.getSchemeid())) {
rights.setSchemeid(DNET_ACCESS_MODES);
}
if (StringUtils.isBlank(rights.getSchemename())) {
rights.setSchemename(DNET_ACCESS_MODES);
}
return rights;
}
return null;
}
}
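A minimal sketch of the id scheme implemented by createOpenaireId (the entity ids are illustrative):

    import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;

    public class OpenaireIdSketch {
        public static void main(String[] args) {
            // type prefix 10 (datasource); the local part after "::" is md5-hashed:
            // prints "10|opendoar____::" followed by the 32-char hash of "2659"
            System.out.println(OafMapperUtils.createOpenaireId("datasource", "opendoar____::2659", true));
            // with to_md5 = false the original id is kept verbatim: prints "50|foo::bar"
            System.out.println(OafMapperUtils.createOpenaireId("publication", "foo::bar", false));
        }
    }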

View File

@@ -1,49 +0,0 @@
package eu.dnetlib.dhp.schema.oaf;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
public class ResultTypeComparator implements Comparator<Result> {
@Override
public int compare(Result left, Result right) {
if (left == null && right == null)
return 0;
if (left == null)
return 1;
if (right == null)
return -1;
String lClass = left.getResulttype().getClassid();
String rClass = right.getResulttype().getClassid();
if (lClass.equals(rClass))
return 0;
if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
return -1;
if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
return 1;
if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
return -1;
if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
return 1;
if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
return -1;
if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
return 1;
if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
return -1;
if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
return 1;
// Else (but unlikely), lexicographical ordering will do.
return lClass.compareTo(rClass);
}
}
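The comparator ranks publications ahead of datasets, software and ORPs. A minimal sketch (the result type classids come from ModelConstants as above; building a bare Result this way is for illustration only):

    import java.util.ArrayList;
    import java.util.List;

    import eu.dnetlib.dhp.schema.common.ModelConstants;
    import eu.dnetlib.dhp.schema.oaf.Qualifier;
    import eu.dnetlib.dhp.schema.oaf.Result;
    import eu.dnetlib.dhp.schema.oaf.ResultTypeComparator;

    public class ResultOrderSketch {
        private static Result withType(String classid) {
            final Qualifier q = new Qualifier();
            q.setClassid(classid);
            final Result r = new Result();
            r.setResulttype(q);
            return r;
        }

        public static void main(String[] args) {
            final List<Result> results = new ArrayList<>();
            results.add(withType(ModelConstants.DATASET_RESULTTYPE_CLASSID));
            results.add(withType(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID));
            results.sort(new ResultTypeComparator());
            // the publication is now first
            System.out.println(results.get(0).getResulttype().getClassid());
        }
    }

This ordering is what mergeResults, in the reworked OafMapperUtils further down, relies on to pick which side wins a merge.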

View File

@@ -0,0 +1,528 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import me.xuender.unidecode.Unidecode;
public class GraphCleaningFunctions extends CleaningFunctions {
public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
public static final int ORCID_LEN = 19;
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";
public static final String TITLE_TEST = "test";
public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", TITLE_TEST);
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
public static <T extends Oaf> T fixVocabularyNames(T value) {
if (value instanceof Datasource) {
// nothing to clean here
} else if (value instanceof Project) {
// nothing to clean here
} else if (value instanceof Organization) {
Organization o = (Organization) value;
if (Objects.nonNull(o.getCountry())) {
fixVocabName(o.getCountry(), ModelConstants.DNET_COUNTRY_TYPE);
}
} else if (value instanceof Relation) {
// nothing to clean here
} else if (value instanceof Result) {
Result r = (Result) value;
fixVocabName(r.getLanguage(), ModelConstants.DNET_LANGUAGES);
fixVocabName(r.getResourcetype(), ModelConstants.DNET_DATA_CITE_RESOURCE);
fixVocabName(r.getBestaccessright(), ModelConstants.DNET_ACCESS_MODES);
if (Objects.nonNull(r.getSubject())) {
r.getSubject().forEach(s -> fixVocabName(s.getQualifier(), ModelConstants.DNET_SUBJECT_TYPOLOGIES));
}
if (Objects.nonNull(r.getInstance())) {
for (Instance i : r.getInstance()) {
fixVocabName(i.getAccessright(), ModelConstants.DNET_ACCESS_MODES);
fixVocabName(i.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS);
}
}
if (Objects.nonNull(r.getAuthor())) {
r.getAuthor().stream().filter(Objects::nonNull).forEach(a -> {
if (Objects.nonNull(a.getPid())) {
a.getPid().stream().filter(Objects::nonNull).forEach(p -> {
fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES);
});
}
});
}
if (value instanceof Publication) {
} else if (value instanceof Dataset) {
} else if (value instanceof OtherResearchProduct) {
} else if (value instanceof Software) {
}
}
return value;
}
public static <T extends Oaf> boolean filter(T value) {
if (Boolean.TRUE
.equals(
Optional
.ofNullable(value)
.map(
o -> Optional
.ofNullable(o.getDataInfo())
.map(
d -> Optional
.ofNullable(d.getInvisible())
.orElse(true))
.orElse(true))
.orElse(true))) {
return true;
}
if (value instanceof Datasource) {
// nothing to evaluate here
} else if (value instanceof Project) {
// nothing to evaluate here
} else if (value instanceof Organization) {
// nothing to evaluate here
} else if (value instanceof Relation) {
// nothing to clean here
} else if (value instanceof Result) {
Result r = (Result) value;
if (Objects.isNull(r.getTitle()) || r.getTitle().isEmpty()) {
return false;
}
if (value instanceof Publication) {
} else if (value instanceof Dataset) {
} else if (value instanceof OtherResearchProduct) {
} else if (value instanceof Software) {
}
}
return true;
}
public static <T extends Oaf> T cleanup(T value, VocabularyGroup vocs) {
if (value instanceof Datasource) {
// nothing to clean here
} else if (value instanceof Project) {
// nothing to clean here
} else if (value instanceof Organization) {
Organization o = (Organization) value;
if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) {
o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
}
} else if (value instanceof Relation) {
Relation r = (Relation) value;
Optional<String> validationDate = doCleanDate(r.getValidationDate());
if (validationDate.isPresent()) {
r.setValidationDate(validationDate.get());
r.setValidated(true);
} else {
r.setValidationDate(null);
r.setValidated(false);
}
} else if (value instanceof Result) {
Result r = (Result) value;
if (Objects.nonNull(r.getDateofacceptance())) {
Optional<String> date = cleanDateField(r.getDateofacceptance());
if (date.isPresent()) {
r.getDateofacceptance().setValue(date.get());
} else {
r.setDateofacceptance(null);
}
}
if (Objects.nonNull(r.getRelevantdate())) {
r
.setRelevantdate(
r
.getRelevantdate()
.stream()
.filter(Objects::nonNull)
.filter(sp -> Objects.nonNull(sp.getQualifier()))
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
.map(sp -> {
sp.setValue(GraphCleaningFunctions.cleanDate(sp.getValue()));
return sp;
})
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
.collect(Collectors.toList()));
}
if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) {
r.setPublisher(null);
}
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
r
.setLanguage(
qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
}
if (Objects.nonNull(r.getSubject())) {
r
.setSubject(
r
.getSubject()
.stream()
.filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
.filter(sp -> Objects.nonNull(sp.getQualifier()))
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
.map(GraphCleaningFunctions::cleanValue)
.collect(Collectors.toList()));
}
if (Objects.nonNull(r.getTitle())) {
r
.setTitle(
r
.getTitle()
.stream()
.filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
.filter(
sp -> {
final String title = sp
.getValue()
.toLowerCase();
final String decoded = Unidecode.decode(title);
if (StringUtils.contains(decoded, TITLE_TEST)) {
return decoded
.replaceAll(TITLE_FILTER_REGEX, "")
.length() > TITLE_FILTER_RESIDUAL_LENGTH;
}
return !decoded
.replaceAll("\\W|\\d", "")
.isEmpty();
})
.map(GraphCleaningFunctions::cleanValue)
.collect(Collectors.toList()));
}
if (Objects.nonNull(r.getFormat())) {
r
.setFormat(
r
.getFormat()
.stream()
.map(GraphCleaningFunctions::cleanValue)
.collect(Collectors.toList()));
}
if (Objects.nonNull(r.getDescription())) {
r
.setDescription(
r
.getDescription()
.stream()
.filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
.map(GraphCleaningFunctions::cleanValue)
.collect(Collectors.toList()));
}
if (Objects.nonNull(r.getPid())) {
r.setPid(processPidCleaning(r.getPid()));
}
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
r
.setResourcetype(
qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
}
if (Objects.nonNull(r.getInstance())) {
for (Instance i : r.getInstance()) {
if (!vocs.termExists(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getClassid())) {
if (r instanceof Publication) {
i
.setInstancetype(
OafMapperUtils
.qualifier(
"0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof Dataset) {
i
.setInstancetype(
OafMapperUtils
.qualifier(
"0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof Software) {
i
.setInstancetype(
OafMapperUtils
.qualifier(
"0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof OtherResearchProduct) {
i
.setInstancetype(
OafMapperUtils
.qualifier(
"0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE));
}
}
if (Objects.nonNull(i.getPid())) {
i.setPid(processPidCleaning(i.getPid()));
}
if (Objects.nonNull(i.getAlternateIdentifier())) {
i.setAlternateIdentifier(processPidCleaning(i.getAlternateIdentifier()));
}
Optional
.ofNullable(i.getPid())
.ifPresent(pid -> {
final Set<StructuredProperty> pids = Sets.newHashSet(pid);
Optional
.ofNullable(i.getAlternateIdentifier())
.ifPresent(altId -> {
final Set<StructuredProperty> altIds = Sets.newHashSet(altId);
i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
});
});
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
i
.setAccessright(
accessRight(
ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
ModelConstants.DNET_ACCESS_MODES));
}
if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
}
if (Objects.isNull(i.getRefereed())) {
i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
}
if (Objects.nonNull(i.getDateofacceptance())) {
Optional<String> date = cleanDateField(i.getDateofacceptance());
if (date.isPresent()) {
i.getDateofacceptance().setValue(date.get());
} else {
i.setDateofacceptance(null);
}
}
}
}
if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
if (Objects.isNull(bestaccessrights)) {
r
.setBestaccessright(
qualifier(
ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
ModelConstants.DNET_ACCESS_MODES));
} else {
r.setBestaccessright(bestaccessrights);
}
}
if (Objects.nonNull(r.getAuthor())) {
r
.setAuthor(
r
.getAuthor()
.stream()
.filter(Objects::nonNull)
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
.collect(Collectors.toList()));
boolean nullRank = r
.getAuthor()
.stream()
.anyMatch(a -> Objects.isNull(a.getRank()));
if (nullRank) {
int i = 1;
for (Author author : r.getAuthor()) {
author.setRank(i++);
}
}
for (Author a : r.getAuthor()) {
if (Objects.isNull(a.getPid())) {
a.setPid(Lists.newArrayList());
} else {
a
.setPid(
a
.getPid()
.stream()
.filter(Objects::nonNull)
.filter(p -> Objects.nonNull(p.getQualifier()))
.filter(p -> StringUtils.isNotBlank(p.getValue()))
.map(p -> {
// hack to distinguish orcid from orcid_pending
String pidProvenance = Optional
.ofNullable(p.getDataInfo())
.map(
d -> Optional
.ofNullable(d.getProvenanceaction())
.map(Qualifier::getClassid)
.orElse(""))
.orElse("");
if (p
.getQualifier()
.getClassid()
.toLowerCase()
.contains(ModelConstants.ORCID)) {
if (pidProvenance
.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
p.getQualifier().setClassid(ModelConstants.ORCID);
} else {
p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
}
final String orcid = p
.getValue()
.trim()
.toLowerCase()
.replaceAll(ORCID_CLEANING_REGEX, "$1-$2-$3-$4");
if (orcid.length() == ORCID_LEN) {
p.setValue(orcid);
} else {
p.setValue("");
}
}
return p;
})
.filter(p -> StringUtils.isNotBlank(p.getValue()))
.collect(
Collectors
.toMap(
p -> p.getQualifier().getClassid() + p.getValue(),
Function.identity(),
(p1, p2) -> p1,
LinkedHashMap::new))
.values()
.stream()
.collect(Collectors.toList()));
}
}
}
if (value instanceof Publication) {
} else if (value instanceof Dataset) {
} else if (value instanceof OtherResearchProduct) {
} else if (value instanceof Software) {
}
}
return value;
}
private static Optional<String> cleanDateField(Field<String> dateofacceptance) {
return Optional
.ofNullable(dateofacceptance)
.map(Field::getValue)
.map(GraphCleaningFunctions::cleanDate)
.filter(Objects::nonNull);
}
protected static Optional<String> doCleanDate(String date) {
return Optional.ofNullable(cleanDate(date));
}
public static String cleanDate(final String inputDate) {
if (StringUtils.isBlank(inputDate)) {
return null;
}
try {
final LocalDate date = DateParserUtils
.parseDate(inputDate.trim())
.toInstant()
.atZone(ZoneId.systemDefault())
.toLocalDate();
return DateTimeFormatter.ofPattern(ModelSupport.DATE_FORMAT).format(date);
} catch (DateTimeParseException e) {
return null;
}
}
// HELPERS
private static boolean isValidAuthorName(Author a) {
return !Stream
.of(a.getFullname(), a.getName(), a.getSurname())
.filter(s -> s != null && !s.isEmpty())
.collect(Collectors.joining(""))
.toLowerCase()
.matches(INVALID_AUTHOR_REGEX);
}
private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
return pids
.stream()
.filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
.filter(sp -> Objects.nonNull(sp.getQualifier()))
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
.map(CleaningFunctions::normalizePidValue)
.filter(CleaningFunctions::pidFilter)
.collect(Collectors.toList());
}
private static void fixVocabName(Qualifier q, String vocabularyName) {
if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) {
q.setSchemeid(vocabularyName);
q.setSchemename(vocabularyName);
}
}
private static AccessRight accessRight(String classid, String classname, String scheme) {
return OafMapperUtils
.accessRight(
classid, classname, scheme, scheme);
}
private static Qualifier qualifier(String classid, String classname, String scheme) {
return OafMapperUtils
.qualifier(
classid, classname, scheme, scheme);
}
protected static StructuredProperty cleanValue(StructuredProperty s) {
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
return s;
}
protected static Field<String> cleanValue(Field<String> s) {
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
return s;
}
}
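A minimal sketch of the cleanDate contract that all the date handling above funnels through (the output pattern comes from ModelSupport.DATE_FORMAT, assumed to be yyyy-MM-dd):

    import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;

    public class CleanDateSketch {
        public static void main(String[] args) {
            // parseable free-form inputs are normalised: prints "2021-02-18"
            System.out.println(GraphCleaningFunctions.cleanDate("2021-02-18 10:00:00"));
            // blank or unparseable inputs collapse to null, which callers use to drop the field
            System.out.println(GraphCleaningFunctions.cleanDate("   "));
        }
    }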

View File

@@ -1,102 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.io.Serializable;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import eu.dnetlib.dhp.schema.oaf.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils;
/**
* Factory class for OpenAIRE identifiers in the Graph
*/
public class IdentifierFactory implements Serializable {
public static final String ID_SEPARATOR = "::";
public static final String ID_PREFIX_SEPARATOR = "|";
public final static String ID_REGEX = "^[0-9][0-9]\\" + ID_PREFIX_SEPARATOR + ".{12}" + ID_SEPARATOR
+ "[a-zA-Z0-9]{32}$";
public final static String DOI_REGEX = "(^10\\.[0-9]{4,9}\\/[-._;()\\/:a-zA-Z0-9]+$)|" +
"(^10\\.1002\\/[^\\s]+$)|" +
"(^10\\.1021\\/[a-zA-Z0-9_][a-zA-Z0-9_][0-9]++$)|" +
"(^10\\.1207\\/[a-zA-Z0-9_]+\\&[0-9]+_[0-9]+$)";
public static final int ID_PREFIX_LEN = 12;
public static final String NONE = "none";
/**
* Creates an identifier from the most relevant PID (if available) in the given entity T. Returns entity.id
* when no PID is available
* @param entity the entity providing PIDs and a default ID.
* @param <T> the specific entity type. Currently Organization and Result subclasses are supported.
* @return an identifier from the most relevant PID, entity.id otherwise
*/
public static <T extends OafEntity> String createIdentifier(T entity) {
if (Objects.isNull(entity.getPid()) || entity.getPid().isEmpty()) {
return entity.getId();
}
return entity
.getPid()
.stream()
.filter(s -> pidFilter(s))
.min(new PidComparator<>(entity))
.map(s -> idFromPid(entity, s))
.map(IdentifierFactory::verifyIdSyntax)
.orElseGet(entity::getId);
}
protected static boolean pidFilter(StructuredProperty s) {
if (Objects.isNull(s.getQualifier()) ||
StringUtils.isBlank(StringUtils.trim(s.getValue()))) {
return false;
}
try {
switch (PidType.valueOf(s.getQualifier().getClassid())) {
case doi:
final String doi = StringUtils.trim(StringUtils.lowerCase(s.getValue()));
return doi.matches(DOI_REGEX);
default:
return true;
}
} catch (IllegalArgumentException e) {
return false;
}
}
private static String verifyIdSyntax(String s) {
if (StringUtils.isBlank(s) || !s.matches(ID_REGEX)) {
throw new RuntimeException(String.format("malformed id: '%s'", s));
} else {
return s;
}
}
private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s) {
return new StringBuilder()
.append(StringUtils.substringBefore(entity.getId(), ID_PREFIX_SEPARATOR))
.append(ID_PREFIX_SEPARATOR)
.append(createPrefix(s.getQualifier().getClassid()))
.append(ID_SEPARATOR)
.append(DHPUtils.md5(CleaningFunctions.normalizePidValue(s).getValue()))
.toString();
}
// create the prefix (length = 12)
private static String createPrefix(String pidType) {
StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN));
while (prefix.length() < ID_PREFIX_LEN) {
prefix.append("_");
}
return prefix.substring(0, ID_PREFIX_LEN);
}
}
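This class is removed from the module here, presumably in favour of the shared dhp-schemas utilities. A minimal sketch of the minting rule it implemented (entity id and DOI are illustrative; the 12-char prefix padding and md5 hashing follow idFromPid and createPrefix above):

    import java.util.Arrays;

    import eu.dnetlib.dhp.schema.oaf.Publication;
    import eu.dnetlib.dhp.schema.oaf.Qualifier;
    import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
    import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;

    public class IdentifierSketch {
        public static void main(String[] args) {
            final Publication p = new Publication();
            p.setId("50|fakeNsPrefix::0123456789abcdef0123456789abcdef"); // fallback id, illustrative
            final Qualifier doi = new Qualifier();
            doi.setClassid("doi");
            final StructuredProperty pid = new StructuredProperty();
            pid.setQualifier(doi);
            pid.setValue("10.1000/abc");
            p.setPid(Arrays.asList(pid));
            // prints "50|doi_________::" followed by the md5 of the normalised DOI
            System.out.println(IdentifierFactory.createIdentifier(p));
        }
    }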

View File

@@ -0,0 +1,371 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
public class OafMapperUtils {
private OafMapperUtils() {
}
public static Oaf merge(final Oaf left, final Oaf right) {
if (ModelSupport.isSubClass(left, OafEntity.class)) {
return mergeEntities((OafEntity) left, (OafEntity) right);
} else if (ModelSupport.isSubClass(left, Relation.class)) {
((Relation) left).mergeFrom((Relation) right);
} else {
throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
}
return left;
}
public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
if (ModelSupport.isSubClass(left, Result.class)) {
return mergeResults((Result) left, (Result) right);
} else if (ModelSupport.isSubClass(left, Datasource.class)) {
left.mergeFrom(right);
} else if (ModelSupport.isSubClass(left, Organization.class)) {
left.mergeFrom(right);
} else if (ModelSupport.isSubClass(left, Project.class)) {
left.mergeFrom(right);
} else {
throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
}
return left;
}
public static Result mergeResults(Result left, Result right) {
if (new ResultTypeComparator().compare(left, right) < 0) {
left.mergeFrom(right);
return left;
} else {
right.mergeFrom(left);
return right;
}
}
public static KeyValue keyValue(final String k, final String v) {
final KeyValue kv = new KeyValue();
kv.setKey(k);
kv.setValue(v);
return kv;
}
public static List<KeyValue> listKeyValues(final String... s) {
if (s.length % 2 > 0) {
throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)");
}
final List<KeyValue> list = new ArrayList<>();
for (int i = 0; i < s.length; i += 2) {
list.add(keyValue(s[i], s[i + 1]));
}
return list;
}
public static <T> Field<T> field(final T value, final DataInfo info) {
if (value == null || StringUtils.isBlank(value.toString())) {
return null;
}
final Field<T> field = new Field<>();
field.setValue(value);
field.setDataInfo(info);
return field;
}
public static List<Field<String>> listFields(final DataInfo info, final String... values) {
return Arrays
.stream(values)
.map(v -> field(v, info))
.filter(Objects::nonNull)
.filter(distinctByKey(Field::getValue))
.collect(Collectors.toList());
}
public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
return values
.stream()
.map(v -> field(v, info))
.filter(Objects::nonNull)
.filter(distinctByKey(Field::getValue))
.collect(Collectors.toList());
}
public static Qualifier unknown(final String schemeid, final String schemename) {
return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
}
public static AccessRight accessRight(
final String classid,
final String classname,
final String schemeid,
final String schemename) {
return accessRight(classid, classname, schemeid, schemename, null);
}
public static AccessRight accessRight(
final String classid,
final String classname,
final String schemeid,
final String schemename,
final OpenAccessRoute openAccessRoute) {
final AccessRight accessRight = new AccessRight();
accessRight.setClassid(classid);
accessRight.setClassname(classname);
accessRight.setSchemeid(schemeid);
accessRight.setSchemename(schemename);
accessRight.setOpenAccessRoute(openAccessRoute);
return accessRight;
}
public static Qualifier qualifier(
final String classid,
final String classname,
final String schemeid,
final String schemename) {
final Qualifier q = new Qualifier();
q.setClassid(classid);
q.setClassname(classname);
q.setSchemeid(schemeid);
q.setSchemename(schemename);
return q;
}
public static Qualifier qualifier(final Qualifier qualifier) {
final Qualifier q = new Qualifier();
q.setClassid(qualifier.getClassid());
q.setClassname(qualifier.getClassname());
q.setSchemeid(qualifier.getSchemeid());
q.setSchemename(qualifier.getSchemename());
return q;
}
public static StructuredProperty structuredProperty(
final String value,
final String classid,
final String classname,
final String schemeid,
final String schemename,
final DataInfo dataInfo) {
return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
}
public static StructuredProperty structuredProperty(
final String value,
final Qualifier qualifier,
final DataInfo dataInfo) {
if (value == null) {
return null;
}
final StructuredProperty sp = new StructuredProperty();
sp.setValue(value);
sp.setQualifier(qualifier);
sp.setDataInfo(dataInfo);
return sp;
}
public static ExtraInfo extraInfo(
final String name,
final String value,
final String typology,
final String provenance,
final String trust) {
final ExtraInfo info = new ExtraInfo();
info.setName(name);
info.setValue(value);
info.setTypology(typology);
info.setProvenance(provenance);
info.setTrust(trust);
return info;
}
public static OAIProvenance oaiIProvenance(
final String identifier,
final String baseURL,
final String metadataNamespace,
final Boolean altered,
final String datestamp,
final String harvestDate) {
final OriginDescription desc = new OriginDescription();
desc.setIdentifier(identifier);
desc.setBaseURL(baseURL);
desc.setMetadataNamespace(metadataNamespace);
desc.setAltered(altered);
desc.setDatestamp(datestamp);
desc.setHarvestDate(harvestDate);
final OAIProvenance p = new OAIProvenance();
p.setOriginDescription(desc);
return p;
}
public static Journal journal(
final String name,
final String issnPrinted,
final String issnOnline,
final String issnLinking,
final DataInfo dataInfo) {
return hasIssn(issnPrinted, issnOnline, issnLinking) ? journal(
name,
issnPrinted,
issnOnline,
issnLinking,
null,
null,
null,
null,
null,
null,
null,
dataInfo) : null;
}
public static Journal journal(
final String name,
final String issnPrinted,
final String issnOnline,
final String issnLinking,
final String ep,
final String iss,
final String sp,
final String vol,
final String edition,
final String conferenceplace,
final String conferencedate,
final DataInfo dataInfo) {
if (StringUtils.isNotBlank(name) || hasIssn(issnPrinted, issnOnline, issnLinking)) {
final Journal j = new Journal();
j.setName(name);
j.setIssnPrinted(issnPrinted);
j.setIssnOnline(issnOnline);
j.setIssnLinking(issnLinking);
j.setEp(ep);
j.setIss(iss);
j.setSp(sp);
j.setVol(vol);
j.setEdition(edition);
j.setConferenceplace(conferenceplace);
j.setConferencedate(conferencedate);
j.setDataInfo(dataInfo);
return j;
} else {
return null;
}
}
private static boolean hasIssn(String issnPrinted, String issnOnline, String issnLinking) {
return StringUtils.isNotBlank(issnPrinted)
|| StringUtils.isNotBlank(issnOnline)
|| StringUtils.isNotBlank(issnLinking);
}
public static DataInfo dataInfo(
final Boolean deletedbyinference,
final String inferenceprovenance,
final Boolean inferred,
final Boolean invisible,
final Qualifier provenanceaction,
final String trust) {
final DataInfo d = new DataInfo();
d.setDeletedbyinference(deletedbyinference);
d.setInferenceprovenance(inferenceprovenance);
d.setInferred(inferred);
d.setInvisible(invisible);
d.setProvenanceaction(provenanceaction);
d.setTrust(trust);
return d;
}
public static String createOpenaireId(
final int prefix,
final String originalId,
final boolean to_md5) {
if (StringUtils.isBlank(originalId)) {
return null;
} else if (to_md5) {
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
final String rest = StringUtils.substringAfter(originalId, "::");
return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest));
} else {
return String.format("%s|%s", prefix, originalId);
}
}
public static String createOpenaireId(
final String type,
final String originalId,
final boolean to_md5) {
switch (type) {
case "datasource":
return createOpenaireId(10, originalId, to_md5);
case "organization":
return createOpenaireId(20, originalId, to_md5);
case "person":
return createOpenaireId(30, originalId, to_md5);
case "project":
return createOpenaireId(40, originalId, to_md5);
default:
return createOpenaireId(50, originalId, to_md5);
}
}
public static String asString(final Object o) {
return o == null ? "" : o.toString();
}
public static <T> Predicate<T> distinctByKey(
final Function<? super T, ?> keyExtractor) {
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
}
public static Qualifier createBestAccessRights(final List<Instance> instanceList) {
return getBestAccessRights(instanceList);
}
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
if (instanceList != null) {
final Optional<AccessRight> min = instanceList
.stream()
.map(Instance::getAccessright)
.min(new AccessRightComparator<>());
final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new);
if (StringUtils.isBlank(rights.getClassid())) {
rights.setClassid(UNKNOWN);
}
if (StringUtils.isBlank(rights.getClassname())
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
rights.setClassname(NOT_AVAILABLE);
}
if (StringUtils.isBlank(rights.getSchemeid())) {
rights.setSchemeid(DNET_ACCESS_MODES);
}
if (StringUtils.isBlank(rights.getSchemename())) {
rights.setSchemename(DNET_ACCESS_MODES);
}
return rights;
}
return null;
}
}
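A minimal sketch of listFields with distinctByKey, the dedup idiom used throughout this class (a null DataInfo is passed purely for brevity):

    import java.util.List;

    import eu.dnetlib.dhp.schema.oaf.Field;
    import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

    public class ListFieldsSketch {
        public static void main(String[] args) {
            // blanks are dropped by field(...) and duplicates by distinctByKey(Field::getValue)
            final List<Field<String>> fields = OafMapperUtils.listFields(null, "a", "a", " ", "b");
            fields.forEach(f -> System.out.println(f.getValue())); // prints "a" then "b"
        }
    }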

View File

@@ -1,27 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
public class OrganizationPidComparator implements Comparator<PidType> {
@Override
public int compare(PidType pLeft, PidType pRight) {
if (pLeft.equals(PidType.GRID))
return -1;
if (pRight.equals(PidType.GRID))
return 1;
if (pLeft.equals(PidType.mag_id))
return -1;
if (pRight.equals(PidType.mag_id))
return 1;
if (pLeft.equals(PidType.urn))
return -1;
if (pRight.equals(PidType.urn))
return 1;
return 0;
}
}

View File

@@ -1,54 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {
private T entity;
public PidComparator(T entity) {
this.entity = entity;
}
@Override
public int compare(StructuredProperty left, StructuredProperty right) {
if (left == null && right == null)
return 0;
if (left == null)
return 1;
if (right == null)
return -1;
PidType lClass = PidType.valueOf(left.getQualifier().getClassid());
PidType rClass = PidType.valueOf(right.getQualifier().getClassid());
if (lClass.equals(rClass))
return 0;
if (ModelSupport.isSubClass(entity, Result.class)) {
return compareResultPids(lClass, rClass);
}
if (ModelSupport.isSubClass(entity, Organization.class)) {
return compareOrganizationtPids(lClass, rClass);
}
// Else (but unlikely), lexicographical ordering will do.
return lClass.compareTo(rClass);
}
private int compareResultPids(PidType lClass, PidType rClass) {
return new ResultPidComparator().compare(lClass, rClass);
}
private int compareOrganizationtPids(PidType lClass, PidType rClass) {
return new OrganizationPidComparator().compare(lClass, rClass);
}
}

View File

@@ -1,29 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import org.apache.commons.lang3.EnumUtils;
public enum PidType {
// Result
doi, pmid, pmc, handle, arXiv, NCID, GBIF, nct, pdb,
// Organization
GRID, mag_id, urn,
// Used by dedup
undefined, original;
public static boolean isValid(String type) {
return EnumUtils.isValidEnum(PidType.class, type);
}
public static PidType tryValueOf(String s) {
try {
return PidType.valueOf(s);
} catch (Exception e) {
return PidType.original;
}
}
}
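A minimal sketch of the lenient parsing contract, where tryValueOf never throws and falls back to PidType.original:

    import eu.dnetlib.dhp.schema.oaf.utils.PidType;

    public class PidTypeSketch {
        public static void main(String[] args) {
            System.out.println(PidType.isValid("doi"));             // true
            System.out.println(PidType.tryValueOf("doi"));          // doi
            System.out.println(PidType.tryValueOf("unknown-type")); // original (the fallback)
        }
    }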

View File

@@ -1,57 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
public class ResultPidComparator implements Comparator<PidType> {
@Override
public int compare(PidType pLeft, PidType pRight) {
if (pLeft.equals(PidType.doi))
return -1;
if (pRight.equals(PidType.doi))
return 1;
if (pLeft.equals(PidType.pmid))
return -1;
if (pRight.equals(PidType.pmid))
return 1;
if (pLeft.equals(PidType.pmc))
return -1;
if (pRight.equals(PidType.pmc))
return 1;
if (pLeft.equals(PidType.handle))
return -1;
if (pRight.equals(PidType.handle))
return 1;
if (pLeft.equals(PidType.arXiv))
return -1;
if (pRight.equals(PidType.arXiv))
return 1;
if (pLeft.equals(PidType.NCID))
return -1;
if (pRight.equals(PidType.NCID))
return 1;
if (pLeft.equals(PidType.GBIF))
return -1;
if (pRight.equals(PidType.GBIF))
return 1;
if (pLeft.equals(PidType.nct))
return -1;
if (pRight.equals(PidType.nct))
return 1;
if (pLeft.equals(PidType.urn))
return -1;
if (pRight.equals(PidType.urn))
return 1;
return 0;
}
}
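A minimal sketch of the resulting precedence, with doi ranked first among the result pid types:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    import eu.dnetlib.dhp.schema.oaf.utils.PidType;
    import eu.dnetlib.dhp.schema.oaf.utils.ResultPidComparator;

    public class PidOrderSketch {
        public static void main(String[] args) {
            final List<PidType> types = new ArrayList<>(
                Arrays.asList(PidType.arXiv, PidType.pmid, PidType.doi));
            types.sort(new ResultPidComparator());
            System.out.println(types); // [doi, pmid, arXiv]
        }
    }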

View File

@@ -1,63 +1,107 @@
package eu.dnetlib.dhp.utils;
- import java.io.ByteArrayInputStream;
- import java.io.ByteArrayOutputStream;
+ import java.io.*;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import net.minidev.json.JSONArray;
import scala.collection.JavaConverters;
import scala.collection.Seq;
public class DHPUtils {
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
private DHPUtils() {
}
public static Seq<String> toSeq(List<String> list) {
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
}
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
- System.err.println("Error creating id");
+ log.error("Error creating id from {}", s);
return null;
}
}
/**
* Retrieves from the metadata store manager application the list of paths associated with mdstores characterized
* by the given format, layout, interpretation
* @param mdstoreManagerUrl the URL of the mdstore manager service
* @param format the mdstore format
* @param layout the mdstore layout
* @param interpretation the mdstore interpretation
* @param includeEmpty include empty mdstores
* @return the set of hdfs paths
* @throws IOException in case of HTTP communication issues
*/
public static Set<String> mdstorePaths(final String mdstoreManagerUrl,
final String format,
final String layout,
final String interpretation,
boolean includeEmpty) throws IOException {
final String url = mdstoreManagerUrl + "/mdstores/";
final ObjectMapper objectMapper = new ObjectMapper();
final HttpGet req = new HttpGet(url);
try (final CloseableHttpClient client = HttpClients.createDefault()) {
try (final CloseableHttpResponse response = client.execute(req)) {
final String json = IOUtils.toString(response.getEntity().getContent());
final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class);
return Arrays
.stream(mdstores)
.filter(md -> md.getFormat().equalsIgnoreCase(format))
.filter(md -> md.getLayout().equalsIgnoreCase(layout))
.filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation))
.filter(md -> StringUtils.isNotBlank(md.getHdfsPath()))
.filter(md -> StringUtils.isNotBlank(md.getCurrentVersion()))
.filter(md -> includeEmpty || md.getSize() > 0)
.map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store")
.collect(Collectors.toSet());
}
}
}
public static String generateIdentifier(final String originalId, final String nsPrefix) {
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
}
public static String compressString(final String input) {
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
Base64OutputStream b64os = new Base64OutputStream(out)) {
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
gzip.write(input.getBytes(StandardCharsets.UTF_8));
gzip.close();
return out.toString();
} catch (Throwable e) {
return null;
}
}
public static String decompressString(final String input) {
byte[] byteArray = Base64.decodeBase64(input.getBytes());
int len;
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
byte[] buffer = new byte[1024];
while ((len = gis.read(buffer)) != -1) {
bos.write(buffer, 0, len);
}
return bos.toString();
} catch (Exception e) {
return null;
}
}
public static String generateUnresolvedIdentifier(final String pid, final String pidType) {
final String cleanedPid = CleaningFunctions.normalizePidValue(pidType, pid);
return String.format("unresolved::%s::%s", cleanedPid, pidType.toLowerCase().trim());
}
public static String getJPathString(final String jsonPath, final String json) {
@@ -72,4 +116,72 @@ public class DHPUtils {
return "";
}
}
public static final ObjectMapper MAPPER = new ObjectMapper();
public static void writeHdfsFile(final Configuration conf, final String content, final String path)
throws IOException {
log.info("writing file {}, size {}", path, content.length());
try (FileSystem fs = FileSystem.get(conf);
BufferedOutputStream os = new BufferedOutputStream(fs.create(new Path(path)))) {
os.write(content.getBytes(StandardCharsets.UTF_8));
os.flush();
}
}
public static String readHdfsFile(Configuration conf, String path) throws IOException {
log.info("reading file {}", path);
try (FileSystem fs = FileSystem.get(conf)) {
final Path p = new Path(path);
if (!fs.exists(p)) {
throw new FileNotFoundException(path);
}
return IOUtils.toString(fs.open(p));
}
}
public static <T> T readHdfsFileAs(Configuration conf, String path, Class<T> clazz) throws IOException {
return MAPPER.readValue(readHdfsFile(conf, path), clazz);
}
public static <T> void saveDataset(final Dataset<T> mdstore, final String targetPath) {
log.info("saving dataset in: {}", targetPath);
mdstore
.write()
.mode(SaveMode.Overwrite)
.format("parquet")
.save(targetPath);
}
public static Configuration getHadoopConfiguration(String nameNode) {
// ====== Init HDFS File System Object
Configuration conf = new Configuration();
// Set FileSystem URI
conf.set("fs.defaultFS", nameNode);
// Because of Maven
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
System.setProperty("hadoop.home.dir", "/");
return conf;
}
public static void populateOOZIEEnv(final Map<String, String> report) throws IOException {
File file = new File(System.getProperty("oozie.action.output.properties"));
Properties props = new Properties();
report.forEach((k, v) -> props.setProperty(k, v));
try (OutputStream os = new FileOutputStream(file)) {
props.store(os, "");
}
}
public static void populateOOZIEEnv(final String paramName, String value) throws IOException {
Map<String, String> report = Maps.newHashMap();
report.put(paramName, value);
populateOOZIEEnv(report);
}
}
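A minimal sketch of the new gzip plus Base64 helpers round-tripping a payload:

    import eu.dnetlib.dhp.utils.DHPUtils;

    public class CompressSketch {
        public static void main(String[] args) {
            // compressString gzips and Base64-encodes; decompressString reverses it
            final String packed = DHPUtils.compressString("<record>some payload</record>");
            System.out.println(DHPUtils.decompressString(packed)); // <record>some payload</record>
        }
    }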

View File

@@ -1,11 +1,11 @@
package eu.dnetlib.dhp.utils;
import java.util.Map;
import javax.xml.ws.BindingProvider;
import org.apache.cxf.endpoint.Client;
import org.apache.cxf.frontend.ClientProxy;
import org.apache.cxf.jaxws.JaxWsProxyFactoryBean;
import org.apache.cxf.transport.http.HTTPConduit;
import org.apache.cxf.transports.http.configuration.HTTPClientPolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -15,8 +15,11 @@ public class ISLookupClientFactory {
private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
- private static int requestTimeout = 60000 * 10;
- private static int connectTimeout = 60000 * 10;
+ private static final int requestTimeout = 60000 * 10;
+ private static final int connectTimeout = 60000 * 10;
+ private ISLookupClientFactory() {
+ }
public static ISLookUpService getLookUpService(final String isLookupUrl) {
return getServiceStub(ISLookUpService.class, isLookupUrl);
@@ -24,27 +27,28 @@
@SuppressWarnings("unchecked")
private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
- log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint));
+ log.info("creating {} stub from {}", clazz.getName(), endpoint);
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
jaxWsProxyFactory.setServiceClass(clazz);
jaxWsProxyFactory.setAddress(endpoint);
final T service = (T) jaxWsProxyFactory.create();
if (service instanceof BindingProvider) {
+ Client client = ClientProxy.getClient(service);
+ if (client != null) {
+ HTTPConduit conduit = (HTTPConduit) client.getConduit();
+ HTTPClientPolicy policy = new HTTPClientPolicy();
log
.info(
- "setting timeouts for {} to requestTimeout: {}, connectTimeout: {}",
- BindingProvider.class.getName(), requestTimeout, connectTimeout);
+ "setting connectTimeout to {}, requestTimeout to {} for service {}",
+ connectTimeout,
+ requestTimeout,
+ clazz.getCanonicalName());
- Map<String, Object> requestContext = ((BindingProvider) service).getRequestContext();
- requestContext.put("com.sun.xml.internal.ws.request.timeout", requestTimeout);
- requestContext.put("com.sun.xml.internal.ws.connect.timeout", connectTimeout);
- requestContext.put("com.sun.xml.ws.request.timeout", requestTimeout);
- requestContext.put("com.sun.xml.ws.connect.timeout", connectTimeout);
- requestContext.put("javax.xml.ws.client.receiveTimeout", requestTimeout);
- requestContext.put("javax.xml.ws.client.connectionTimeout", connectTimeout);
+ policy.setConnectionTimeout(connectTimeout);
+ policy.setReceiveTimeout(requestTimeout);
+ conduit.setClient(policy);
+ }
}
return service;

View File

@@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException;
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
- public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
+ public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
public abstract String getName();

View File

@@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction {
@Override
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
- if (arguments == null | arguments.length == 0) {
+ if (arguments == null || arguments.length == 0) {
return new StringValue("");
}
final Item item = arguments[0].head();
@@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction {
for (String format : dateFormats) {
try {
c.setTime(new SimpleDateFormat(format).parse(s));
- String year = String.valueOf(c.get(Calendar.YEAR));
- return year;
+ return String.valueOf(c.get(Calendar.YEAR));
} catch (ParseException e) {
}
}

View File

@@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
@Override
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
- if (arguments == null | arguments.length == 0) {
+ if (arguments == null || arguments.length == 0) {
return new StringValue(BLANK);
}
String s = arguments[0].head().getStringValue();

View File

@@ -1,6 +1,8 @@
package eu.dnetlib.dhp.utils.saxon;
+ import static org.apache.commons.lang3.StringUtils.isNotBlank;
import org.apache.commons.lang3.StringUtils;
import net.sf.saxon.expr.XPathContext;
@@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction {
final String s1 = getValue(arguments[0]);
final String s2 = getValue(arguments[1]);
- return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
+ final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : "";
+ return new StringValue(value);
}
private String getValue(final Sequence arg) throws XPathException {

View File

@@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl;
public class SaxonTransformerFactory {
+ private SaxonTransformerFactory() {
+ }
/**
* Creates the index record transformer from the given XSLT
*

View File

@@ -1,76 +0,0 @@
package eu.dnetlib.message;
import java.io.IOException;
import java.util.Map;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
public class Message {
private String workflowId;
private String jobName;
private MessageType type;
private Map<String, String> body;
public static Message fromJson(final String json) throws IOException {
final ObjectMapper jsonMapper = new ObjectMapper();
return jsonMapper.readValue(json, Message.class);
}
public Message() {
}
public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) {
this.workflowId = workflowId;
this.jobName = jobName;
this.type = type;
this.body = body;
}
public String getWorkflowId() {
return workflowId;
}
public void setWorkflowId(String workflowId) {
this.workflowId = workflowId;
}
public String getJobName() {
return jobName;
}
public void setJobName(String jobName) {
this.jobName = jobName;
}
public MessageType getType() {
return type;
}
public void setType(MessageType type) {
this.type = type;
}
public Map<String, String> getBody() {
return body;
}
public void setBody(Map<String, String> body) {
this.body = body;
}
@Override
public String toString() {
final ObjectMapper jsonMapper = new ObjectMapper();
try {
return jsonMapper.writeValueAsString(this);
} catch (JsonProcessingException e) {
return null;
}
}
}
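This class (removed in this diff) implemented a plain JSON round trip via Jackson; a minimal Scala sketch of the contract it offered:

import java.util.{HashMap => JMap}
import eu.dnetlib.message.{Message, MessageType}

val body = new JMap[String, String]()
body.put("parsedItem", "300")

val m = new Message("wf-1", "Collection", MessageType.ONGOING, body)
val json = m.toString              // serialised with the ObjectMapper
val copy = Message.fromJson(json)  // parsed back into a Message
assert(copy.getWorkflowId == m.getWorkflowId)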

View File

@ -1,47 +0,0 @@
package eu.dnetlib.message;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.LinkedBlockingQueue;
import com.rabbitmq.client.AMQP;
import com.rabbitmq.client.Channel;
import com.rabbitmq.client.DefaultConsumer;
import com.rabbitmq.client.Envelope;
public class MessageConsumer extends DefaultConsumer {
final LinkedBlockingQueue<Message> queueMessages;
/**
* Constructs a new instance and records its association to the passed-in channel.
*
* @param channel the channel to which this consumer is attached
* @param queueMessages the blocking queue where received messages are put
*/
public MessageConsumer(Channel channel, LinkedBlockingQueue<Message> queueMessages) {
super(channel);
this.queueMessages = queueMessages;
}
@Override
public void handleDelivery(
String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body)
throws IOException {
final String json = new String(body, StandardCharsets.UTF_8);
Message message = Message.fromJson(json);
try {
this.queueMessages.put(message);
System.out.println("Receiving Message " + message);
} catch (InterruptedException e) {
if (message.getType() == MessageType.REPORT)
throw new RuntimeException("Error on receiving message", e);
else {
// TODO LOGGING EXCEPTION
}
} finally {
getChannel().basicAck(envelope.getDeliveryTag(), false);
}
}
}

View File

@ -1,136 +0,0 @@
package eu.dnetlib.message;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeoutException;
import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory;
public class MessageManager {
private final String messageHost;
private final String username;
private final String password;
private Connection connection;
private final Map<String, Channel> channels = new HashMap<>();
private boolean durable;
private boolean autodelete;
private final LinkedBlockingQueue<Message> queueMessages;
public MessageManager(
String messageHost,
String username,
String password,
final LinkedBlockingQueue<Message> queueMessages) {
this.queueMessages = queueMessages;
this.messageHost = messageHost;
this.username = username;
this.password = password;
}
public MessageManager(
String messageHost,
String username,
String password,
boolean durable,
boolean autodelete,
final LinkedBlockingQueue<Message> queueMessages) {
this.queueMessages = queueMessages;
this.messageHost = messageHost;
this.username = username;
this.password = password;
this.durable = durable;
this.autodelete = autodelete;
}
private Connection createConnection() throws IOException, TimeoutException {
ConnectionFactory factory = new ConnectionFactory();
factory.setHost(this.messageHost);
factory.setUsername(this.username);
factory.setPassword(this.password);
return factory.newConnection();
}
private Channel createChannel(
final Connection connection,
final String queueName,
final boolean durable,
final boolean autodelete)
throws Exception {
Map<String, Object> args = new HashMap<>();
args.put("x-message-ttl", 10000);
Channel channel = connection.createChannel();
channel.queueDeclare(queueName, durable, false, autodelete, args);
return channel;
}
private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete)
throws Exception {
if (channels.containsKey(queueName)) {
return channels.get(queueName);
}
if (this.connection == null) {
this.connection = createConnection();
}
channels.put(queueName, createChannel(this.connection, queueName, durable, autodelete));
return channels.get(queueName);
}
public void close() throws IOException {
channels
.values()
.forEach(
ch -> {
try {
ch.close();
} catch (Exception e) {
// TODO LOG
}
});
this.connection.close();
}
public boolean sendMessage(final Message message, String queueName) throws Exception {
try {
Channel channel = getOrCreateChannel(queueName, this.durable, this.autodelete);
channel.basicPublish("", queueName, null, message.toString().getBytes());
return true;
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
public boolean sendMessage(
final Message message, String queueName, boolean durable_var, boolean autodelete_var)
throws Exception {
try {
Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var);
channel.basicPublish("", queueName, null, message.toString().getBytes());
return true;
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
public void startConsumingMessage(
final String queueName, final boolean durable, final boolean autodelete) throws Exception {
Channel channel = createChannel(createConnection(), queueName, durable, autodelete);
channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages));
}
}
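Putting consumer and manager together, the removed messaging API was driven roughly as follows; host, credentials and queue name below are placeholders:

import java.util.concurrent.LinkedBlockingQueue
import eu.dnetlib.message.{Message, MessageManager, MessageType}

val queue = new LinkedBlockingQueue[Message]()
val manager = new MessageManager("rabbit.example.org", "user", "secret", queue)

// consume: deliveries are enqueued by MessageConsumer.handleDelivery
manager.startConsumingMessage("dhp.status", true, false)

// produce: publish a status message on the same queue
val body = new java.util.HashMap[String, String]()
body.put("progress", "42")
manager.sendMessage(new Message("wf-1", "Collection", MessageType.ONGOING, body), "dhp.status")

val received = queue.take() // blocks until a message arrives
manager.close()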

View File

@ -1,6 +0,0 @@
package eu.dnetlib.message;
public enum MessageType {
ONGOING, REPORT
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,73 @@
package eu.dnetlib.dhp.application
import scala.io.Source
/** This is the main interface SparkApplication,
* which all Spark Scala classes should inherit from
*/
trait SparkScalaApplication {
/** This is the path in the classpath of the json
* that describes all the arguments needed to run
*/
val propertyPath: String
/** Utility to parse the arguments using the
* property json in the classpath identified by
* the variable propertyPath
*
* @param args the list of arguments
*/
def parseArguments(args: Array[String]): ArgumentApplicationParser = {
val parser = new ArgumentApplicationParser(
Source.fromInputStream(getClass.getResourceAsStream(propertyPath)).mkString
)
parser.parseArgument(args)
parser
}
/** All Spark applications run this method,
* where the whole logic of the Spark node is defined
*/
def run(): Unit
}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.slf4j.Logger
abstract class AbstractScalaApplication(
val propertyPath: String,
val args: Array[String],
log: Logger
) extends SparkScalaApplication {
var parser: ArgumentApplicationParser = null
var spark: SparkSession = null
def initialize(): SparkScalaApplication = {
parser = parseArguments(args)
spark = createSparkSession()
this
}
/** Utility for creating a spark session starting from parser
*
* @return a spark Session
*/
private def createSparkSession(): SparkSession = {
require(parser != null)
val conf: SparkConf = new SparkConf()
val master = parser.get("master")
log.info(s"Creating Spark session: Master: $master")
SparkSession
.builder()
.config(conf)
.appName(getClass.getSimpleName)
.master(master)
.getOrCreate()
}
}
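As a sketch of the intended usage pattern, a concrete job extends AbstractScalaApplication and implements run(); the property path and the sourcePath argument below are hypothetical:

import org.slf4j.{Logger, LoggerFactory}

class ExampleJob(args: Array[String], log: Logger)
    extends AbstractScalaApplication("/eu/dnetlib/dhp/example/example_params.json", args, log) {

  // parser and spark are initialised by initialize() before run() is invoked
  override def run(): Unit = {
    val sourcePath = parser.get("sourcePath") // hypothetical argument
    log.info(s"sourcePath: $sourcePath")
    spark.read.text(sourcePath).show(10, false)
  }
}

object ExampleJob {
  def main(args: Array[String]): Unit = {
    val log: Logger = LoggerFactory.getLogger(classOf[ExampleJob])
    new ExampleJob(args, log).initialize().run()
  }
}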

View File

@ -0,0 +1,442 @@
package eu.dnetlib.dhp.sx.graph.scholix
import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty}
import eu.dnetlib.dhp.schema.sx.scholix._
import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology}
import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.{Encoder, Encoders}
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import scala.collection.JavaConverters._
import scala.io.Source
object ScholixUtils extends Serializable {
val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier"
val DATE_RELATION_KEY: String = "RelationDate"
case class RelationVocabulary(original: String, inverse: String) {}
case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {}
val relations: Map[String, RelationVocabulary] = {
val input = Source
.fromInputStream(
getClass.getResourceAsStream("/eu/dnetlib/scholexplorer/relation/relations.json")
)
.mkString
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
json.extract[Map[String, RelationVocabulary]]
}
def extractRelationDate(relation: Relation): String = {
if (relation.getProperties == null || relation.getProperties.isEmpty)
null
else {
val date = relation.getProperties.asScala
.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey))
.map(p => p.getValue)
if (date.isDefined)
date.get
else
null
}
}
def extractRelationDate(summary: ScholixSummary): String = {
if (summary.getDate == null || summary.getDate.isEmpty)
null
else {
summary.getDate.get(0)
}
}
def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = {
new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName)
}
def generateScholixResourceFromResult(r: Result): ScholixResource = {
generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r))
}
val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] =
new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable {
override def zero: RelatedEntities = null
override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = {
val relatedDataset = if ("dataset".equalsIgnoreCase(a._2)) a._3 else 0
val relatedPublication = if ("publication".equalsIgnoreCase(a._2)) a._3 else 0
if (b == null)
RelatedEntities(a._1, relatedDataset, relatedPublication)
else
RelatedEntities(
a._1,
b.relatedDataset + relatedDataset,
b.relatedPublication + relatedPublication
)
}
override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = {
if (b1 != null && b2 != null)
RelatedEntities(
b1.id,
b1.relatedDataset + b2.relatedDataset,
b1.relatedPublication + b2.relatedPublication
)
else if (b1 != null)
b1
else
b2
}
override def finish(reduction: RelatedEntities): RelatedEntities = reduction
override def bufferEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities])
override def outputEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities])
}
val scholixAggregator: Aggregator[(String, Scholix), Scholix, Scholix] =
new Aggregator[(String, Scholix), Scholix, Scholix] with Serializable {
override def zero: Scholix = null
def scholix_complete(s: Scholix): Boolean = {
if (s == null || s.getIdentifier == null) {
false
} else if (s.getSource == null || s.getTarget == null) {
false
} else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty)
false
else
true
}
override def reduce(b: Scholix, a: (String, Scholix)): Scholix = {
if (scholix_complete(b)) b else a._2
}
override def merge(b1: Scholix, b2: Scholix): Scholix = {
if (scholix_complete(b1)) b1 else b2
}
override def finish(reduction: Scholix): Scholix = reduction
override def bufferEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
override def outputEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
}
def createInverseScholixRelation(scholix: Scholix): Scholix = {
val s = new Scholix
s.setPublicationDate(scholix.getPublicationDate)
s.setPublisher(scholix.getPublisher)
s.setLinkprovider(scholix.getLinkprovider)
s.setRelationship(inverseRelationShip(scholix.getRelationship))
s.setSource(scholix.getTarget)
s.setTarget(scholix.getSource)
s.setIdentifier(
DHPUtils.md5(
s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}"
)
)
s
}
def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = {
if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) {
val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { d =>
new ScholixEntityId(d.getProvider.getName, d.getProvider.getIdentifiers)
}(collection.breakOut)
l
} else List()
}
def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = {
if (summary.getDatasources != null && !summary.getDatasources.isEmpty) {
val l: List[ScholixEntityId] = summary.getDatasources.asScala.map { d =>
new ScholixEntityId(
d.getDatasourceName,
List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", null)).asJava
)
}(collection.breakOut)
l
} else List()
}
def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = {
if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) {
val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { c =>
new ScholixEntityId(
c.getValue,
List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava
)
}.toList
l
} else List()
}
def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = {
val s = new Scholix
s.setPublicationDate(scholix.getPublicationDate)
s.setPublisher(scholix.getPublisher)
s.setLinkprovider(scholix.getLinkprovider)
s.setRelationship(scholix.getRelationship)
s.setSource(scholix.getSource)
s.setTarget(generateScholixResourceFromSummary(target))
s.setIdentifier(
DHPUtils.md5(
s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}"
)
)
s
}
def generateCompleteScholix(scholix: Scholix, target: ScholixResource): Scholix = {
val s = new Scholix
s.setPublicationDate(scholix.getPublicationDate)
s.setPublisher(scholix.getPublisher)
s.setLinkprovider(scholix.getLinkprovider)
s.setRelationship(scholix.getRelationship)
s.setSource(scholix.getSource)
s.setTarget(target)
s.setIdentifier(
DHPUtils.md5(
s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}"
)
)
s
}
def generateScholixResourceFromSummary(summaryObject: ScholixSummary): ScholixResource = {
val r = new ScholixResource
r.setIdentifier(summaryObject.getLocalIdentifier)
r.setDnetIdentifier(summaryObject.getId)
r.setObjectType(summaryObject.getTypology.toString)
r.setObjectSubType(summaryObject.getSubType)
if (summaryObject.getTitle != null && !summaryObject.getTitle.isEmpty)
r.setTitle(summaryObject.getTitle.get(0))
if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) {
val l: List[ScholixEntityId] =
summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList
if (l.nonEmpty)
r.setCreator(l.asJava)
}
if (summaryObject.getDate != null && !summaryObject.getDate.isEmpty)
r.setPublicationDate(summaryObject.getDate.get(0))
if (summaryObject.getPublisher != null && !summaryObject.getPublisher.isEmpty) {
val plist: List[ScholixEntityId] =
summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList
if (plist.nonEmpty)
r.setPublisher(plist.asJava)
}
if (summaryObject.getDatasources != null && !summaryObject.getDatasources.isEmpty) {
val l: List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala
.map(c =>
new ScholixCollectedFrom(
new ScholixEntityId(
c.getDatasourceName,
List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava
),
"collected",
"complete"
)
)
.toList
if (l.nonEmpty)
r.setCollectedFrom(l.asJava)
}
r
}
def scholixFromSource(relation: Relation, source: ScholixResource): Scholix = {
if (relation == null || source == null)
return null
val s = new Scholix
var l: List[ScholixEntityId] = extractCollectedFrom(relation)
if (l.isEmpty)
l = extractCollectedFrom(source)
if (l.isEmpty)
return null
s.setLinkprovider(l.asJava)
var d = extractRelationDate(relation)
if (d == null)
d = source.getPublicationDate
s.setPublicationDate(d)
if (source.getPublisher != null && !source.getPublisher.isEmpty) {
s.setPublisher(source.getPublisher)
}
val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null)
if (semanticRelation == null)
return null
s.setRelationship(
new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)
)
s.setSource(source)
s
}
def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = {
if (relation == null || source == null)
return null
val s = new Scholix
var l: List[ScholixEntityId] = extractCollectedFrom(relation)
if (l.isEmpty)
l = extractCollectedFrom(source)
if (l.isEmpty)
return null
s.setLinkprovider(l.asJava)
var d = extractRelationDate(relation)
if (d == null)
d = extractRelationDate(source)
s.setPublicationDate(d)
if (source.getPublisher != null && !source.getPublisher.isEmpty) {
val l: List[ScholixEntityId] = source.getPublisher.asScala
.map { p =>
new ScholixEntityId(p, null)
}(collection.breakOut)
if (l.nonEmpty)
s.setPublisher(l.asJava)
}
val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null)
if (semanticRelation == null)
return null
s.setRelationship(
new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)
)
s.setSource(generateScholixResourceFromSummary(source))
s
}
def findURLForPID(
pidValue: List[StructuredProperty],
urls: List[String]
): List[(StructuredProperty, String)] = {
pidValue.map { p =>
val pv = p.getValue
val r = urls.find(u => u.toLowerCase.contains(pv.toLowerCase))
(p, r.orNull)
}
}
def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = {
if (r.getInstance() == null || r.getInstance().isEmpty)
return List()
r.getInstance()
.asScala
.filter(i => i.getUrl != null && !i.getUrl.isEmpty)
.filter(i => i.getPid != null && i.getUrl != null)
.flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList))
.map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2))
.distinct
.toList
}
def resultToSummary(r: Result): ScholixSummary = {
val s = new ScholixSummary
s.setId(r.getId)
if (r.getPid == null || r.getPid.isEmpty)
return null
val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r)
if (persistentIdentifiers.isEmpty)
return null
s.setLocalIdentifier(persistentIdentifiers.asJava)
if (r.isInstanceOf[Publication])
s.setTypology(Typology.publication)
else
s.setTypology(Typology.dataset)
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
if (r.getTitle != null && r.getTitle.asScala.nonEmpty) {
val titles: List[String] = r.getTitle.asScala.map(t => t.getValue).toList
if (titles.nonEmpty)
s.setTitle(titles.asJava)
else
return null
}
if (r.getAuthor != null && !r.getAuthor.isEmpty) {
val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname).toList
if (authors.nonEmpty)
s.setAuthor(authors.asJava)
}
if (r.getInstance() != null) {
val dt: List[String] = r
.getInstance()
.asScala
.filter(i => i.getDateofacceptance != null)
.map(i => i.getDateofacceptance.getValue)
.toList
if (dt.nonEmpty)
s.setDate(dt.distinct.asJava)
}
if (r.getDescription != null && !r.getDescription.isEmpty) {
val d = r.getDescription.asScala.find(f => f != null && f.getValue != null)
if (d.isDefined)
s.setDescription(d.get.getValue)
}
if (r.getSubject != null && !r.getSubject.isEmpty) {
val subjects: List[SchemeValue] = r.getSubject.asScala
.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))
.toList
if (subjects.nonEmpty)
s.setSubject(subjects.asJava)
}
if (r.getPublisher != null)
s.setPublisher(List(r.getPublisher.getValue).asJava)
if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) {
val cf: List[CollectedFromType] = r.getCollectedfrom.asScala
.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))
.toList
if (cf.nonEmpty)
s.setDatasources(cf.distinct.asJava)
}
s.setRelatedDatasets(0)
s.setRelatedPublications(0)
s.setRelatedUnknown(0)
s
}
}
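For orientation, the two aggregators above plug into Spark's typed aggregation. A rough sketch of deduplicating Scholix links by identifier (the input dataset and encoders are assumptions):

import org.apache.spark.sql.{Dataset, Encoder, Encoders}
import eu.dnetlib.dhp.schema.sx.scholix.Scholix

// assumed input: links keyed by identifier, e.g. (s.getIdentifier, s)
def dedupScholix(keyed: Dataset[(String, Scholix)]): Dataset[Scholix] = {
  implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]

  keyed
    .groupByKey(_._1)(Encoders.STRING)
    .agg(ScholixUtils.scholixAggregator.toColumn) // keeps one "complete" link per key
    .map(_._2)
}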

View File

@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
-public class ArgumentApplicationParserTest {
+class ArgumentApplicationParserTest {
@Test
-public void testParseParameter() throws Exception {
+void testParseParameter() throws Exception {
final String jsonConfiguration = IOUtils
.toString(
this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));

View File

@ -21,13 +21,13 @@ public class HdfsSupportTest {
class Remove {
@Test
-public void shouldThrowARuntimeExceptionOnError() {
+void shouldThrowARuntimeExceptionOnError() {
// when
assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
}
@Test
-public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
+void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
// when
HdfsSupport.remove(tempDir.toString(), new Configuration());
@ -36,7 +36,7 @@ public class HdfsSupportTest {
}
@Test
-public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
+void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
// given
Path file = Files.createTempFile(tempDir, "p", "s");
@ -52,13 +52,13 @@ public class HdfsSupportTest {
class ListFiles {
@Test
-public void shouldThrowARuntimeExceptionOnError() {
+void shouldThrowARuntimeExceptionOnError() {
// when
assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
}
@Test
-public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
+void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
Path subDir2 = Files.createTempDirectory(tempDir, "list_me");

View File

@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Test;
-public class PacePersonTest {
+class PacePersonTest {
@Test
-public void pacePersonTest1() {
+void pacePersonTest1() {
PacePerson p = new PacePerson("Artini, Michele", false);
assertEquals("Artini", p.getSurnameString());
@ -17,7 +17,7 @@ public class PacePersonTest {
}
@Test
-public void pacePersonTest2() {
+void pacePersonTest2() {
PacePerson p = new PacePerson("Michele G. Artini", false);
assertEquals("Artini, Michele G.", p.getNormalisedFullname());
assertEquals("Michele G", p.getNameString());

View File

@ -18,7 +18,8 @@ public class SparkSessionSupportTest {
class RunWithSparkSession {
@Test
-public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
+@SuppressWarnings("unchecked")
+void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
throws Exception {
// given
SparkSession spark = mock(SparkSession.class);
@ -37,7 +38,8 @@ public class SparkSessionSupportTest {
}
@Test
-public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
+@SuppressWarnings("unchecked")
+void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
throws Exception {
// given
SparkSession spark = mock(SparkSession.class);

View File

@ -12,15 +12,39 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@Disabled
-public class ZenodoAPIClientTest {
+class ZenodoAPIClientTest {
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
private final String ACCESS_TOKEN = "";
private final String CONCEPT_REC_ID = "657113";
private final String depositionId = "674915";
@Test
-public void testNewDeposition() throws IOException {
+void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
+ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+ACCESS_TOKEN);
+Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
+File file = new File(getClass()
+.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
+.getPath());
+InputStream is = new FileInputStream(file);
+Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
+String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
+Assertions.assertEquals(200, client.sendMretadata(metadata));
+Assertions.assertEquals(202, client.publish());
+}
+@Test
+void testNewDeposition() throws IOException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
@ -43,7 +67,7 @@ public class ZenodoAPIClientTest {
}
@Test
-public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
+void testNewVersionNewName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
@ -63,7 +87,7 @@ public class ZenodoAPIClientTest {
}
@Test
-public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
+void testNewVersionOldName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);

View File

@ -1,16 +0,0 @@
package eu.dnetlib.dhp.model.mdstore;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
public class MetadataRecordTest {
@Test
public void getTimestamp() {
MetadataRecord r = new MetadataRecord();
assertTrue(r.getDateOfCollection() > 0);
}
}

View File

@ -0,0 +1,100 @@
package eu.dnetlib.dhp.oa.merge;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.util.MapDocumentUtil;
import scala.Tuple2;
class AuthorMergerTest {
private String publicationsBasePath;
private List<List<Author>> authors;
@BeforeEach
public void setUp() throws Exception {
publicationsBasePath = Paths
.get(AuthorMergerTest.class.getResource("/eu/dnetlib/dhp/oa/merge").toURI())
.toFile()
.getAbsolutePath();
authors = readSample(publicationsBasePath + "/publications_with_authors.json", Publication.class)
.stream()
.map(p -> p._2().getAuthor())
.collect(Collectors.toList());
}
@Test
void mergeTest() { // used in the dedup: threshold set to 0.95
for (List<Author> authors1 : authors) {
System.out.println("List " + (authors.indexOf(authors1) + 1));
for (Author author : authors1) {
System.out.println(authorToString(author));
}
}
List<Author> merge = AuthorMerger.merge(authors);
System.out.println("Merge ");
for (Author author : merge) {
System.out.println(authorToString(author));
}
Assertions.assertEquals(7, merge.size());
}
public <T> List<Tuple2<String, T>> readSample(String path, Class<T> clazz) {
List<Tuple2<String, T>> res = new ArrayList<>();
BufferedReader reader;
try {
reader = new BufferedReader(new FileReader(path));
String line = reader.readLine();
while (line != null) {
res
.add(
new Tuple2<>(
MapDocumentUtil.getJPathString("$.id", line),
new ObjectMapper().readValue(line, clazz)));
// read next line
line = reader.readLine();
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
return res;
}
public String authorToString(Author a) {
String print = "Fullname = ";
print += a.getFullname() + " pid = [";
if (a.getPid() != null)
for (StructuredProperty sp : a.getPid()) {
print += sp.toComparableString() + " ";
}
print += "]";
return print;
}
}

View File

@ -1,47 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.utils.DHPUtils;
public class IdentifierFactoryTest {
private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@Test
public void testCreateIdentifierForPublication() throws IOException {
verifyIdentifier("publication_doi.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2011.03.013"));
verifyIdentifier("publication_pmc.json", "50|pmc_________::" + DHPUtils.md5("21459329"));
verifyIdentifier(
"publication_urn.json",
"50|urn_________::" + DHPUtils.md5("urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"));
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
verifyIdentifier("publication_3.json", defaultID);
verifyIdentifier("publication_4.json", defaultID);
verifyIdentifier("publication_5.json", defaultID);
}
protected void verifyIdentifier(String filename, String expectedID) throws IOException {
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);
String id = IdentifierFactory.createIdentifier(pub);
assertNotNull(id);
assertEquals(expectedID, id);
}
}

View File

@ -0,0 +1,197 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import me.xuender.unidecode.Unidecode;
class OafMapperUtilsTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@Test
public void testUnidecode() {
assertEquals("Liu Ben Mu hiruzuSen tawa", Unidecode.decode("六本木ヒルズ森タワ"));
assertEquals("Nan Wu A Mi Tuo Fo", Unidecode.decode("南无阿弥陀佛"));
assertEquals("Yi Tiao Hui Zou Lu De Yu", Unidecode.decode("一条会走路的鱼"));
assertEquals("amidaniyorai", Unidecode.decode("あみだにょらい"));
assertEquals("T`owrk`iayi", Unidecode.decode("Թուրքիայի"));
assertEquals("Obzor tematiki", Unidecode.decode("Обзор тематики"));
assertEquals("GERMANSKIE IaZYKI", Unidecode.decode("ГЕРМАНСКИЕ ЯЗЫКИ"));
assertEquals("Diereunese tes ikanopoieses", Unidecode.decode("Διερεύνηση της ικανοποίησης"));
assertEquals("lqDy l'wly@", Unidecode.decode("القضايا الأولية"));
assertEquals("abc def ghi", Unidecode.decode("abc def ghi"));
}
@Test
void testDateValidation() {
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent());
assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get());
assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get());
assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get());
assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get());
assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get());
assertEquals(
"2015-07-03",
GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get());
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get());
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get());
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get());
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get());
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get());
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get());
assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get());
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get());
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get());
assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get());
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get());
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get());
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get());
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get());
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get());
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get());
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get());
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get());
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get());
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get());
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get());
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get());
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get());
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get());
assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get());
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get());
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get());
assertEquals(
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get());
assertEquals(
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get());
assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get());
assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get());
assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get());
assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get());
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get());
assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get());
assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get());
assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get());
assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get());
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get());
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get());
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get());
}
@Test
void testDate() {
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
assertNotNull(date);
System.out.println(date);
}
@Test
void testMergePubs() throws IOException {
Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class);
Dataset d2 = read("dataset_2.json", Dataset.class);
assertEquals(1, p1.getCollectedfrom().size());
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
assertEquals(1, d2.getCollectedfrom().size());
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
OafMapperUtils
.mergeResults(p1, d2)
.getResulttype()
.getClassid());
assertEquals(1, p2.getCollectedfrom().size());
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(1, d1.getCollectedfrom().size());
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(
ModelConstants.DATASET_RESULTTYPE_CLASSID,
OafMapperUtils
.mergeResults(p2, d1)
.getResulttype()
.getClassid());
}
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
}
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
return OBJECT_MAPPER.readValue(json, clazz);
}
}

View File

@ -1,51 +0,0 @@
package eu.dnetlib.message;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
public class MessageTest {
@Test
public void fromJsonTest() throws IOException {
Message m = new Message();
m.setWorkflowId("wId");
m.setType(MessageType.ONGOING);
m.setJobName("Collection");
Map<String, String> body = new HashMap<>();
body.put("parsedItem", "300");
body.put("ExecutionTime", "30s");
m.setBody(body);
System.out.println("m = " + m);
Message m1 = Message.fromJson(m.toString());
assertEquals(m1.getWorkflowId(), m.getWorkflowId());
assertEquals(m1.getType(), m.getType());
assertEquals(m1.getJobName(), m.getJobName());
assertNotNull(m1.getBody());
m1.getBody().keySet().forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
assertEquals(m1.getJobName(), m.getJobName());
}
@Test
public void toStringTest() {
final String expectedJson = "{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
Message m = new Message();
m.setWorkflowId("wId");
m.setType(MessageType.ONGOING);
m.setJobName("Collection");
Map<String, String> body = new HashMap<>();
body.put("parsedItem", "300");
body.put("ExecutionTime", "30s");
m.setBody(body);
assertEquals(expectedJson, m.toString());
}
}

View File

@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation;
import org.junit.jupiter.api.Test;
-public class RelationMapperTest {
+class RelationMapperTest {
@Test
-public void testLoadRels() throws Exception {
+void testLoadRels() throws Exception {
RelationMapper relationMapper = RelationMapper.load();
relationMapper.keySet().forEach(System.out::println);

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]}

View File

@ -1 +0,0 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}

View File

@ -1 +0,0 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]}

View File

@ -1 +0,0 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"}

View File

@ -1 +0,0 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]}

View File

@ -1 +0,0 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]}

View File

@ -1 +0,0 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmcid"},"value":"21459329"}]}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,11 +0,0 @@
Description of the project
--------------------------
This project defines the **object schemas** of the OpenAIRE main entities and the relationships that exist among them.
Namely it defines the model for
- **research product (result)**, which subclasses into publication, dataset, other research product, software
- **data source**, an object describing the data provider (institutional repository, aggregator, CRIS system)
- **organization**, research bodies managing a data source or participating in a research project
- **project**, a research project
The serialization of such objects (data store files) is used to pass data between workflow nodes in the processing pipeline.

View File

@ -1,73 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
<artifactId>dhp-schemas</artifactId>
<packaging>jar</packaging>
<description>This module contains common schema classes meant to be used across the dnet-hadoop submodules</description>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.0.1</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>initialize</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -1,40 +0,0 @@
package eu.dnetlib.dhp.schema.action;
import java.io.Serializable;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import eu.dnetlib.dhp.schema.oaf.Oaf;
@JsonDeserialize(using = AtomicActionDeserializer.class)
public class AtomicAction<T extends Oaf> implements Serializable {
private Class<T> clazz;
private T payload;
public AtomicAction() {
}
public AtomicAction(Class<T> clazz, T payload) {
this.clazz = clazz;
this.payload = payload;
}
public Class<T> getClazz() {
return clazz;
}
public void setClazz(Class<T> clazz) {
this.clazz = clazz;
}
public T getPayload() {
return payload;
}
public void setPayload(T payload) {
this.payload = payload;
}
}
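The clazz field is what lets the custom deserializer (next file) restore the concrete payload type. A Scala sketch of the round trip, assuming the usual no-arg constructors of the OAF beans:

import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.schema.action.AtomicAction
import eu.dnetlib.dhp.schema.oaf.Relation

val mapper = new ObjectMapper()

val aa = new AtomicAction[Relation](classOf[Relation], new Relation())
val json = mapper.writeValueAsString(aa) // embeds "clazz" and "payload"

// @JsonDeserialize routes this call through AtomicActionDeserializer,
// which reads "clazz" first and then maps "payload" onto that type
val back = mapper.readValue(json, classOf[AtomicAction[Relation]])
assert(back.getPayload.isInstanceOf[Relation])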

View File

@ -1,32 +0,0 @@
package eu.dnetlib.dhp.schema.action;
import java.io.IOException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Oaf;
public class AtomicActionDeserializer extends JsonDeserializer {
@Override
public Object deserialize(JsonParser jp, DeserializationContext ctxt)
throws IOException {
JsonNode node = jp.getCodec().readTree(jp);
String classTag = node.get("clazz").asText();
JsonNode payload = node.get("payload");
ObjectMapper mapper = new ObjectMapper();
try {
final Class<?> clazz = Class.forName(classTag);
return new AtomicAction(clazz, (Oaf) mapper.readValue(payload.toString(), clazz));
} catch (ClassNotFoundException e) {
throw new IOException(e);
}
}
}

View File

@ -1,21 +0,0 @@
package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
/** Actual entity types in the Graph */
public enum EntityType {
publication, dataset, otherresearchproduct, software, datasource, organization, project;
/**
* Resolves the EntityType, given the corresponding class name
*
* @param clazz the given class name
* @param <T> actual OafEntity subclass
* @return the EntityType associated to the given class
*/
public static <T extends OafEntity> EntityType fromClass(Class<T> clazz) {
return EntityType.valueOf(clazz.getSimpleName().toLowerCase());
}
}
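For example, fromClass simply lowercases the simple class name, so the mapping is one-to-one with the enum constants:

import eu.dnetlib.dhp.schema.common.EntityType
import eu.dnetlib.dhp.schema.oaf.Publication

// Publication.class -> "publication" -> EntityType.publication
assert(EntityType.fromClass(classOf[Publication]) == EntityType.publication)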

Some files were not shown because too many files have changed in this diff