[scholexplorer model update] dropping result.pid, all of them are moved as instance.alternateIdentifier(s)

Merge branch 'stable_ids' into scholexplorer_model_update
2021-05-27 17:25:01 +02:00 · 2021-05-27 17:20:32 +02:00 · 2021-05-27 17:17:27 +02:00 · 2021-05-27 15:10:51 +02:00 · 2021-05-27 12:22:47 +02:00 · 2021-05-26 18:20:23 +02:00
465 changed files with 33318 additions and 9377 deletions
--- a/.gitignore
+++ b/.gitignore
@ -7,6 +7,8 @@
 *.iws
 *~
 .vscode
+.metals
+.bloop
 .classpath
 /*/.classpath
 /*/*/.classpath
@ -24,4 +26,5 @@
 spark-warehouse
 /**/job-override.properties
 /**/*.log
+/**/.factorypath

--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@ -7,6 +7,7 @@
 		<artifactId>dhp</artifactId>
 		<version>1.2.4-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
+
 	</parent>

 	<artifactId>dhp-common</artifactId>
@ -53,11 +54,6 @@
 			<groupId>com.fasterxml.jackson.core</groupId>
 			<artifactId>jackson-databind</artifactId>
 		</dependency>
-		<!-- https://mvnrepository.com/artifact/com.rabbitmq/amqp-client -->
-		<dependency>
-			<groupId>com.rabbitmq</groupId>
-			<artifactId>amqp-client</artifactId>
-		</dependency>
 		<dependency>
 			<groupId>net.sf.saxon</groupId>
 			<artifactId>Saxon-HE</artifactId>
@ -98,6 +94,16 @@
 			<artifactId>dnet-pace-core</artifactId>
 		</dependency>

+		<dependency>
+			<groupId>org.apache.httpcomponents</groupId>
+			<artifactId>httpclient</artifactId>
+		</dependency>
+
+		<dependency>
+			<groupId>org.mongodb</groupId>
+			<artifactId>mongo-java-driver</artifactId>
+		</dependency>
+
 		<dependency>
 			<groupId>eu.dnetlib.dhp</groupId>
 			<artifactId>dhp-schemas</artifactId>
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java
@ -0,0 +1,14 @@
+
+package eu.dnetlib.dhp.application;
+
+import java.io.*;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Placeholder class: it currently declares no fields and no methods.
+ */
+public class ApplicationUtils {
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java
@ -1,10 +1,7 @@

 package eu.dnetlib.dhp.application;

-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.Serializable;
-import java.io.StringWriter;
+import java.io.*;
 import java.util.*;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
@ -12,17 +9,21 @@ import java.util.zip.GZIPOutputStream;
 import org.apache.commons.cli.*;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 import com.fasterxml.jackson.databind.ObjectMapper;

 public class ArgumentApplicationParser implements Serializable {

+	private static final Logger log = LoggerFactory.getLogger(ArgumentApplicationParser.class);
+
 	private final Options options = new Options();
 	private final Map<String, String> objectMap = new HashMap<>();

 	private final List<String> compressedValues = new ArrayList<>();

-	public ArgumentApplicationParser(final String json_configuration) throws Exception {
+	public ArgumentApplicationParser(final String json_configuration) throws IOException {
 		final ObjectMapper mapper = new ObjectMapper();
 		final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
 		createOptionMap(configuration);
@ -33,7 +34,6 @@ public class ArgumentApplicationParser implements Serializable {
 	}

 	private void createOptionMap(final OptionsParameter[] configuration) {
-
 		Arrays
 			.stream(configuration)
 			.map(
@ -47,10 +47,6 @@ public class ArgumentApplicationParser implements Serializable {
 					return o;
 				})
 			.forEach(options::addOption);
-
-		// HelpFormatter formatter = new HelpFormatter();
-		// formatter.printHelp("myapp", null, options, null, true);
-
 	}

 	public static String decompressValue(final String abstractCompressed) {
@ -61,7 +57,7 @@ public class ArgumentApplicationParser implements Serializable {
 			IOUtils.copy(gis, stringWriter);
 			return stringWriter.toString();
 		} catch (Throwable e) {
-			System.out.println("Wrong value to decompress:" + abstractCompressed);
+			log.error("Wrong value to decompress:" + abstractCompressed);
 			throw new RuntimeException(e);
 		}
 	}
@ -74,7 +70,7 @@ public class ArgumentApplicationParser implements Serializable {
 		return java.util.Base64.getEncoder().encodeToString(out.toByteArray());
 	}

-	public void parseArgument(final String[] args) throws Exception {
+	public void parseArgument(final String[] args) throws ParseException {
 		CommandLineParser parser = new BasicParser();
 		CommandLine cmd = parser.parse(options, args);
 		Arrays
--- a/dhp-common/src/main/java/eu/dnetlib/collector/worker/model/ApiDescriptor.java
+++ b/dhp-common/src/main/java/eu/dnetlib/collector/worker/model/ApiDescriptor.java
@ -1,5 +1,5 @@

-package eu.dnetlib.collector.worker.model;
+package eu.dnetlib.dhp.collection;

 import java.util.HashMap;
 import java.util.Map;
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
@ -27,4 +27,26 @@ public class Constants {
 		coarCodeLabelMap.put("c_f1cf", "EMBARGO");
 	}

+	public static final String SEQUENCE_FILE_NAME = "/sequence_file";
+	public static final String REPORT_FILE_NAME = "/report";
+	public static final String MDSTORE_DATA_PATH = "/store";
+	public static final String MDSTORE_SIZE_PATH = "/size";
+
+	public static final String COLLECTION_MODE = "collectionMode";
+	public static final String METADATA_ENCODING = "metadataEncoding";
+	public static final String OOZIE_WF_PATH = "oozieWfPath";
+	public static final String DNET_MESSAGE_MGR_URL = "dnetMessageManagerURL";
+
+	public static final String MAX_NUMBER_OF_RETRY = "maxNumberOfRetry";
+	public static final String REQUEST_DELAY = "requestDelay";
+	public static final String RETRY_DELAY = "retryDelay";
+	public static final String CONNECT_TIMEOUT = "connectTimeOut";
+	public static final String READ_TIMEOUT = "readTimeOut";
+	public static final String FROM_DATE_OVERRIDE = "fromDateOverride";
+	public static final String UNTIL_DATE_OVERRIDE = "untilDateOverride";
+
+	public static final String CONTENT_TOTALITEMS = "TotalItems";
+	public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
+	public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
+
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
@ -14,7 +14,7 @@ public class DbClient implements Closeable {

 	private static final Log log = LogFactory.getLog(DbClient.class);

-	private Connection connection;
+	private final Connection connection;

 	public DbClient(final String address, final String login, final String password) {

--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java
@ -100,7 +100,7 @@ public class MakeTarArchive implements Serializable {
 			BufferedInputStream bis = new BufferedInputStream(is);

 			int count;
-			byte data[] = new byte[1024];
+			byte[] data = new byte[1024];
 			while ((count = bis.read(data, 0, data.length)) != -1) {
 				ar.write(data, 0, count);
 			}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/MdstoreClient.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/MdstoreClient.java
@ -1,39 +1,60 @@

-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common;

 import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Optional;
 import java.util.stream.StreamSupport;

 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.bson.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 import com.google.common.collect.Iterables;
+import com.mongodb.BasicDBObject;
 import com.mongodb.MongoClient;
 import com.mongodb.MongoClientURI;
+import com.mongodb.QueryBuilder;
 import com.mongodb.client.MongoCollection;
 import com.mongodb.client.MongoDatabase;

 public class MdstoreClient implements Closeable {

+	private static final Logger log = LoggerFactory.getLogger(MdstoreClient.class);
+
 	private final MongoClient client;
 	private final MongoDatabase db;

 	private static final String COLL_METADATA = "metadata";
 	private static final String COLL_METADATA_MANAGER = "metadataManager";

-	private static final Log log = LogFactory.getLog(MdstoreClient.class);
-
 	public MdstoreClient(final String baseUrl, final String dbName) {
 		this.client = new MongoClient(new MongoClientURI(baseUrl));
 		this.db = getDb(client, dbName);
 	}

+	/**
+	 * Returns the collection that currently backs the given mdstore.
+	 *
+	 * The method queries the {@code metadataManager} collection for the document whose {@code mdId}
+	 * field equals the given id, reads its {@code currentId} field and returns the collection with that name.
+	 *
+	 * @param mdId the mdstore identifier to resolve
+	 * @return the collection named after the resolved {@code currentId}
+	 * @throws IllegalArgumentException when no {@code currentId} can be resolved for {@code mdId}
+	 */
+	public MongoCollection<Document> mdStore(final String mdId) {
+		final BasicDBObject byMdId = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
+		log.info("querying current mdId: {}", byMdId.toJson());
+
+		final Optional<String> resolved = Optional
+			.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(byMdId))
+			.map(found -> found.first())
+			.map(manager -> manager.getString("currentId"));
+		if (!resolved.isPresent()) {
+			throw new IllegalArgumentException("cannot find current mdstore id for: " + mdId);
+		}
+
+		final String currentId = resolved.get();
+		log.info("currentId: {}", currentId);
+		return getColl(db, currentId, true);
+	}
+
 	public Map<String, String> validCollections(
 		final String mdFormat, final String mdLayout, final String mdInterpretation) {

--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
@ -13,9 +13,9 @@ import okio.Source;

 public class InputStreamRequestBody extends RequestBody {

-	private InputStream inputStream;
-	private MediaType mediaType;
-	private long lenght;
+	private final InputStream inputStream;
+	private final MediaType mediaType;
+	private final long lenght;

 	public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {

--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java
@ -0,0 +1,72 @@
+
+package eu.dnetlib.dhp.common.rest;
+
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.methods.HttpUriRequest;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Static helpers that issue HTTP GET/POST requests and, optionally, map the response body
+ * onto a Java type with Jackson.
+ *
+ * Every request creates its own HTTP client; client and response are closed before the call returns.
+ */
+public class DNetRestClient {
+
+	private static final Logger log = LoggerFactory.getLogger(DNetRestClient.class);
+
+	private static final ObjectMapper mapper = new ObjectMapper();
+
+	/**
+	 * Performs a GET and deserializes the response body.
+	 *
+	 * @param url   the address to call
+	 * @param clazz the type the response body is mapped onto
+	 * @return the response body parsed as {@code clazz}
+	 * @throws Exception on I/O or JSON mapping failures
+	 */
+	public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
+		return doHTTPRequest(new HttpGet(url), clazz);
+	}
+
+	/**
+	 * Performs a GET and returns the raw response body.
+	 *
+	 * @param url the address to call
+	 * @return the response body as text
+	 * @throws Exception on I/O failures
+	 */
+	public static String doGET(final String url) throws Exception {
+		return doHTTPRequest(new HttpGet(url));
+	}
+
+	/**
+	 * Performs a POST, sending {@code objParam} serialized as JSON when it is not null.
+	 *
+	 * @param url      the address to call
+	 * @param objParam the payload; when null the request is sent without entity and without JSON headers
+	 * @return the response body as text
+	 * @throws Exception on I/O or JSON serialization failures
+	 */
+	public static <V> String doPOST(final String url, V objParam) throws Exception {
+		final HttpPost httpPost = new HttpPost(url);
+
+		if (objParam != null) {
+			// The single-argument StringEntity constructor falls back to ISO-8859-1, which corrupts
+			// non-latin characters in the JSON payload: encode explicitly as UTF-8.
+			final StringEntity entity = new StringEntity(
+				mapper.writeValueAsString(objParam), java.nio.charset.StandardCharsets.UTF_8);
+			httpPost.setEntity(entity);
+			httpPost.setHeader("Accept", "application/json");
+			httpPost.setHeader("Content-type", "application/json");
+		}
+		return doHTTPRequest(httpPost);
+	}
+
+	/**
+	 * Performs a POST (see {@link #doPOST(String, Object)}) and deserializes the response body.
+	 *
+	 * @param url      the address to call
+	 * @param objParam the payload, may be null
+	 * @param clazz    the type the response body is mapped onto
+	 * @return the response body parsed as {@code clazz}
+	 * @throws Exception on I/O or JSON mapping failures
+	 */
+	public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws Exception {
+		return mapper.readValue(doPOST(url, objParam), clazz);
+	}
+
+	/**
+	 * Executes the request and reads the whole response body as UTF-8 text.
+	 *
+	 * @param r the request to execute
+	 * @return the response body, or an empty string when the response carries no entity
+	 * @throws Exception on I/O failures
+	 */
+	private static String doHTTPRequest(final HttpUriRequest r) throws Exception {
+		log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
+		log
+			.info(
+				"request headers: {}",
+				Arrays
+					.stream(r.getAllHeaders())
+					.map(h -> h.getName() + ":" + h.getValue())
+					.collect(Collectors.joining(",")));
+
+		// try-with-resources: both the client and the response hold connections that must be released
+		try (CloseableHttpClient client = HttpClients.createDefault();
+			CloseableHttpResponse response = client.execute(r)) {
+			if (response.getEntity() == null) {
+				// e.g. responses without a body: nothing to read
+				return "";
+			}
+			return IOUtils.toString(response.getEntity().getContent(), java.nio.charset.StandardCharsets.UTF_8);
+		}
+	}
+
+	/**
+	 * Executes the request and deserializes the response body.
+	 *
+	 * @param r     the request to execute
+	 * @param clazz the type the response body is mapped onto
+	 * @return the response body parsed as {@code clazz}
+	 * @throws Exception on I/O or JSON mapping failures
+	 */
+	private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {
+		return mapper.readValue(doHTTPRequest(r), clazz);
+	}
+}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java
@ -1,5 +1,5 @@

-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common.vocabulary;

 import java.io.Serializable;
 import java.util.HashMap;
@ -10,8 +10,8 @@ import org.apache.commons.lang3.StringUtils;

 import com.google.common.collect.Maps;

-import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

 public class Vocabulary implements Serializable {

--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java
@ -1,5 +1,5 @@

-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common.vocabulary;

 import java.io.Serializable;
 import java.util.*;
@ -7,8 +7,8 @@ import java.util.stream.Collectors;

 import org.apache.commons.lang3.StringUtils;

-import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

@ -67,6 +67,10 @@ public class VocabularyGroup implements Serializable {

 	private final Map<String, Vocabulary> vocs = new HashMap<>();

+	/**
+	 * Lists the keys under which vocabularies are registered in this group.
+	 *
+	 * @return a read-only view of the registered keys; it reflects later registrations but cannot be
+	 *         used to remove vocabularies from the group
+	 */
+	public Set<String> vocabularyNames() {
+		// do not hand out the mutable key set of the internal map
+		return Collections.unmodifiableSet(vocs.keySet());
+	}
+
 	public void addVocabulary(final String id, final String name) {
 		vocs.put(id.toLowerCase(), new Vocabulary(id, name));
 	}
@ -118,7 +122,31 @@ public class VocabularyGroup implements Serializable {
 		return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
 	}

+	/**
+	 * getSynonymAsQualifierCaseSensitive
+	 *
+	 * Reflects the situation where a case-sensitive vocabulary must be checked: the vocabulary is
+	 * looked up with {@code vocId} exactly as given, without lower-casing it.
+	 *
+	 * Returns {@code OafMapperUtils.unknown("", "")} when {@code vocId} is blank; otherwise delegates to
+	 * {@code getSynonymAsQualifier(syn)} of the vocabulary found under {@code vocId}.
+	 *
+	 * NOTE(review): addVocabulary registers vocabularies under the lower-cased id, so a {@code vocId}
+	 * containing upper-case characters presumably finds nothing here and ends in a
+	 * NullPointerException - confirm the intended behaviour.
+	 */
+	public Qualifier getSynonymAsQualifierCaseSensitive(final String vocId, final String syn) {
+		if (StringUtils.isBlank(vocId)) {
+			return OafMapperUtils.unknown("", "");
+		}
+		return vocs.get(vocId).getSynonymAsQualifier(syn);
+	}
+
+	/**
+	 * termExists
+	 *
+	 * Two methods: without and with caseSensitive check. This overload delegates to the
+	 * three-argument one with {@code caseSensitive = Boolean.FALSE}.
+	 */
 	public boolean termExists(final String vocId, final String id) {
+		return termExists(vocId, id, Boolean.FALSE);
+	}
+
+	// When caseSensitive is TRUE the vocabulary is fetched with vocId as given, otherwise with the
+	// lower-cased vocId; a null caseSensitive is handled like FALSE.
+	public boolean termExists(final String vocId, final String id, final Boolean caseSensitive) {
+		if (Boolean.TRUE.equals(caseSensitive)) {
+			// NOTE(review): addVocabulary stores the keys lower-cased; if vocabularyExists matches
+			// ignoring case, vocs.get(vocId) can be null here for a mixed-case vocId - confirm.
+			return vocabularyExists(vocId) && vocs.get(vocId).termExists(id);
+		}
 		return vocabularyExists(vocId) && vocs.get(vocId.toLowerCase()).termExists(id);
 	}

--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java
@ -1,5 +1,5 @@

-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common.vocabulary;

 import java.io.Serializable;

--- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java
@ -0,0 +1,64 @@
+
+package eu.dnetlib.dhp.message;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * Serializable bean carrying a typed message for a workflow: the message type, the workflow
+ * identifier and a free-form map of string parameters (the body).
+ */
+public class Message implements Serializable {
+
+	private static final long serialVersionUID = 401753881204524893L;
+
+	/** Body key used for the current progress value. */
+	public static final String CURRENT_PARAM = "current";
+
+	/** Body key used for the total value. */
+	public static final String TOTAL_PARAM = "total";
+
+	private MessageType messageType;
+
+	private String workflowId;
+
+	private Map<String, String> body;
+
+	/** Creates an empty message: all the fields are left null. */
+	public Message() {
+	}
+
+	/**
+	 * Creates a message with an empty, insertion-ordered body.
+	 *
+	 * @param messageType the type of the message
+	 * @param workflowId  the identifier of the workflow the message refers to
+	 */
+	public Message(final MessageType messageType, final String workflowId) {
+		this(messageType, workflowId, new LinkedHashMap<>());
+	}
+
+	/**
+	 * Creates a message with the given body.
+	 *
+	 * @param messageType the type of the message
+	 * @param workflowId  the identifier of the workflow the message refers to
+	 * @param body        the parameters of the message; the map is stored as is, not copied
+	 */
+	public Message(final MessageType messageType, final String workflowId, final Map<String, String> body) {
+		this.messageType = messageType;
+		this.workflowId = workflowId;
+		this.body = body;
+	}
+
+	public MessageType getMessageType() {
+		return messageType;
+	}
+
+	public void setMessageType(MessageType messageType) {
+		this.messageType = messageType;
+	}
+
+	public String getWorkflowId() {
+		return workflowId;
+	}
+
+	public void setWorkflowId(final String workflowId) {
+		this.workflowId = workflowId;
+	}
+
+	public Map<String, String> getBody() {
+		return body;
+	}
+
+	public void setBody(final Map<String, String> body) {
+		this.body = body;
+	}
+
+	@Override
+	public String toString() {
+		return String.format("Message [type=%s, workflowId=%s, body=%s]", messageType, workflowId, body);
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/MessageSender.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/MessageSender.java
@ -0,0 +1,94 @@
+
+package eu.dnetlib.dhp.message;
+
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.entity.ContentType;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Sends {@link Message}s to the configured endpoint with HTTP PUT requests.
+ *
+ * Messages are submitted to an internal executor, so the public send methods return without waiting
+ * for the HTTP call. Delivery is best effort: failures are logged and never propagated to the caller.
+ *
+ * NOTE(review): the executor is never shut down by this class - confirm that the owning process
+ * terminates it (or exits explicitly) once the last message has been sent.
+ */
+public class MessageSender {
+
+	private static final Logger log = LoggerFactory.getLogger(MessageSender.class);
+
+	private static final int SOCKET_TIMEOUT_MS = 2000;
+
+	private static final int CONNECTION_REQUEST_TIMEOUT_MS = 2000;
+
+	private static final int CONNECTION_TIMEOUT_MS = 2000;
+
+	private final ObjectMapper objectMapper = new ObjectMapper();
+
+	private final String dnetMessageEndpoint;
+
+	private final String workflowId;
+
+	private final ExecutorService executorService = Executors.newCachedThreadPool();
+
+	/**
+	 * @param dnetMessageEndpoint the URL the messages are PUT to
+	 * @param workflowId          the workflow identifier set on the messages created by this sender
+	 */
+	public MessageSender(final String dnetMessageEndpoint, final String workflowId) {
+		this.workflowId = workflowId;
+		this.dnetMessageEndpoint = dnetMessageEndpoint;
+	}
+
+	/**
+	 * Queues the message for asynchronous delivery.
+	 *
+	 * @param message the message to send
+	 */
+	public void sendMessage(final Message message) {
+		executorService.submit(() -> _sendMessage(message));
+	}
+
+	/**
+	 * Queues an ONGOING message reporting the progress of the workflow.
+	 *
+	 * @param current the current progress value; omitted from the body when null
+	 * @param total   the total value; omitted from the body when null
+	 */
+	public void sendMessage(final Long current, final Long total) {
+		sendMessage(createOngoingMessage(current, total));
+	}
+
+	/**
+	 * Queues a REPORT message whose body is the given map.
+	 *
+	 * @param report the content of the report
+	 */
+	public void sendReport(final Map<String, String> report) {
+		sendMessage(new Message(MessageType.REPORT, workflowId, report));
+	}
+
+	/** Builds the ONGOING message; null values are left out of the body. */
+	private Message createOngoingMessage(final Long current, final Long total) {
+		final Message m = new Message(MessageType.ONGOING, workflowId);
+		// guard current exactly like total: a null progress value must not fail the caller
+		if (current != null) {
+			m.getBody().put(Message.CURRENT_PARAM, current.toString());
+		}
+		if (total != null) {
+			m.getBody().put(Message.TOTAL_PARAM, total.toString());
+		}
+		return m;
+	}
+
+	/** Serializes the message as JSON and PUTs it to the endpoint; every failure is only logged. */
+	private void _sendMessage(final Message message) {
+		try {
+			final String json = objectMapper.writeValueAsString(message);
+
+			final HttpPut req = new HttpPut(dnetMessageEndpoint);
+			req.setEntity(new StringEntity(json, ContentType.APPLICATION_JSON));
+
+			final RequestConfig requestConfig = RequestConfig
+				.custom()
+				.setConnectTimeout(CONNECTION_TIMEOUT_MS)
+				.setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT_MS)
+				.setSocketTimeout(SOCKET_TIMEOUT_MS)
+				.build();
+
+			// the response is declared as a resource only to be closed: its content is not inspected
+			try (final CloseableHttpClient client = HttpClients
+				.custom()
+				.setDefaultRequestConfig(requestConfig)
+				.build();
+				final CloseableHttpResponse response = client.execute(req)) {
+				log.debug("Sent Message to {}", dnetMessageEndpoint);
+				log.debug("MESSAGE:{}", message);
+			} catch (final Throwable e) {
+				// deliberately broad: this runs on an executor thread, anything not logged here is lost
+				log.error("Error sending message to {}, message content: {}", dnetMessageEndpoint, message, e);
+			}
+		} catch (final JsonProcessingException e) {
+			log.error("Error sending message to {}, message content: {}", dnetMessageEndpoint, message, e);
+		}
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/MessageType.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/MessageType.java
@ -0,0 +1,21 @@
+
+package eu.dnetlib.dhp.message;
+
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * The kinds of message that can be exchanged.
+ */
+public enum MessageType implements Serializable {
+
+	ONGOING, REPORT;
+
+	/**
+	 * Resolves a message type from its name, ignoring the case.
+	 *
+	 * Declared static so that no existing constant is needed to call it; calls through an instance
+	 * keep compiling.
+	 *
+	 * @param value the name of the type, e.g. {@code "ongoing"}
+	 * @return the matching constant
+	 * @throws IllegalArgumentException when {@code value} is null or matches no constant
+	 */
+	public static MessageType from(final String value) {
+		if (value == null) {
+			throw new IllegalArgumentException("unknown message type: " + value);
+		}
+		try {
+			// Locale.ROOT: the default locale may upper-case 'i' to a dotted capital (e.g. Turkish)
+			return valueOf(value.toUpperCase(java.util.Locale.ROOT));
+		} catch (final IllegalArgumentException e) {
+			// valueOf reports unknown names with its own message: rethrow with the documented one
+			throw new IllegalArgumentException("unknown message type: " + value, e);
+		}
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java
@ -1,121 +0,0 @@
-
-package eu.dnetlib.dhp.model.mdstore;
-
-import java.io.Serializable;
-
-import eu.dnetlib.dhp.utils.DHPUtils;
-
-/** This class models a record inside the new Metadata store collection on HDFS * */
-public class MetadataRecord implements Serializable {
-
-	/** The D-Net Identifier associated to the record */
-	private String id;
-
-	/** The original Identifier of the record */
-	private String originalId;
-
-	/** The encoding of the record, should be JSON or XML */
-	private String encoding;
-
-	/**
-	 * The information about the provenance of the record see @{@link Provenance} for the model of this information
-	 */
-	private Provenance provenance;
-
-	/** The content of the metadata */
-	private String body;
-
-	/** the date when the record has been stored */
-	private long dateOfCollection;
-
-	/** the date when the record has been stored */
-	private long dateOfTransformation;
-
-	public MetadataRecord() {
-		this.dateOfCollection = System.currentTimeMillis();
-	}
-
-	public MetadataRecord(
-		String originalId,
-		String encoding,
-		Provenance provenance,
-		String body,
-		long dateOfCollection) {
-
-		this.originalId = originalId;
-		this.encoding = encoding;
-		this.provenance = provenance;
-		this.body = body;
-		this.dateOfCollection = dateOfCollection;
-		this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix());
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public String getOriginalId() {
-		return originalId;
-	}
-
-	public void setOriginalId(String originalId) {
-		this.originalId = originalId;
-	}
-
-	public String getEncoding() {
-		return encoding;
-	}
-
-	public void setEncoding(String encoding) {
-		this.encoding = encoding;
-	}
-
-	public Provenance getProvenance() {
-		return provenance;
-	}
-
-	public void setProvenance(Provenance provenance) {
-		this.provenance = provenance;
-	}
-
-	public String getBody() {
-		return body;
-	}
-
-	public void setBody(String body) {
-		this.body = body;
-	}
-
-	public long getDateOfCollection() {
-		return dateOfCollection;
-	}
-
-	public void setDateOfCollection(long dateOfCollection) {
-		this.dateOfCollection = dateOfCollection;
-	}
-
-	public long getDateOfTransformation() {
-		return dateOfTransformation;
-	}
-
-	public void setDateOfTransformation(long dateOfTransformation) {
-		this.dateOfTransformation = dateOfTransformation;
-	}
-
-	@Override
-	public boolean equals(Object o) {
-		if (!(o instanceof MetadataRecord)) {
-			return false;
-		}
-		return ((MetadataRecord) o).getId().equalsIgnoreCase(id);
-	}
-
-	@Override
-	public int hashCode() {
-		return id.hashCode();
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java
@ -1,52 +0,0 @@
-
-package eu.dnetlib.dhp.model.mdstore;
-
-import java.io.Serializable;
-
-/**
- * @author Sandro La Bruzzo
- *         <p>
- *         Provenace class models the provenance of the record in the metadataStore It contains the identifier and the
- *         name of the datasource that gives the record
- */
-public class Provenance implements Serializable {
-
-	private String datasourceId;
-
-	private String datasourceName;
-
-	private String nsPrefix;
-
-	public Provenance() {
-	}
-
-	public Provenance(String datasourceId, String datasourceName, String nsPrefix) {
-		this.datasourceId = datasourceId;
-		this.datasourceName = datasourceName;
-		this.nsPrefix = nsPrefix;
-	}
-
-	public String getDatasourceId() {
-		return datasourceId;
-	}
-
-	public void setDatasourceId(String datasourceId) {
-		this.datasourceId = datasourceId;
-	}
-
-	public String getDatasourceName() {
-		return datasourceName;
-	}
-
-	public void setDatasourceName(String datasourceName) {
-		this.datasourceName = datasourceName;
-	}
-
-	public String getNsPrefix() {
-		return nsPrefix;
-	}
-
-	public void setNsPrefix(String nsPrefix) {
-		this.nsPrefix = nsPrefix;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java
@ -1,49 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf;
-
-import java.util.Comparator;
-
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-
-public class ResultTypeComparator implements Comparator<Result> {
-
-	@Override
-	public int compare(Result left, Result right) {
-
-		if (left == null && right == null)
-			return 0;
-		if (left == null)
-			return 1;
-		if (right == null)
-			return -1;
-
-		String lClass = left.getResulttype().getClassid();
-		String rClass = right.getResulttype().getClassid();
-
-		if (lClass.equals(rClass))
-			return 0;
-
-		if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
-			return -1;
-		if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
-			return 1;
-
-		if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
-			return -1;
-		if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
-			return 1;
-
-		if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
-			return -1;
-		if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
-			return 1;
-
-		if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
-			return -1;
-		if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
-			return 1;
-
-		// Else (but unlikely), lexicographical ordering will do.
-		return lClass.compareTo(rClass);
-	}
-}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java
@ -1,33 +1,26 @@

-package eu.dnetlib.dhp.oa.graph.clean;
+package eu.dnetlib.dhp.schema.oaf.utils;

 import java.util.*;
 import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;

 import org.apache.commons.lang3.StringUtils;

-import com.clearspring.analytics.util.Lists;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;

-import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;

-public class CleaningFunctions {
-
-	public static final String DOI_PREFIX_REGEX = "^10\\.";
-
-	public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
-	public static final int ORCID_LEN = 19;
+public class GraphCleaningFunctions extends CleaningFunctions {

 	public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
-
-	public static final Set<String> PID_BLACKLIST = new HashSet<>();
-
-	static {
-		PID_BLACKLIST.add("none");
-		PID_BLACKLIST.add("na");
-	}
+	public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
+	public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";
+	public static final String TITLE_FILTER_REGEX = "[.*test.*\\W\\d]";
+	public static final int TITLE_FILTER_RESIDUAL_LENGTH = 10;

 	public static <T extends Oaf> T fixVocabularyNames(T value) {
 		if (value instanceof Datasource) {
@ -59,23 +52,17 @@ public class CleaningFunctions {
 				}
 			}
 			if (Objects.nonNull(r.getAuthor())) {
-				r
-					.getAuthor()
-					.stream()
-					.filter(Objects::nonNull)
-					.forEach(a -> {
-						if (Objects.nonNull(a.getPid())) {
-							a
-								.getPid()
-								.stream()
-								.filter(Objects::nonNull)
-								.forEach(p -> fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES));
-						}
-					});
+				r.getAuthor().stream().filter(Objects::nonNull).forEach(a -> {
+					if (Objects.nonNull(a.getPid())) {
+						a.getPid().stream().filter(Objects::nonNull).forEach(p -> {
+							fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES);
+						});
+					}
+				});
 			}
 			if (value instanceof Publication) {

-			} else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) {
+			} else if (value instanceof Dataset) {

 			} else if (value instanceof OtherResearchProduct) {

@ -87,7 +74,37 @@ public class CleaningFunctions {
 		return value;
 	}

-	public static <T extends Oaf> T fixDefaults(T value) {
+	/**
+	 * Tells whether the given entity passes the filter.
+	 *
+	 * Datasources, projects, organizations and relations always pass. A result is rejected only when
+	 * its title list is non-null and empty; there are no further criteria specific to publications,
+	 * datasets, other research products or software.
+	 *
+	 * @param value the entity to evaluate
+	 * @return false for a result with an empty (non-null) title list, true in every other case
+	 */
+	public static <T extends Oaf> boolean filter(T value) {
+		final boolean nothingToEvaluate = value instanceof Datasource
+			|| value instanceof Project
+			|| value instanceof Organization
+			|| value instanceof Relation;
+		if (nothingToEvaluate || !(value instanceof Result)) {
+			return true;
+		}
+
+		final Result r = (Result) value;
+		final boolean emptyTitleList = Objects.nonNull(r.getTitle()) && r.getTitle().isEmpty();
+		return !emptyTitleList;
+	}
+
+	public static <T extends Oaf> T cleanup(T value) {
 		if (value instanceof Datasource) {
 			// nothing to clean here
 		} else if (value instanceof Project) {
@ -110,16 +127,6 @@ public class CleaningFunctions {
 					.setLanguage(
 						qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
 			}
-			if (Objects.nonNull(r.getCountry())) {
-				r
-					.setCountry(
-						r
-							.getCountry()
-							.stream()
-							.filter(Objects::nonNull)
-							.filter(c -> StringUtils.isNotBlank(c.getClassid()))
-							.collect(Collectors.toList()));
-			}
 			if (Objects.nonNull(r.getSubject())) {
 				r
 					.setSubject(
@ -130,7 +137,7 @@ public class CleaningFunctions {
 							.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
 							.filter(sp -> Objects.nonNull(sp.getQualifier()))
 							.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
-							.map(CleaningFunctions::cleanValue)
+							.map(GraphCleaningFunctions::cleanValue)
 							.collect(Collectors.toList()));
 			}
 			if (Objects.nonNull(r.getTitle())) {
@ -141,7 +148,13 @@ public class CleaningFunctions {
 							.stream()
 							.filter(Objects::nonNull)
 							.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
-							.map(CleaningFunctions::cleanValue)
+							.filter(
+								sp -> sp
+									.getValue()
+									.toLowerCase()
+									.replaceAll(TITLE_FILTER_REGEX, "")
+									.length() > TITLE_FILTER_RESIDUAL_LENGTH)
+							.map(GraphCleaningFunctions::cleanValue)
 							.collect(Collectors.toList()));
 			}
 			if (Objects.nonNull(r.getDescription())) {
@ -152,22 +165,11 @@ public class CleaningFunctions {
 							.stream()
 							.filter(Objects::nonNull)
 							.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
-							.map(CleaningFunctions::cleanValue)
+							.map(GraphCleaningFunctions::cleanValue)
 							.collect(Collectors.toList()));
 			}
 			if (Objects.nonNull(r.getPid())) {
-				r
-					.setPid(
-						r
-							.getPid()
-							.stream()
-							.filter(Objects::nonNull)
-							.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
-							.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
-							.filter(sp -> Objects.nonNull(sp.getQualifier()))
-							.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
-							.map(CleaningFunctions::normalizePidValue)
-							.collect(Collectors.toList()));
+				r.setPid(processPidCleaning(r.getPid()));
 			}
 			if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
 				r
@ -175,11 +177,36 @@ public class CleaningFunctions {
 						qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
 			}
 			if (Objects.nonNull(r.getInstance())) {
+
 				for (Instance i : r.getInstance()) {
+					Optional
+						.ofNullable(i.getPid())
+						.ifPresent(pid -> {
+							final Set<StructuredProperty> pids = pid
+								.stream()
+								.filter(Objects::nonNull)
+								.filter(p -> StringUtils.isNotBlank(p.getValue()))
+								.collect(Collectors.toCollection(HashSet::new));
+
+							Optional
+								.ofNullable(i.getAlternateIdentifier())
+								.ifPresent(altId -> {
+									final Set<StructuredProperty> altIds = altId
+										.stream()
+										.filter(Objects::nonNull)
+										.filter(p -> StringUtils.isNotBlank(p.getValue()))
+										.collect(Collectors.toCollection(HashSet::new));
+
+									i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
+								});
+						});
+
 					if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
 						i
 							.setAccessright(
-								qualifier(ModelConstants.UNKNOWN, "not available", ModelConstants.DNET_ACCESS_MODES));
+								accessRight(
+									ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
+									ModelConstants.DNET_ACCESS_MODES));
 					}
 					if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
 						i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
@ -190,37 +217,19 @@ public class CleaningFunctions {
 				}
 			}
 			if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
-				Qualifier bestaccessrights = AbstractMdRecordToOafMapper.createBestAccessRights(r.getInstance());
+				Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
 				if (Objects.isNull(bestaccessrights)) {
 					r
 						.setBestaccessright(
-							qualifier(ModelConstants.UNKNOWN, "not available", ModelConstants.DNET_ACCESS_MODES));
+							qualifier(
+								ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
+								ModelConstants.DNET_ACCESS_MODES));
 				} else {
 					r.setBestaccessright(bestaccessrights);
 				}
 			}
 			if (Objects.nonNull(r.getAuthor())) {
-				r
-					.setAuthor(
-						r
-							.getAuthor()
-							.stream()
-							.filter(a -> Objects.nonNull(a))
-							.filter(a -> StringUtils.isNotBlank(a.getFullname()))
-							.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
-							.collect(Collectors.toList()));
-
-				boolean nullRank = r
-					.getAuthor()
-					.stream()
-					.anyMatch(a -> Objects.isNull(a.getRank()));
-				if (nullRank) {
-					int i = 1;
-					for (Author author : r.getAuthor()) {
-						author.setRank(i++);
-					}
-				}
-
+				final List<Author> authors = Lists.newArrayList();
 				for (Author a : r.getAuthor()) {
 					if (Objects.isNull(a.getPid())) {
 						a.setPid(Lists.newArrayList());
@ -234,57 +243,44 @@ public class CleaningFunctions {
 									.filter(p -> Objects.nonNull(p.getQualifier()))
 									.filter(p -> StringUtils.isNotBlank(p.getValue()))
 									.map(p -> {
-										// hack to distinguish orcid from orcid_pending
-										String pidProvenance = Optional
-											.ofNullable(p.getDataInfo())
-											.map(
-												d -> Optional
-													.ofNullable(d.getProvenanceaction())
-													.map(Qualifier::getClassid)
-													.orElse(""))
-											.orElse("");
-										if (p
-											.getQualifier()
-											.getClassid()
-											.toLowerCase()
-											.contains(ModelConstants.ORCID)) {
-											if (pidProvenance
-												.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
-												p.getQualifier().setClassid(ModelConstants.ORCID);
-											} else {
-												p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
-											}
-											final String orcid = p
-												.getValue()
-												.trim()
-												.toLowerCase()
-												.replaceAll(ORCID_CLEANING_REGEX, "$1-$2-$3-$4");
-											if (orcid.length() == ORCID_LEN) {
-												p.setValue(orcid);
-											} else {
-												p.setValue("");
-											}
-										}
+										p.setValue(p.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, ""));
 										return p;
 									})
 									.filter(p -> StringUtils.isNotBlank(p.getValue()))
 									.collect(
 										Collectors
 											.toMap(
-												p -> p.getQualifier().getClassid() + p.getValue(),
-												Function.identity(),
-												(p1, p2) -> p1,
+												StructuredProperty::getValue, Function.identity(), (p1, p2) -> p1,
 												LinkedHashMap::new))
 									.values()
 									.stream()
 									.collect(Collectors.toList()));
 					}
+					if (StringUtils.isBlank(a.getFullname())) {
+						if (StringUtils.isNotBlank(a.getName()) && StringUtils.isNotBlank(a.getSurname())) {
+							a.setFullname(a.getSurname() + ", " + a.getName());
+						}
+					}
+					if (StringUtils.isNotBlank(a.getFullname()) && isValidAuthorName(a)) {
+						authors.add(a);
+					}
 				}

+				boolean nullRank = authors
+					.stream()
+					.anyMatch(a -> Objects.isNull(a.getRank()));
+				if (nullRank) {
+					int i = 1;
+					for (Author author : authors) {
+						author.setRank(i++);
+					}
+				}
+				r.setAuthor(authors);
+
 			}
 			if (value instanceof Publication) {

-			} else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) {
+			} else if (value instanceof Dataset) {

 			} else if (value instanceof OtherResearchProduct) {

@ -296,6 +292,49 @@ public class CleaningFunctions {
 		return value;
 	}

+	// HELPERS
+
+	private static boolean isValidAuthorName(Author a) {
+		return !Stream
+			.of(a.getFullname(), a.getName(), a.getSurname())
+			.filter(s -> s != null && !s.isEmpty())
+			.collect(Collectors.joining(""))
+			.toLowerCase()
+			.matches(INVALID_AUTHOR_REGEX);
+	}
+
+	private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
+		return pids
+			.stream()
+			.filter(Objects::nonNull)
+			.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
+			.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
+			.filter(sp -> Objects.nonNull(sp.getQualifier()))
+			.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
+			.map(CleaningFunctions::normalizePidValue)
+			.filter(CleaningFunctions::pidFilter)
+			.collect(Collectors.toList());
+	}
+
+	private static void fixVocabName(Qualifier q, String vocabularyName) {
+		if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) {
+			q.setSchemeid(vocabularyName);
+			q.setSchemename(vocabularyName);
+		}
+	}
+
+	private static AccessRight accessRight(String classid, String classname, String scheme) {
+		return OafMapperUtils
+			.accessRight(
+				classid, classname, scheme, scheme);
+	}
+
+	private static Qualifier qualifier(String classid, String classname, String scheme) {
+		return OafMapperUtils
+			.qualifier(
+				classid, classname, scheme, scheme);
+	}
+
 	protected static StructuredProperty cleanValue(StructuredProperty s) {
 		s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
 		return s;
@ -306,39 +345,4 @@ public class CleaningFunctions {
 		return s;
 	}

-	// HELPERS
-
-	private static void fixVocabName(Qualifier q, String vocabularyName) {
-		if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) {
-			q.setSchemeid(vocabularyName);
-			q.setSchemename(vocabularyName);
-		}
-	}
-
-	private static Qualifier qualifier(String classid, String classname, String scheme) {
-		return OafMapperUtils
-			.qualifier(
-				classid, classname, scheme, scheme);
-	}
-
-	/**
-	 * Utility method that normalises PID values on a per-type basis.
-	 * @param pid the PID whose value will be normalised.
-	 * @return the PID containing the normalised value.
-	 */
-	public static StructuredProperty normalizePidValue(StructuredProperty pid) {
-		String value = Optional
-			.ofNullable(pid.getValue())
-			.map(String::trim)
-			.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
-		switch (pid.getQualifier().getClassid()) {
-
-			// TODO add cleaning for more PID types as needed
-			case "doi":
-				pid.setValue(value.toLowerCase().replaceAll(DOI_PREFIX_REGEX, "10."));
-				break;
-		}
-		return pid;
-	}
-
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@ -1,11 +1,9 @@

-package eu.dnetlib.dhp.schema.oaf;
+package eu.dnetlib.dhp.schema.oaf.utils;

-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+
+import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.function.Function;
 import java.util.function.Predicate;
@ -13,42 +11,45 @@ import java.util.stream.Collectors;

 import org.apache.commons.lang3.StringUtils;

-import com.google.common.base.Joiner;
-
+import eu.dnetlib.dhp.schema.common.AccessRightComparator;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.utils.DHPUtils;
+import eu.dnetlib.dhp.schema.oaf.*;

 public class OafMapperUtils {

-	public static Oaf merge(final Oaf o1, final Oaf o2) {
-		if (ModelSupport.isSubClass(o1, OafEntity.class)) {
-			if (ModelSupport.isSubClass(o1, Result.class)) {
-
-				return mergeResults((Result) o1, (Result) o2);
-			} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
-				((Datasource) o1).mergeFrom((Datasource) o2);
-			} else if (ModelSupport.isSubClass(o1, Organization.class)) {
-				((Organization) o1).mergeFrom((Organization) o2);
-			} else if (ModelSupport.isSubClass(o1, Project.class)) {
-				((Project) o1).mergeFrom((Project) o2);
-			} else {
-				throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
-			}
-		} else if (ModelSupport.isSubClass(o1, Relation.class)) {
-			((Relation) o1).mergeFrom((Relation) o2);
+	public static Oaf merge(final Oaf left, final Oaf right) {
+		if (ModelSupport.isSubClass(left, OafEntity.class)) {
+			return mergeEntities((OafEntity) left, (OafEntity) right);
+		} else if (ModelSupport.isSubClass(left, Relation.class)) {
+			((Relation) left).mergeFrom((Relation) right);
 		} else {
-			throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
+			throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName());
 		}
-		return o1;
+		return left;
 	}

-	public static Result mergeResults(Result r1, Result r2) {
-		if (new ResultTypeComparator().compare(r1, r2) < 0) {
-			r1.mergeFrom(r2);
-			return r1;
+	public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
+		if (ModelSupport.isSubClass(left, Result.class)) {
+			return mergeResults((Result) left, (Result) right);
+		} else if (ModelSupport.isSubClass(left, Datasource.class)) {
+			left.mergeFrom(right);
+		} else if (ModelSupport.isSubClass(left, Organization.class)) {
+			left.mergeFrom(right);
+		} else if (ModelSupport.isSubClass(left, Project.class)) {
+			left.mergeFrom(right);
 		} else {
-			r2.mergeFrom(r1);
-			return r2;
+			throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
+		}
+		return left;
+	}
+
+	public static Result mergeResults(Result left, Result right) {
+		if (new ResultTypeComparator().compare(left, right) < 0) {
+			left.mergeFrom(right);
+			return left;
+		} else {
+			right.mergeFrom(left);
+			return right;
 		}
 	}

@ -104,6 +105,29 @@ public class OafMapperUtils {
 		return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
 	}

+	public static AccessRight accessRight(
+		final String classid,
+		final String classname,
+		final String schemeid,
+		final String schemename) {
+		return accessRight(classid, classname, schemeid, schemename, null);
+	}
+
+	public static AccessRight accessRight(
+		final String classid,
+		final String classname,
+		final String schemeid,
+		final String schemename,
+		final OpenAccessRoute openAccessRoute) {
+		final AccessRight accessRight = new AccessRight();
+		accessRight.setClassid(classid);
+		accessRight.setClassname(classname);
+		accessRight.setSchemeid(schemeid);
+		accessRight.setSchemename(schemename);
+		accessRight.setOpenAccessRoute(openAccessRoute);
+		return accessRight;
+	}
+
 	public static Qualifier qualifier(
 		final String classid,
 		final String classname,
@ -117,6 +141,15 @@ public class OafMapperUtils {
 		return q;
 	}

+	public static Qualifier qualifier(final Qualifier qualifier) {
+		final Qualifier q = new Qualifier();
+		q.setClassid(qualifier.getClassid());
+		q.setClassname(qualifier.getClassname());
+		q.setSchemeid(qualifier.getSchemeid());
+		q.setSchemename(qualifier.getSchemename());
+		return q;
+	}
+
 	public static StructuredProperty structuredProperty(
 		final String value,
 		final String classid,
@ -267,7 +300,7 @@ public class OafMapperUtils {
 		} else if (to_md5) {
 			final String nsPrefix = StringUtils.substringBefore(originalId, "::");
 			final String rest = StringUtils.substringAfter(originalId, "::");
-			return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
+			return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest));
 		} else {
 			return String.format("%s|%s", prefix, originalId);
 		}
@ -300,4 +333,36 @@ public class OafMapperUtils {
 		final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
 		return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
 	}
+
+	public static Qualifier createBestAccessRights(final List<Instance> instanceList) {
+		return getBestAccessRights(instanceList);
+	}
+
+	protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
+		if (instanceList != null) {
+			final Optional<AccessRight> min = instanceList
+				.stream()
+				.map(i -> i.getAccessright())
+				.min(new AccessRightComparator<>());
+
+			final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier();
+
+			if (StringUtils.isBlank(rights.getClassid())) {
+				rights.setClassid(UNKNOWN);
+			}
+			if (StringUtils.isBlank(rights.getClassname())
+				|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
+				rights.setClassname(NOT_AVAILABLE);
+			}
+			if (StringUtils.isBlank(rights.getSchemeid())) {
+				rights.setSchemeid(DNET_ACCESS_MODES);
+			}
+			if (StringUtils.isBlank(rights.getSchemename())) {
+				rights.setSchemename(DNET_ACCESS_MODES);
+			}
+
+			return rights;
+		}
+		return null;
+	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
@ -1,18 +1,29 @@

 package eu.dnetlib.dhp.utils;

-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
+import java.io.*;
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.util.List;
+import java.util.Map;
+import java.util.Properties;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;

 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.codec.binary.Base64OutputStream;
 import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SaveMode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Maps;
 import com.jayway.jsonpath.JsonPath;

 import net.minidev.json.JSONArray;
@ -21,6 +32,8 @@ import scala.collection.Seq;

 public class DHPUtils {

+	private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
+
 	public static Seq<String> toSeq(List<String> list) {
 		return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
 	}
@ -79,4 +92,72 @@ public class DHPUtils {
 			return "";
 		}
 	}
+
+	public static final ObjectMapper MAPPER = new ObjectMapper();
+
+	public static void writeHdfsFile(final Configuration conf, final String content, final String path)
+		throws IOException {
+
+		log.info("writing file {}, size {}", path, content.length());
+		try (FileSystem fs = FileSystem.get(conf);
+			BufferedOutputStream os = new BufferedOutputStream(fs.create(new Path(path)))) {
+			os.write(content.getBytes(StandardCharsets.UTF_8));
+			os.flush();
+		}
+	}
+
+	public static String readHdfsFile(Configuration conf, String path) throws IOException {
+		log.info("reading file {}", path);
+
+		try (FileSystem fs = FileSystem.get(conf)) {
+			final Path p = new Path(path);
+			if (!fs.exists(p)) {
+				throw new FileNotFoundException(path);
+			}
+			return IOUtils.toString(fs.open(p));
+		}
+	}
+
+	public static <T> T readHdfsFileAs(Configuration conf, String path, Class<T> clazz) throws IOException {
+		return MAPPER.readValue(readHdfsFile(conf, path), clazz);
+	}
+
+	public static <T> void saveDataset(final Dataset<T> mdstore, final String targetPath) {
+		log.info("saving dataset in: {}", targetPath);
+		mdstore
+			.write()
+			.mode(SaveMode.Overwrite)
+			.format("parquet")
+			.save(targetPath);
+	}
+
+	public static Configuration getHadoopConfiguration(String nameNode) {
+		// ====== Init HDFS File System Object
+		Configuration conf = new Configuration();
+		// Set FileSystem URI
+		conf.set("fs.defaultFS", nameNode);
+		// Because of Maven
+		conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
+		conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
+
+		System.setProperty("hadoop.home.dir", "/");
+		return conf;
+	}
+
+	public static void populateOOZIEEnv(final Map<String, String> report) throws IOException {
+		File file = new File(System.getProperty("oozie.action.output.properties"));
+		Properties props = new Properties();
+		report.forEach((k, v) -> props.setProperty(k, v));
+
+		try (OutputStream os = new FileOutputStream(file)) {
+			props.store(os, "");
+		}
+	}
+
+	public static void populateOOZIEEnv(final String paramName, String value) throws IOException {
+		Map<String, String> report = Maps.newHashMap();
+		report.put(paramName, value);
+
+		populateOOZIEEnv(report);
+	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java
@ -15,8 +15,8 @@ public class ISLookupClientFactory {

 	private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);

-	private static int requestTimeout = 60000 * 10;
-	private static int connectTimeout = 60000 * 10;
+	private static final int requestTimeout = 60000 * 10;
+	private static final int connectTimeout = 60000 * 10;

 	public static ISLookUpService getLookUpService(final String isLookupUrl) {
 		return getServiceStub(ISLookUpService.class, isLookupUrl);
--- a/dhp-common/src/main/java/eu/dnetlib/message/Message.java
+++ b/dhp-common/src/main/java/eu/dnetlib/message/Message.java
@ -1,76 +0,0 @@
-
-package eu.dnetlib.message;
-
-import java.io.IOException;
-import java.util.Map;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-public class Message {
-
-	private String workflowId;
-
-	private String jobName;
-
-	private MessageType type;
-
-	private Map<String, String> body;
-
-	public static Message fromJson(final String json) throws IOException {
-		final ObjectMapper jsonMapper = new ObjectMapper();
-		return jsonMapper.readValue(json, Message.class);
-	}
-
-	public Message() {
-	}
-
-	public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) {
-		this.workflowId = workflowId;
-		this.jobName = jobName;
-		this.type = type;
-		this.body = body;
-	}
-
-	public String getWorkflowId() {
-		return workflowId;
-	}
-
-	public void setWorkflowId(String workflowId) {
-		this.workflowId = workflowId;
-	}
-
-	public String getJobName() {
-		return jobName;
-	}
-
-	public void setJobName(String jobName) {
-		this.jobName = jobName;
-	}
-
-	public MessageType getType() {
-		return type;
-	}
-
-	public void setType(MessageType type) {
-		this.type = type;
-	}
-
-	public Map<String, String> getBody() {
-		return body;
-	}
-
-	public void setBody(Map<String, String> body) {
-		this.body = body;
-	}
-
-	@Override
-	public String toString() {
-		final ObjectMapper jsonMapper = new ObjectMapper();
-		try {
-			return jsonMapper.writeValueAsString(this);
-		} catch (JsonProcessingException e) {
-			return null;
-		}
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/message/MessageConsumer.java
+++ b/dhp-common/src/main/java/eu/dnetlib/message/MessageConsumer.java
@ -1,47 +0,0 @@
-
-package eu.dnetlib.message;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.concurrent.LinkedBlockingQueue;
-
-import com.rabbitmq.client.AMQP;
-import com.rabbitmq.client.Channel;
-import com.rabbitmq.client.DefaultConsumer;
-import com.rabbitmq.client.Envelope;
-
-public class MessageConsumer extends DefaultConsumer {
-
-	final LinkedBlockingQueue<Message> queueMessages;
-
-	/**
-	 * Constructs a new instance and records its association to the passed-in channel.
-	 *
-	 * @param channel the channel to which this consumer is attached
-	 * @param queueMessages
-	 */
-	public MessageConsumer(Channel channel, LinkedBlockingQueue<Message> queueMessages) {
-		super(channel);
-		this.queueMessages = queueMessages;
-	}
-
-	@Override
-	public void handleDelivery(
-		String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body)
-		throws IOException {
-		final String json = new String(body, StandardCharsets.UTF_8);
-		Message message = Message.fromJson(json);
-		try {
-			this.queueMessages.put(message);
-			System.out.println("Receiving Message " + message);
-		} catch (InterruptedException e) {
-			if (message.getType() == MessageType.REPORT)
-				throw new RuntimeException("Error on sending message");
-			else {
-				// TODO LOGGING EXCEPTION
-			}
-		} finally {
-			getChannel().basicAck(envelope.getDeliveryTag(), false);
-		}
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java
+++ b/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java
@ -1,136 +0,0 @@
-
-package eu.dnetlib.message;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.TimeoutException;
-
-import com.rabbitmq.client.Channel;
-import com.rabbitmq.client.Connection;
-import com.rabbitmq.client.ConnectionFactory;
-
-public class MessageManager {
-
-	private final String messageHost;
-
-	private final String username;
-
-	private final String password;
-
-	private Connection connection;
-
-	private final Map<String, Channel> channels = new HashMap<>();
-
-	private boolean durable;
-
-	private boolean autodelete;
-
-	private final LinkedBlockingQueue<Message> queueMessages;
-
-	public MessageManager(
-		String messageHost,
-		String username,
-		String password,
-		final LinkedBlockingQueue<Message> queueMessages) {
-		this.queueMessages = queueMessages;
-		this.messageHost = messageHost;
-		this.username = username;
-		this.password = password;
-	}
-
-	public MessageManager(
-		String messageHost,
-		String username,
-		String password,
-		boolean durable,
-		boolean autodelete,
-		final LinkedBlockingQueue<Message> queueMessages) {
-		this.queueMessages = queueMessages;
-		this.messageHost = messageHost;
-		this.username = username;
-		this.password = password;
-
-		this.durable = durable;
-		this.autodelete = autodelete;
-	}
-
-	private Connection createConnection() throws IOException, TimeoutException {
-		ConnectionFactory factory = new ConnectionFactory();
-		factory.setHost(this.messageHost);
-		factory.setUsername(this.username);
-		factory.setPassword(this.password);
-		return factory.newConnection();
-	}
-
-	private Channel createChannel(
-		final Connection connection,
-		final String queueName,
-		final boolean durable,
-		final boolean autodelete)
-		throws Exception {
-		Map<String, Object> args = new HashMap<>();
-		args.put("x-message-ttl", 10000);
-		Channel channel = connection.createChannel();
-		channel.queueDeclare(queueName, durable, false, this.autodelete, args);
-		return channel;
-	}
-
-	private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete)
-		throws Exception {
-		if (channels.containsKey(queueName)) {
-			return channels.get(queueName);
-		}
-
-		if (this.connection == null) {
-			this.connection = createConnection();
-		}
-		channels.put(queueName, createChannel(this.connection, queueName, durable, autodelete));
-		return channels.get(queueName);
-	}
-
-	public void close() throws IOException {
-		channels
-			.values()
-			.forEach(
-				ch -> {
-					try {
-						ch.close();
-					} catch (Exception e) {
-						// TODO LOG
-					}
-				});
-
-		this.connection.close();
-	}
-
-	public boolean sendMessage(final Message message, String queueName) throws Exception {
-		try {
-			Channel channel = getOrCreateChannel(queueName, this.durable, this.autodelete);
-			channel.basicPublish("", queueName, null, message.toString().getBytes());
-			return true;
-		} catch (Throwable e) {
-			throw new RuntimeException(e);
-		}
-	}
-
-	public boolean sendMessage(
-		final Message message, String queueName, boolean durable_var, boolean autodelete_var)
-		throws Exception {
-		try {
-			Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var);
-			channel.basicPublish("", queueName, null, message.toString().getBytes());
-			return true;
-		} catch (Throwable e) {
-			throw new RuntimeException(e);
-		}
-	}
-
-	public void startConsumingMessage(
-		final String queueName, final boolean durable, final boolean autodelete) throws Exception {
-
-		Channel channel = createChannel(createConnection(), queueName, durable, autodelete);
-		channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages));
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/message/MessageType.java
+++ b/dhp-common/src/main/java/eu/dnetlib/message/MessageType.java
@ -1,6 +0,0 @@
-
-package eu.dnetlib.message;
-
-public enum MessageType {
-	ONGOING, REPORT
-}
--- a/dhp-common/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json
+++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/model/mdstore/MetadataRecordTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/model/mdstore/MetadataRecordTest.java
@ -1,16 +0,0 @@
-
-package eu.dnetlib.dhp.model.mdstore;
-
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import org.junit.jupiter.api.Test;
-
-public class MetadataRecordTest {
-
-	@Test
-	public void getTimestamp() {
-
-		MetadataRecord r = new MetadataRecord();
-		assertTrue(r.getDateOfCollection() > 0);
-	}
-}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
@ -0,0 +1,69 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Dataset;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class OafMapperUtilsTest {
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+
+	@Test
+	public void testMergePubs() throws IOException {
+		Publication p1 = read("publication_1.json", Publication.class);
+		Publication p2 = read("publication_2.json", Publication.class);
+		Dataset d1 = read("dataset_1.json", Dataset.class);
+		Dataset d2 = read("dataset_2.json", Dataset.class);
+
+		assertEquals(p1.getCollectedfrom().size(), 1);
+		assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID);
+		assertEquals(d2.getCollectedfrom().size(), 1);
+		assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
+
+		assertTrue(
+			OafMapperUtils
+				.mergeResults(p1, d2)
+				.getResulttype()
+				.getClassid()
+				.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID));
+
+		assertEquals(p2.getCollectedfrom().size(), 1);
+		assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
+		assertEquals(d1.getCollectedfrom().size(), 1);
+		assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
+
+		assertTrue(
+			OafMapperUtils
+				.mergeResults(p2, d1)
+				.getResulttype()
+				.getClassid()
+				.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID));
+	}
+
+	protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
+		return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new));
+	}
+
+	protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
+		final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
+		return OBJECT_MAPPER.readValue(json, clazz);
+	}
+
+}
--- a/dhp-common/src/test/java/eu/dnetlib/message/MessageTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/message/MessageTest.java
@ -1,51 +0,0 @@
-
-package eu.dnetlib.message;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.junit.jupiter.api.Test;
-
-public class MessageTest {
-
-	@Test
-	public void fromJsonTest() throws IOException {
-		Message m = new Message();
-		m.setWorkflowId("wId");
-		m.setType(MessageType.ONGOING);
-		m.setJobName("Collection");
-		Map<String, String> body = new HashMap<>();
-		body.put("parsedItem", "300");
-		body.put("ExecutionTime", "30s");
-
-		m.setBody(body);
-		System.out.println("m = " + m);
-		Message m1 = Message.fromJson(m.toString());
-		assertEquals(m1.getWorkflowId(), m.getWorkflowId());
-		assertEquals(m1.getType(), m.getType());
-		assertEquals(m1.getJobName(), m.getJobName());
-
-		assertNotNull(m1.getBody());
-		m1.getBody().keySet().forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
-		assertEquals(m1.getJobName(), m.getJobName());
-	}
-
-	@Test
-	public void toStringTest() {
-		final String expectedJson = "{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
-		Message m = new Message();
-		m.setWorkflowId("wId");
-		m.setType(MessageType.ONGOING);
-		m.setJobName("Collection");
-		Map<String, String> body = new HashMap<>();
-		body.put("parsedItem", "300");
-		body.put("ExecutionTime", "30s");
-
-		m.setBody(body);
-
-		assertEquals(expectedJson, m.toString());
-	}
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json
@ -0,0 +1 @@
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json
@ -0,0 +1 @@
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json
@ -0,0 +1 @@
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json
@ -0,0 +1 @@
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]}
--- a/dhp-workflows/dhp-actionmanager/pom.xml
+++ b/dhp-workflows/dhp-actionmanager/pom.xml
@ -51,16 +51,6 @@
            <artifactId>hadoop-distcp</artifactId>
        </dependency>

-        <dependency>
-            <groupId>eu.dnetlib</groupId>
-            <artifactId>dnet-openaire-data-protos</artifactId>
-        </dependency>
-
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-schemas</artifactId>
-        </dependency>
-
        <dependency>
            <groupId>eu.dnetlib</groupId>
            <artifactId>dnet-actionmanager-api</artifactId>
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/LicenseComparator.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/LicenseComparator.java
@ -1,69 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.migration;
-
-import java.util.Comparator;
-
-import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
-
-public class LicenseComparator implements Comparator<Qualifier> {
-
-	@Override
-	public int compare(Qualifier left, Qualifier right) {
-
-		if (left == null && right == null)
-			return 0;
-		if (left == null)
-			return 1;
-		if (right == null)
-			return -1;
-
-		String lClass = left.getClassid();
-		String rClass = right.getClassid();
-
-		if (lClass.equals(rClass))
-			return 0;
-
-		if (lClass.equals("OPEN SOURCE"))
-			return -1;
-		if (rClass.equals("OPEN SOURCE"))
-			return 1;
-
-		if (lClass.equals("OPEN"))
-			return -1;
-		if (rClass.equals("OPEN"))
-			return 1;
-
-		if (lClass.equals("6MONTHS"))
-			return -1;
-		if (rClass.equals("6MONTHS"))
-			return 1;
-
-		if (lClass.equals("12MONTHS"))
-			return -1;
-		if (rClass.equals("12MONTHS"))
-			return 1;
-
-		if (lClass.equals("EMBARGO"))
-			return -1;
-		if (rClass.equals("EMBARGO"))
-			return 1;
-
-		if (lClass.equals("RESTRICTED"))
-			return -1;
-		if (rClass.equals("RESTRICTED"))
-			return 1;
-
-		if (lClass.equals("CLOSED"))
-			return -1;
-		if (rClass.equals("CLOSED"))
-			return 1;
-
-		if (lClass.equals("UNKNOWN"))
-			return -1;
-		if (rClass.equals("UNKNOWN"))
-			return 1;
-
-		// Else (but unlikely), lexicographical ordering will do.
-		return lClass.compareTo(rClass);
-	}
-}
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java
@ -1,196 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.migration;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Properties;
-import java.util.stream.Collectors;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.tools.DistCp;
-import org.apache.hadoop.tools.DistCpOptions;
-import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.base.Splitter;
-import com.google.common.collect.Lists;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.utils.ISLookupClientFactory;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
-
-public class MigrateActionSet {
-
-	private static final Logger log = LoggerFactory.getLogger(MigrateActionSet.class);
-
-	private static final String SEPARATOR = "/";
-	private static final String TARGET_PATHS = "target_paths";
-	private static final String RAWSET_PREFIX = "rawset_";
-
-	public static void main(String[] args) throws Exception {
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					MigrateActionSet.class
-						.getResourceAsStream(
-							"/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json")));
-		parser.parseArgument(args);
-
-		new MigrateActionSet().run(parser);
-	}
-
-	private void run(ArgumentApplicationParser parser) throws Exception {
-
-		final String isLookupUrl = parser.get("isLookupUrl");
-		final String sourceNN = parser.get("sourceNameNode");
-		final String targetNN = parser.get("targetNameNode");
-		final String workDir = parser.get("workingDirectory");
-		final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps"));
-
-		final String distcp_memory_mb = parser.get("distcp_memory_mb");
-		final String distcp_task_timeout = parser.get("distcp_task_timeout");
-
-		final String transform_only_s = parser.get("transform_only");
-
-		log.info("transform only param: {}", transform_only_s);
-
-		final Boolean transformOnly = Boolean.valueOf(parser.get("transform_only"));
-
-		log.info("transform only: {}", transformOnly);
-
-		ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
-
-		Configuration conf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
-		FileSystem targetFS = FileSystem.get(conf);
-
-		Configuration sourceConf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
-		sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN);
-		FileSystem sourceFS = FileSystem.get(sourceConf);
-
-		Properties props = new Properties();
-
-		List<Path> targetPaths = new ArrayList<>();
-
-		final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp);
-		log
-			.info(
-				"paths to process:\n{}", sourcePaths
-					.stream()
-					.map(p -> p.toString())
-					.collect(Collectors.joining("\n")));
-
-		for (Path source : sourcePaths) {
-
-			if (!sourceFS.exists(source)) {
-				log.warn("skipping unexisting path: {}", source);
-			} else {
-
-				LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
-
-				final String rawSet = pathQ.pollLast();
-				log.info("got RAWSET: {}", rawSet);
-
-				if (StringUtils.isNotBlank(rawSet) && rawSet.startsWith(RAWSET_PREFIX)) {
-
-					final String actionSetDirectory = pathQ.pollLast();
-
-					final Path targetPath = new Path(
-						targetNN + workDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawSet);
-
-					log.info("using TARGET PATH: {}", targetPath);
-
-					if (!transformOnly) {
-						if (targetFS.exists(targetPath)) {
-							targetFS.delete(targetPath, true);
-						}
-						runDistcp(
-							distcp_num_maps, distcp_memory_mb, distcp_task_timeout, conf, source, targetPath);
-					}
-
-					targetPaths.add(targetPath);
-				}
-			}
-		}
-
-		final String targetPathsCsv = targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(","));
-		props.setProperty(TARGET_PATHS, targetPathsCsv);
-		File file = new File(System.getProperty("oozie.action.output.properties"));
-
-		try (OutputStream os = new FileOutputStream(file)) {
-			props.store(os, "");
-		}
-		System.out.println(file.getAbsolutePath());
-	}
-
-	private void runDistcp(
-		Integer distcp_num_maps,
-		String distcp_memory_mb,
-		String distcp_task_timeout,
-		Configuration conf,
-		Path source,
-		Path targetPath)
-		throws Exception {
-
-		final DistCpOptions op = new DistCpOptions(source, targetPath);
-		op.setMaxMaps(distcp_num_maps);
-		op.preserve(DistCpOptions.FileAttribute.BLOCKSIZE);
-		op.preserve(DistCpOptions.FileAttribute.REPLICATION);
-		op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE);
-
-		int res = ToolRunner
-			.run(
-				new DistCp(conf, op),
-				new String[] {
-					"-Dmapred.task.timeout=" + distcp_task_timeout,
-					"-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
-					"-pb",
-					"-m " + distcp_num_maps,
-					source.toString(),
-					targetPath.toString()
-				});
-
-		if (res != 0) {
-			throw new RuntimeException(String.format("distcp exited with code %s", res));
-		}
-	}
-
-	private Configuration getConfiguration(
-		String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
-		final Configuration conf = new Configuration();
-		conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout);
-		conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout);
-		conf.set("dfs.http.client.retry.policy.enabled", "true");
-		conf.set("mapred.task.timeout", distcp_task_timeout);
-		conf.set("mapreduce.map.memory.mb", distcp_memory_mb);
-		conf.set("mapred.map.tasks", String.valueOf(distcp_num_maps));
-		return conf;
-	}
-
-	private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp)
-		throws ISLookUpException {
-		String XQUERY = "distinct-values(\n"
-			+ "let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n"
-			+ "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n"
-			+ "let $setDir := $x//SET/@directory/string()\n"
-			+ "let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n"
-			+ "return concat($basePath, '/', $setDir, '/', $rawSet))";
-
-		log.info(String.format("running xquery:\n%s", XQUERY));
-		return isLookUp
-			.quickSearchProfile(XQUERY)
-			.stream()
-			.map(p -> sourceNN + p)
-			.map(Path::new)
-			.collect(Collectors.toList());
-	}
-}
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
@ -1,710 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.migration;
-
-import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.stream.Collectors;
-
-import org.apache.commons.lang3.StringUtils;
-
-import com.google.common.collect.Lists;
-import com.googlecode.protobuf.format.JsonFormat;
-
-import eu.dnetlib.data.proto.*;
-import eu.dnetlib.dhp.schema.oaf.*;
-
-public class ProtoConverter implements Serializable {
-
-	public static Oaf convert(OafProtos.Oaf oaf) {
-		try {
-			switch (oaf.getKind()) {
-				case entity:
-					return convertEntity(oaf);
-				case relation:
-					return convertRelation(oaf);
-				default:
-					throw new IllegalArgumentException("invalid kind " + oaf.getKind());
-			}
-		} catch (Throwable e) {
-			throw new RuntimeException("error on getting " + JsonFormat.printToString(oaf), e);
-		}
-	}
-
-	private static Relation convertRelation(OafProtos.Oaf oaf) {
-		final OafProtos.OafRel r = oaf.getRel();
-		final Relation rel = new Relation();
-		rel.setDataInfo(mapDataInfo(oaf.getDataInfo()));
-		rel.setLastupdatetimestamp(oaf.getLastupdatetimestamp());
-		rel.setSource(r.getSource());
-		rel.setTarget(r.getTarget());
-		rel.setRelType(r.getRelType().toString());
-		rel.setSubRelType(r.getSubRelType().toString());
-		rel.setRelClass(r.getRelClass());
-		rel
-			.setCollectedfrom(
-				r.getCollectedfromCount() > 0
-					? r.getCollectedfromList().stream().map(kv -> mapKV(kv)).collect(Collectors.toList())
-					: null);
-		return rel;
-	}
-
-	private static OafEntity convertEntity(OafProtos.Oaf oaf) {
-
-		switch (oaf.getEntity().getType()) {
-			case result:
-				final Result r = convertResult(oaf);
-				r.setInstance(convertInstances(oaf));
-				r.setExternalReference(convertExternalRefs(oaf));
-				return r;
-			case project:
-				return convertProject(oaf);
-			case datasource:
-				return convertDataSource(oaf);
-			case organization:
-				return convertOrganization(oaf);
-			default:
-				throw new RuntimeException("received unknown type");
-		}
-	}
-
-	private static List<Instance> convertInstances(OafProtos.Oaf oaf) {
-
-		final ResultProtos.Result r = oaf.getEntity().getResult();
-		if (r.getInstanceCount() > 0) {
-			return r.getInstanceList().stream().map(i -> convertInstance(i)).collect(Collectors.toList());
-		}
-		return Lists.newArrayList();
-	}
-
-	private static Instance convertInstance(ResultProtos.Result.Instance ri) {
-		final Instance i = new Instance();
-		i.setAccessright(mapQualifier(ri.getAccessright()));
-		i.setCollectedfrom(mapKV(ri.getCollectedfrom()));
-		i.setDateofacceptance(mapStringField(ri.getDateofacceptance()));
-		i.setDistributionlocation(ri.getDistributionlocation());
-		i.setHostedby(mapKV(ri.getHostedby()));
-		i.setInstancetype(mapQualifier(ri.getInstancetype()));
-		i.setLicense(mapStringField(ri.getLicense()));
-		i
-			.setUrl(
-				ri.getUrlList() != null ? ri
-					.getUrlList()
-					.stream()
-					.distinct()
-					.collect(Collectors.toCollection(ArrayList::new)) : null);
-		i.setRefereed(mapRefereed(ri.getRefereed()));
-		i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
-		i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
-		return i;
-	}
-
-	private static Qualifier mapRefereed(FieldTypeProtos.StringField refereed) {
-		Qualifier q = new Qualifier();
-		q.setClassid(refereed.getValue());
-		q.setSchemename(refereed.getValue());
-		q.setSchemeid("dnet:review_levels");
-		q.setSchemename("dnet:review_levels");
-		return q;
-	}
-
-	private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
-		ResultProtos.Result r = oaf.getEntity().getResult();
-		if (r.getExternalReferenceCount() > 0) {
-			return r
-				.getExternalReferenceList()
-				.stream()
-				.map(e -> convertExtRef(e))
-				.collect(Collectors.toList());
-		}
-		return Lists.newArrayList();
-	}
-
-	private static ExternalReference convertExtRef(ResultProtos.Result.ExternalReference e) {
-		ExternalReference ex = new ExternalReference();
-		ex.setUrl(e.getUrl());
-		ex.setSitename(e.getSitename());
-		ex.setRefidentifier(e.getRefidentifier());
-		ex.setQuery(e.getQuery());
-		ex.setQualifier(mapQualifier(e.getQualifier()));
-		ex.setLabel(e.getLabel());
-		ex.setDescription(e.getDescription());
-		ex.setDataInfo(ex.getDataInfo());
-		return ex;
-	}
-
-	private static Organization convertOrganization(OafProtos.Oaf oaf) {
-		final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
-		final Organization org = setOaf(new Organization(), oaf);
-		setEntity(org, oaf);
-		org.setLegalshortname(mapStringField(m.getLegalshortname()));
-		org.setLegalname(mapStringField(m.getLegalname()));
-		org
-			.setAlternativeNames(
-				m
-					.getAlternativeNamesList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		org.setWebsiteurl(mapStringField(m.getWebsiteurl()));
-		org.setLogourl(mapStringField(m.getLogourl()));
-		org.setEclegalbody(mapStringField(m.getEclegalbody()));
-		org.setEclegalperson(mapStringField(m.getEclegalperson()));
-		org.setEcnonprofit(mapStringField(m.getEcnonprofit()));
-		org.setEcresearchorganization(mapStringField(m.getEcresearchorganization()));
-		org.setEchighereducation(mapStringField(m.getEchighereducation()));
-		org
-			.setEcinternationalorganizationeurinterests(
-				mapStringField(m.getEcinternationalorganizationeurinterests()));
-		org.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization()));
-		org.setEcenterprise(mapStringField(m.getEcenterprise()));
-		org.setEcsmevalidated(mapStringField(m.getEcsmevalidated()));
-		org.setEcnutscode(mapStringField(m.getEcnutscode()));
-		org.setCountry(mapQualifier(m.getCountry()));
-
-		return org;
-	}
-
-	private static Datasource convertDataSource(OafProtos.Oaf oaf) {
-		final DatasourceProtos.Datasource.Metadata m = oaf.getEntity().getDatasource().getMetadata();
-		final Datasource datasource = setOaf(new Datasource(), oaf);
-		setEntity(datasource, oaf);
-		datasource
-			.setAccessinfopackage(
-				m
-					.getAccessinfopackageList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		datasource.setCertificates(mapStringField(m.getCertificates()));
-		datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl()));
-		datasource.setContactemail(mapStringField(m.getContactemail()));
-		datasource.setDatabaseaccessrestriction(mapStringField(m.getDatabaseaccessrestriction()));
-		datasource.setDatabaseaccesstype(mapStringField(m.getDatabaseaccesstype()));
-		datasource.setDataprovider(mapBoolField(m.getDataprovider()));
-		datasource.setDatasourcetype(mapQualifier(m.getDatasourcetype()));
-		datasource.setDatauploadrestriction(mapStringField(m.getDatauploadrestriction()));
-		datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl()));
-		datasource.setDatauploadtype(mapStringField(m.getDatauploadtype()));
-		datasource.setDateofvalidation(mapStringField(m.getDateofvalidation()));
-		datasource.setDescription(mapStringField(m.getDescription()));
-		datasource.setEnglishname(mapStringField(m.getEnglishname()));
-		datasource.setLatitude(mapStringField(m.getLatitude()));
-		datasource.setLongitude(mapStringField(m.getLongitude()));
-		datasource.setLogourl(mapStringField(m.getLogourl()));
-		datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl()));
-		datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix()));
-		datasource
-			.setOdcontenttypes(
-				m
-					.getOdcontenttypesList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		datasource
-			.setOdlanguages(
-				m
-					.getOdlanguagesList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems()));
-		datasource.setOdnumberofitemsdate(mapStringField(m.getOdnumberofitemsdate()));
-		datasource.setOdpolicies(mapStringField(m.getOdpolicies()));
-		datasource.setOfficialname(mapStringField(m.getOfficialname()));
-		datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility()));
-		datasource.setPidsystems(mapStringField(m.getPidsystems()));
-		datasource
-			.setPolicies(
-				m.getPoliciesList().stream().map(ProtoConverter::mapKV).collect(Collectors.toList()));
-		datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind()));
-		datasource.setReleaseenddate(mapStringField(m.getReleaseenddate()));
-		datasource.setServiceprovider(mapBoolField(m.getServiceprovider()));
-		datasource.setReleasestartdate(mapStringField(m.getReleasestartdate()));
-		datasource
-			.setSubjects(
-				m
-					.getSubjectsList()
-					.stream()
-					.map(ProtoConverter::mapStructuredProperty)
-					.collect(Collectors.toList()));
-		datasource.setVersioning(mapBoolField(m.getVersioning()));
-		datasource.setWebsiteurl(mapStringField(m.getWebsiteurl()));
-		datasource.setJournal(mapJournal(m.getJournal()));
-
-		return datasource;
-	}
-
-	private static Project convertProject(OafProtos.Oaf oaf) {
-		final ProjectProtos.Project.Metadata m = oaf.getEntity().getProject().getMetadata();
-		final Project project = setOaf(new Project(), oaf);
-		setEntity(project, oaf);
-		project.setAcronym(mapStringField(m.getAcronym()));
-		project.setCallidentifier(mapStringField(m.getCallidentifier()));
-		project.setCode(mapStringField(m.getCode()));
-		project.setContactemail(mapStringField(m.getContactemail()));
-		project.setContactfax(mapStringField(m.getContactfax()));
-		project.setContactfullname(mapStringField(m.getContactfullname()));
-		project.setContactphone(mapStringField(m.getContactphone()));
-		project.setContracttype(mapQualifier(m.getContracttype()));
-		project.setCurrency(mapStringField(m.getCurrency()));
-		project.setDuration(mapStringField(m.getDuration()));
-		project.setEcarticle29_3(mapStringField(m.getEcarticle293()));
-		project.setEcsc39(mapStringField(m.getEcsc39()));
-		project.setOamandatepublications(mapStringField(m.getOamandatepublications()));
-		project.setStartdate(mapStringField(m.getStartdate()));
-		project.setEnddate(mapStringField(m.getEnddate()));
-		project.setFundedamount(m.getFundedamount());
-		project.setTotalcost(m.getTotalcost());
-		project.setKeywords(mapStringField(m.getKeywords()));
-		project
-			.setSubjects(
-				m
-					.getSubjectsList()
-					.stream()
-					.map(sp -> mapStructuredProperty(sp))
-					.collect(Collectors.toList()));
-		project.setTitle(mapStringField(m.getTitle()));
-		project.setWebsiteurl(mapStringField(m.getWebsiteurl()));
-		project
-			.setFundingtree(
-				m.getFundingtreeList().stream().map(f -> mapStringField(f)).collect(Collectors.toList()));
-		project.setJsonextrainfo(mapStringField(m.getJsonextrainfo()));
-		project.setSummary(mapStringField(m.getSummary()));
-		project.setOptional1(mapStringField(m.getOptional1()));
-		project.setOptional2(mapStringField(m.getOptional2()));
-		return project;
-	}
-
-	private static Result convertResult(OafProtos.Oaf oaf) {
-		switch (oaf.getEntity().getResult().getMetadata().getResulttype().getClassid()) {
-			case "dataset":
-				return createDataset(oaf);
-			case "publication":
-				return createPublication(oaf);
-			case "software":
-				return createSoftware(oaf);
-			case "other":
-				return createORP(oaf);
-			default:
-				Result result = setOaf(new Result(), oaf);
-				setEntity(result, oaf);
-				return setResult(result, oaf);
-		}
-	}
-
-	private static Software createSoftware(OafProtos.Oaf oaf) {
-		ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
-		Software software = setOaf(new Software(), oaf);
-		setEntity(software, oaf);
-		setResult(software, oaf);
-
-		software
-			.setDocumentationUrl(
-				m
-					.getDocumentationUrlList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		software
-			.setLicense(
-				m
-					.getLicenseList()
-					.stream()
-					.map(ProtoConverter::mapStructuredProperty)
-					.collect(Collectors.toList()));
-		software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl()));
-		software.setProgrammingLanguage(mapQualifier(m.getProgrammingLanguage()));
-		return software;
-	}
-
-	private static OtherResearchProduct createORP(OafProtos.Oaf oaf) {
-		ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
-		OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf);
-		setEntity(otherResearchProducts, oaf);
-		setResult(otherResearchProducts, oaf);
-		otherResearchProducts
-			.setContactperson(
-				m
-					.getContactpersonList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		otherResearchProducts
-			.setContactgroup(
-				m
-					.getContactgroupList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		otherResearchProducts
-			.setTool(
-				m.getToolList().stream().map(ProtoConverter::mapStringField).collect(Collectors.toList()));
-
-		return otherResearchProducts;
-	}
-
-	private static Publication createPublication(OafProtos.Oaf oaf) {
-
-		ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
-		Publication publication = setOaf(new Publication(), oaf);
-		setEntity(publication, oaf);
-		setResult(publication, oaf);
-		publication.setJournal(mapJournal(m.getJournal()));
-		return publication;
-	}
-
-	private static Dataset createDataset(OafProtos.Oaf oaf) {
-
-		ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
-		Dataset dataset = setOaf(new Dataset(), oaf);
-		setEntity(dataset, oaf);
-		setResult(dataset, oaf);
-		dataset.setStoragedate(mapStringField(m.getStoragedate()));
-		dataset.setDevice(mapStringField(m.getDevice()));
-		dataset.setSize(mapStringField(m.getSize()));
-		dataset.setVersion(mapStringField(m.getVersion()));
-		dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate()));
-		dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber()));
-		dataset
-			.setGeolocation(
-				m
-					.getGeolocationList()
-					.stream()
-					.map(ProtoConverter::mapGeolocation)
-					.collect(Collectors.toList()));
-		return dataset;
-	}
-
-	public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) {
-		oaf.setDataInfo(mapDataInfo(o.getDataInfo()));
-		oaf.setLastupdatetimestamp(o.getLastupdatetimestamp());
-		return oaf;
-	}
-
-	public static <T extends OafEntity> T setEntity(T entity, OafProtos.Oaf oaf) {
-		// setting Entity fields
-		final OafProtos.OafEntity e = oaf.getEntity();
-		entity.setId(e.getId());
-		entity.setOriginalId(e.getOriginalIdList());
-		entity
-			.setCollectedfrom(
-				e.getCollectedfromList().stream().map(ProtoConverter::mapKV).collect(Collectors.toList()));
-		entity
-			.setPid(
-				e
-					.getPidList()
-					.stream()
-					.map(ProtoConverter::mapStructuredProperty)
-					.collect(Collectors.toList()));
-		entity.setDateofcollection(e.getDateofcollection());
-		entity.setDateoftransformation(e.getDateoftransformation());
-		entity
-			.setExtraInfo(
-				e
-					.getExtraInfoList()
-					.stream()
-					.map(ProtoConverter::mapExtraInfo)
-					.collect(Collectors.toList()));
-		return entity;
-	}
-
-	public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
-		// setting Entity fields
-		final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
-		entity
-			.setAuthor(
-				m.getAuthorList().stream().map(ProtoConverter::mapAuthor).collect(Collectors.toList()));
-		entity.setResulttype(mapQualifier(m.getResulttype()));
-		entity.setLanguage(mapQualifier(m.getLanguage()));
-		entity
-			.setCountry(
-				m
-					.getCountryList()
-					.stream()
-					.map(ProtoConverter::mapQualifierAsCountry)
-					.collect(Collectors.toList()));
-		entity
-			.setSubject(
-				m
-					.getSubjectList()
-					.stream()
-					.map(ProtoConverter::mapStructuredProperty)
-					.collect(Collectors.toList()));
-		entity
-			.setTitle(
-				m
-					.getTitleList()
-					.stream()
-					.map(ProtoConverter::mapStructuredProperty)
-					.collect(Collectors.toList()));
-		entity
-			.setRelevantdate(
-				m
-					.getRelevantdateList()
-					.stream()
-					.map(ProtoConverter::mapStructuredProperty)
-					.collect(Collectors.toList()));
-		entity
-			.setDescription(
-				m
-					.getDescriptionList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		entity.setDateofacceptance(mapStringField(m.getDateofacceptance()));
-		entity.setPublisher(mapStringField(m.getPublisher()));
-		entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate()));
-		entity
-			.setSource(
-				m
-					.getSourceList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		entity
-			.setFulltext(
-				m
-					.getFulltextList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		entity
-			.setFormat(
-				m
-					.getFormatList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		entity
-			.setContributor(
-				m
-					.getContributorList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		entity.setResourcetype(mapQualifier(m.getResourcetype()));
-		entity
-			.setCoverage(
-				m
-					.getCoverageList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		entity
-			.setContext(
-				m.getContextList().stream().map(ProtoConverter::mapContext).collect(Collectors.toList()));
-
-		entity.setBestaccessright(getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
-
-		return entity;
-	}
-
-	private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) {
-		if (instanceList != null) {
-			final Optional<FieldTypeProtos.Qualifier> min = instanceList
-				.stream()
-				.map(i -> i.getAccessright())
-				.min(new LicenseComparator());
-
-			final Qualifier rights = min.isPresent() ? mapQualifier(min.get()) : new Qualifier();
-
-			if (StringUtils.isBlank(rights.getClassid())) {
-				rights.setClassid(UNKNOWN);
-			}
-			if (StringUtils.isBlank(rights.getClassname())
-				|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
-				rights.setClassname(NOT_AVAILABLE);
-			}
-			if (StringUtils.isBlank(rights.getSchemeid())) {
-				rights.setSchemeid(DNET_ACCESS_MODES);
-			}
-			if (StringUtils.isBlank(rights.getSchemename())) {
-				rights.setSchemename(DNET_ACCESS_MODES);
-			}
-
-			return rights;
-		}
-		return null;
-	}
-
-	private static Context mapContext(ResultProtos.Result.Context context) {
-		if (context == null || StringUtils.isBlank(context.getId())) {
-			return null;
-		}
-		final Context entity = new Context();
-		entity.setId(context.getId());
-		entity
-			.setDataInfo(
-				context
-					.getDataInfoList()
-					.stream()
-					.map(ProtoConverter::mapDataInfo)
-					.collect(Collectors.toList()));
-		return entity;
-	}
-
-	public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
-		if (kv == null || StringUtils.isBlank(kv.getKey()) & StringUtils.isBlank(kv.getValue())) {
-			return null;
-		}
-
-		final KeyValue keyValue = new KeyValue();
-		keyValue.setKey(kv.getKey());
-		keyValue.setValue(kv.getValue());
-		keyValue.setDataInfo(mapDataInfo(kv.getDataInfo()));
-		return keyValue;
-	}
-
-	public static DataInfo mapDataInfo(FieldTypeProtos.DataInfo d) {
-		final DataInfo dataInfo = new DataInfo();
-		dataInfo.setDeletedbyinference(d.getDeletedbyinference());
-		dataInfo.setInferenceprovenance(d.getInferenceprovenance());
-		dataInfo.setInferred(d.getInferred());
-		dataInfo.setInvisible(d.getInvisible());
-		dataInfo.setProvenanceaction(mapQualifier(d.getProvenanceaction()));
-		dataInfo.setTrust(d.getTrust());
-		return dataInfo;
-	}
-
-	public static Qualifier mapQualifier(FieldTypeProtos.Qualifier q) {
-		final Qualifier qualifier = new Qualifier();
-		qualifier.setClassid(q.getClassid());
-		qualifier.setClassname(q.getClassname());
-		qualifier.setSchemeid(q.getSchemeid());
-		qualifier.setSchemename(q.getSchemename());
-		return qualifier;
-	}
-
-	public static Country mapQualifierAsCountry(FieldTypeProtos.Qualifier q) {
-		final Country c = new Country();
-		c.setClassid(q.getClassid());
-		c.setClassname(q.getClassname());
-		c.setSchemeid(q.getSchemeid());
-		c.setSchemename(q.getSchemename());
-		c.setDataInfo(mapDataInfo(q.getDataInfo()));
-		return c;
-	}
-
-	public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
-		if (sp == null | StringUtils.isBlank(sp.getValue())) {
-			return null;
-		}
-
-		final StructuredProperty structuredProperty = new StructuredProperty();
-		structuredProperty.setValue(sp.getValue());
-		structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
-		structuredProperty.setDataInfo(mapDataInfo(sp.getDataInfo()));
-		return structuredProperty;
-	}
-
-	public static ExtraInfo mapExtraInfo(FieldTypeProtos.ExtraInfo extraInfo) {
-		final ExtraInfo entity = new ExtraInfo();
-		entity.setName(extraInfo.getName());
-		entity.setTypology(extraInfo.getTypology());
-		entity.setProvenance(extraInfo.getProvenance());
-		entity.setTrust(extraInfo.getTrust());
-		entity.setValue(extraInfo.getValue());
-		return entity;
-	}
-
-	public static OAIProvenance mapOAIProvenance(FieldTypeProtos.OAIProvenance oaiProvenance) {
-		final OAIProvenance entity = new OAIProvenance();
-		entity.setOriginDescription(mapOriginalDescription(oaiProvenance.getOriginDescription()));
-		return entity;
-	}
-
-	public static OriginDescription mapOriginalDescription(
-		FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
-		final OriginDescription originDescriptionResult = new OriginDescription();
-		originDescriptionResult.setHarvestDate(originDescription.getHarvestDate());
-		originDescriptionResult.setAltered(originDescription.getAltered());
-		originDescriptionResult.setBaseURL(originDescription.getBaseURL());
-		originDescriptionResult.setIdentifier(originDescription.getIdentifier());
-		originDescriptionResult.setDatestamp(originDescription.getDatestamp());
-		originDescriptionResult.setMetadataNamespace(originDescription.getMetadataNamespace());
-		return originDescriptionResult;
-	}
-
-	public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
-		if (s == null || StringUtils.isBlank(s.getValue())) {
-			return null;
-		}
-
-		final Field<String> stringField = new Field<>();
-		stringField.setValue(s.getValue());
-		stringField.setDataInfo(mapDataInfo(s.getDataInfo()));
-		return stringField;
-	}
-
-	public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
-		if (b == null) {
-			return null;
-		}
-
-		final Field<Boolean> booleanField = new Field<>();
-		booleanField.setValue(b.getValue());
-		booleanField.setDataInfo(mapDataInfo(b.getDataInfo()));
-		return booleanField;
-	}
-
-	public static Journal mapJournal(FieldTypeProtos.Journal j) {
-		final Journal journal = new Journal();
-		journal.setConferencedate(j.getConferencedate());
-		journal.setConferenceplace(j.getConferenceplace());
-		journal.setEdition(j.getEdition());
-		journal.setEp(j.getEp());
-		journal.setIss(j.getIss());
-		journal.setIssnLinking(j.getIssnLinking());
-		journal.setIssnOnline(j.getIssnOnline());
-		journal.setIssnPrinted(j.getIssnPrinted());
-		journal.setName(j.getName());
-		journal.setSp(j.getSp());
-		journal.setVol(j.getVol());
-		journal.setDataInfo(mapDataInfo(j.getDataInfo()));
-		return journal;
-	}
-
-	public static Author mapAuthor(FieldTypeProtos.Author author) {
-		final Author entity = new Author();
-		entity.setFullname(author.getFullname());
-		entity.setName(author.getName());
-		entity.setSurname(author.getSurname());
-		entity.setRank(author.getRank());
-		entity
-			.setPid(
-				author
-					.getPidList()
-					.stream()
-					.map(
-						kv -> {
-							final StructuredProperty sp = new StructuredProperty();
-							sp.setValue(kv.getValue());
-							final Qualifier q = new Qualifier();
-							q.setClassid(kv.getKey());
-							q.setClassname(kv.getKey());
-							sp.setQualifier(q);
-							return sp;
-						})
-					.collect(Collectors.toList()));
-		entity
-			.setAffiliation(
-				author
-					.getAffiliationList()
-					.stream()
-					.map(ProtoConverter::mapStringField)
-					.collect(Collectors.toList()));
-		return entity;
-	}
-
-	public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) {
-		final GeoLocation entity = new GeoLocation();
-		entity.setPoint(geoLocation.getPoint());
-		entity.setBox(geoLocation.getBox());
-		entity.setPlace(geoLocation.getPlace());
-		return entity;
-	}
-}
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/TransformActions.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/TransformActions.java
@ -1,172 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.migration;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.LinkedList;
-import java.util.Objects;
-import java.util.Optional;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SparkSession;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Splitter;
-import com.google.common.collect.Lists;
-import com.google.protobuf.InvalidProtocolBufferException;
-
-import eu.dnetlib.data.proto.OafProtos;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.schema.action.AtomicAction;
-import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.utils.ISLookupClientFactory;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
-import scala.Tuple2;
-
-public class TransformActions implements Serializable {
-
-	private static final Logger log = LoggerFactory.getLogger(TransformActions.class);
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	private static final String SEPARATOR = "/";
-
-	public static void main(String[] args) throws Exception {
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					MigrateActionSet.class
-						.getResourceAsStream(
-							"/eu/dnetlib/dhp/actionmanager/migration/transform_actionsets_parameters.json")));
-		parser.parseArgument(args);
-
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
-		final String isLookupUrl = parser.get("isLookupUrl");
-		log.info("isLookupUrl: {}", isLookupUrl);
-
-		final String inputPaths = parser.get("inputPaths");
-
-		if (StringUtils.isBlank(inputPaths)) {
-			throw new RuntimeException("empty inputPaths");
-		}
-		log.info("inputPaths: {}", inputPaths);
-
-		final String targetBaseDir = getTargetBaseDir(isLookupUrl);
-
-		SparkConf conf = new SparkConf();
-
-		runWithSparkSession(
-			conf, isSparkSessionManaged, spark -> transformActions(inputPaths, targetBaseDir, spark));
-	}
-
-	private static void transformActions(String inputPaths, String targetBaseDir, SparkSession spark)
-		throws IOException {
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-		final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
-
-		for (String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
-
-			LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
-
-			final String rawset = pathQ.pollLast();
-			final String actionSetDirectory = pathQ.pollLast();
-
-			final Path targetDirectory = new Path(targetBaseDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawset);
-
-			if (fs.exists(targetDirectory)) {
-				log.info("found target directory '{}", targetDirectory);
-				fs.delete(targetDirectory, true);
-				log.info("deleted target directory '{}", targetDirectory);
-			}
-
-			log.info("transforming actions from '{}' to '{}'", sourcePath, targetDirectory);
-
-			sc
-				.sequenceFile(sourcePath, Text.class, Text.class)
-				.map(a -> eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(a._2().toString()))
-				.map(TransformActions::doTransform)
-				.filter(Objects::nonNull)
-				.mapToPair(
-					a -> new Tuple2<>(a.getClazz().toString(), OBJECT_MAPPER.writeValueAsString(a)))
-				.mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2())))
-				.saveAsNewAPIHadoopFile(
-					targetDirectory.toString(),
-					Text.class,
-					Text.class,
-					SequenceFileOutputFormat.class,
-					sc.hadoopConfiguration());
-		}
-	}
-
-	private static AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa)
-		throws InvalidProtocolBufferException {
-
-		// dedup similarity relations had empty target value, don't migrate them
-		if (aa.getTargetValue().length == 0) {
-			return null;
-		}
-		final OafProtos.Oaf proto_oaf = OafProtos.Oaf.parseFrom(aa.getTargetValue());
-		final Oaf oaf = ProtoConverter.convert(proto_oaf);
-		switch (proto_oaf.getKind()) {
-			case entity:
-				switch (proto_oaf.getEntity().getType()) {
-					case datasource:
-						return new AtomicAction<>(Datasource.class, (Datasource) oaf);
-					case organization:
-						return new AtomicAction<>(Organization.class, (Organization) oaf);
-					case project:
-						return new AtomicAction<>(Project.class, (Project) oaf);
-					case result:
-						final String resulttypeid = proto_oaf
-							.getEntity()
-							.getResult()
-							.getMetadata()
-							.getResulttype()
-							.getClassid();
-						switch (resulttypeid) {
-							case "publication":
-								return new AtomicAction<>(Publication.class, (Publication) oaf);
-							case "software":
-								return new AtomicAction<>(Software.class, (Software) oaf);
-							case "other":
-								return new AtomicAction<>(OtherResearchProduct.class, (OtherResearchProduct) oaf);
-							case "dataset":
-								return new AtomicAction<>(Dataset.class, (Dataset) oaf);
-							default:
-								// can be an update, where the resulttype is not specified
-								return new AtomicAction<>(Result.class, (Result) oaf);
-						}
-					default:
-						throw new IllegalArgumentException(
-							"invalid entity type: " + proto_oaf.getEntity().getType());
-				}
-			case relation:
-				return new AtomicAction<>(Relation.class, (Relation) oaf);
-			default:
-				throw new IllegalArgumentException("invalid kind: " + proto_oaf.getKind());
-		}
-	}
-
-	private static String getTargetBaseDir(String isLookupUrl) throws ISLookUpException {
-		ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
-		String XQUERY = "collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
-		return isLookUp.getResourceProfileByQuery(XQUERY);
-	}
-}
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java
@ -5,12 +5,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;

 import java.io.IOException;
-import java.util.Objects;
 import java.util.Optional;
 import java.util.function.BiFunction;
 import java.util.function.Function;

 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
@ -68,6 +68,12 @@ public class PromoteActionPayloadForGraphTableJob {
 		MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
 		logger.info("strategy: {}", strategy);

+		Boolean shouldGroupById = Optional
+			.ofNullable(parser.get("shouldGroupById"))
+			.map(Boolean::valueOf)
+			.orElse(true);
+		logger.info("shouldGroupById: {}", shouldGroupById);
+
 		Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
 		Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);

@ -89,7 +95,8 @@ public class PromoteActionPayloadForGraphTableJob {
 					outputGraphTablePath,
 					strategy,
 					rowClazz,
-					actionPayloadClazz);
+					actionPayloadClazz,
+					shouldGroupById);
 			});
 	}

@ -115,12 +122,12 @@ public class PromoteActionPayloadForGraphTableJob {
 		String outputGraphTablePath,
 		MergeAndGet.Strategy strategy,
 		Class<G> rowClazz,
-		Class<A> actionPayloadClazz) {
+		Class<A> actionPayloadClazz, Boolean shouldGroupById) {
 		Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
 		Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);

 		Dataset<G> result = promoteActionPayloadForGraphTable(
-			rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
+			rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById)
 				.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));

 		saveGraphTable(result, outputGraphTablePath);
@ -153,9 +160,9 @@ public class PromoteActionPayloadForGraphTableJob {

 	private static String extractPayload(Row value) {
 		try {
-			return value.<String> getAs("payload");
+			return value.getAs("payload");
 		} catch (IllegalArgumentException | ClassCastException e) {
-			logger.error("cannot extract payload from action: {}", value.toString());
+			logger.error("cannot extract payload from action: {}", value);
 			throw e;
 		}
 	}
@ -174,7 +181,8 @@ public class PromoteActionPayloadForGraphTableJob {
 		Dataset<A> actionPayloadDS,
 		MergeAndGet.Strategy strategy,
 		Class<G> rowClazz,
-		Class<A> actionPayloadClazz) {
+		Class<A> actionPayloadClazz,
+		Boolean shouldGroupById) {
 		logger
 			.info(
 				"Promoting action payload for graph table: payload={}, table={}",
@ -186,7 +194,7 @@ public class PromoteActionPayloadForGraphTableJob {
 		SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
 		SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
 		SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
-		SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
+		SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget;

 		Dataset<G> joinedAndMerged = PromoteActionPayloadFunctions
 			.joinGraphTableWithActionPayloadAndMerge(
@ -198,9 +206,13 @@ public class PromoteActionPayloadForGraphTableJob {
 				rowClazz,
 				actionPayloadClazz);

-		return PromoteActionPayloadFunctions
-			.groupGraphTableByIdAndMerge(
-				joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
+		if (shouldGroupById) {
+			return PromoteActionPayloadFunctions
+				.groupGraphTableByIdAndMerge(
+					joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
+		} else {
+			return joinedAndMerged;
+		}
 	}

 	private static <T extends Oaf> SerializableSupplier<T> zeroFn(Class<T> clazz) {
@ -226,12 +238,13 @@ public class PromoteActionPayloadForGraphTableJob {
 		}
 	}

-	private static <T extends Oaf> Function<T, Boolean> isNotZeroFnUsingIdOrSource() {
+	private static <T extends Oaf> Function<T, Boolean> isNotZeroFnUsingIdOrSourceAndTarget() {
 		return t -> {
 			if (isSubClass(t, Relation.class)) {
-				return Objects.nonNull(((Relation) t).getSource());
+				final Relation rel = (Relation) t;
+				return StringUtils.isNotBlank(rel.getSource()) && StringUtils.isNotBlank(rel.getTarget());
 			}
-			return Objects.nonNull(((OafEntity) t).getId());
+			return StringUtils.isNotBlank(((OafEntity) t).getId());
 		};
 	}

--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java
@ -112,6 +112,7 @@ public class PromoteActionPayloadFunctions {
 		Class<G> rowClazz) {
 		TypedColumn<G, G> aggregator = new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
 		return rowDS
+			.filter((FilterFunction<G>) o -> isNotZeroFn.get().apply(o))
 			.groupByKey((MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
 			.agg(aggregator)
 			.map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz));
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json
@ -1,56 +0,0 @@
-[
-  {
-    "paramName": "issm",
-    "paramLongName": "isSparkSessionManaged",
-    "paramDescription": "when true will stop SparkSession after job execution",
-    "paramRequired": false
-  },
-  {
-    "paramName": "is",
-    "paramLongName": "isLookupUrl",
-    "paramDescription": "URL of the isLookUp Service",
-    "paramRequired": true
-  },
-  {
-    "paramName": "sn",
-    "paramLongName": "sourceNameNode",
-    "paramDescription": "nameNode of the source cluster",
-    "paramRequired": true
-  },
-  {
-    "paramName": "tn",
-    "paramLongName": "targetNameNode",
-    "paramDescription": "namoNode of the target cluster",
-    "paramRequired": true
-  },
-  {
-    "paramName": "w",
-    "paramLongName": "workingDirectory",
-    "paramDescription": "working directory",
-    "paramRequired": true
-  },
-  {
-    "paramName": "nm",
-    "paramLongName": "distcp_num_maps",
-    "paramDescription": "maximum number of map tasks used in the distcp process",
-    "paramRequired": true
-  },
-  {
-    "paramName": "mm",
-    "paramLongName": "distcp_memory_mb",
-    "paramDescription": "memory for distcp action copying actionsets from remote cluster",
-    "paramRequired": true
-  },
-  {
-    "paramName": "tt",
-    "paramLongName": "distcp_task_timeout",
-    "paramDescription": "timeout for distcp copying actions from remote cluster",
-    "paramRequired": true
-  },
-  {
-    "paramName": "tr",
-    "paramLongName": "transform_only",
-    "paramDescription": "activate tranform-only mode. Only apply transformation step",
-    "paramRequired": true
-  }
-]
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/migration/transform_actionsets_parameters.json
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/migration/transform_actionsets_parameters.json
@ -1,20 +0,0 @@
-[
-  {
-    "paramName": "issm",
-    "paramLongName": "isSparkSessionManaged",
-    "paramDescription": "when true will stop SparkSession after job execution",
-    "paramRequired": false
-  },
-  {
-    "paramName": "is",
-    "paramLongName": "isLookupUrl",
-    "paramDescription": "URL of the isLookUp Service",
-    "paramRequired": true
-  },
-  {
-    "paramName": "i",
-    "paramLongName": "inputPaths",
-    "paramDescription": "URL of the isLookUp Service",
-    "paramRequired": true
-  }
-]
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json
@ -40,5 +40,11 @@
    "paramLongName": "mergeAndGetStrategy",
    "paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET",
    "paramRequired": true
+  },
+  {
+    "paramName": "sgid",
+    "paramLongName": "shouldGroupById",
+    "paramDescription": "indicates whether the promotion operation should group objects in the graph by id or not",
+    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml
@ -24,6 +24,10 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
+        <property>
+            <name>shouldGroupById</name>
+            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
+        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -111,6 +115,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
+            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForDatasetTable"/>
        <error to="Kill"/>
@ -162,6 +167,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/dataset</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
+            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml
@ -56,6 +56,11 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
+        <property>
+            <name>shouldGroupById</name>
+            <value>false</value>
+            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
+        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/migration/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/migration/oozie_app/workflow.xml
@ -1,138 +0,0 @@
-<workflow-app xmlns='uri:oozie:workflow:0.5' name='migrate_actions'>
-    <parameters>
-        <property>
-            <name>sourceNN</name>
-            <description>the source name node</description>
-        </property>
-        <property>
-            <name>isLookupUrl</name>
-            <description>the isLookup service endpoint</description>
-        </property>
-        <property>
-            <name>workingDirectory</name>
-            <description>working directory</description>
-        </property>
-        <property>
-            <name>distcp_memory_mb</name>
-            <value>6144</value>
-            <description>memory for distcp copying actionsets from remote cluster</description>
-        </property>
-        <property>
-            <name>distcp_task_timeout</name>
-            <value>60000000</value>
-            <description>timeout for distcp copying actions from remote cluster</description>
-        </property>
-        <property>
-            <name>distcp_num_maps</name>
-            <value>1</value>
-            <description>mmaximum number of map tasks used in the distcp process</description>
-        </property>
-        <property>
-            <name>transform_only</name>
-            <description>activate tranform-only mode. Only apply transformation step</description>
-        </property>
-        <property>
-            <name>sparkDriverMemory</name>
-            <description>memory for driver process</description>
-        </property>
-        <property>
-            <name>sparkExecutorMemory</name>
-            <description>memory for individual executor</description>
-        </property>
-        <property>
-            <name>sparkExecutorCores</name>
-            <description>number of cores used by single executor</description>
-        </property>
-        <property>
-            <name>oozieActionShareLibForSpark2</name>
-            <description>oozie action sharelib for spark 2.*</description>
-        </property>
-        <property>
-            <name>spark2ExtraListeners</name>
-            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
-            <description>spark 2.* extra listeners classname</description>
-        </property>
-        <property>
-            <name>spark2SqlQueryExecutionListeners</name>
-            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
-            <description>spark 2.* sql query execution listeners classname</description>
-        </property>
-        <property>
-            <name>spark2YarnHistoryServerAddress</name>
-            <description>spark 2.* yarn history server address</description>
-        </property>
-        <property>
-            <name>spark2EventLogDir</name>
-            <description>spark 2.* event log dir location</description>
-        </property>
-    </parameters>
-
-    <global>
-        <job-tracker>${jobTracker}</job-tracker>
-        <name-node>${nameNode}</name-node>
-        <configuration>
-            <property>
-                <name>mapreduce.job.queuename</name>
-                <value>${queueName}</value>
-            </property>
-            <property>
-                <name>oozie.launcher.mapred.job.queue.name</name>
-                <value>${oozieLauncherQueueName}</value>
-            </property>
-            <property>
-                <name>oozie.action.sharelib.for.spark</name>
-                <value>${oozieActionShareLibForSpark2}</value>
-            </property>
-        </configuration>
-    </global>
-
-    <start to="migrate_actionsets"/>
-
-    <action name="migrate_actionsets">
-        <java>
-            <main-class>eu.dnetlib.dhp.actionmanager.migration.MigrateActionSet</main-class>
-            <java-opt>-Dmapred.task.timeout=${distcp_task_timeout}</java-opt>
-            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
-            <arg>--sourceNameNode</arg><arg>${sourceNN}</arg>
-            <arg>--targetNameNode</arg><arg>${nameNode}</arg>
-            <arg>--workingDirectory</arg><arg>${workingDirectory}</arg>
-            <arg>--distcp_num_maps</arg><arg>${distcp_num_maps}</arg>
-            <arg>--distcp_memory_mb</arg><arg>${distcp_memory_mb}</arg>
-            <arg>--distcp_task_timeout</arg><arg>${distcp_task_timeout}</arg>
-            <arg>--transform_only</arg><arg>${transform_only}</arg>
-            <capture-output/>
-        </java>
-        <ok to="transform_actions" />
-        <error to="fail" />
-    </action>
-
-    <action name="transform_actions">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>transform_actions</name>
-            <class>eu.dnetlib.dhp.actionmanager.migration.TransformActions</class>
-            <jar>dhp-actionmanager-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-cores=${sparkExecutorCores}
-                --executor-memory=${sparkExecutorMemory}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-            </spark-opts>
-            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
-            <arg>--inputPaths</arg><arg>${wf:actionData('migrate_actionsets')['target_paths']}</arg>
-        </spark>
-        <ok to="end"/>
-        <error to="fail"/>
-    </action>
-
-    <kill name="fail">
-        <message>migrate_actions failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-    </kill>
-
-    <end name="end" />
-
-</workflow-app>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml
@ -24,6 +24,10 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
+        <property>
+            <name>shouldGroupById</name>
+            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
+        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -110,6 +114,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/otherresearchproduct</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
+            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForOtherResearchProductTable"/>
        <error to="Kill"/>
@ -161,6 +166,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/otherresearchproduct</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
+            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml
@ -24,6 +24,10 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
+        <property>
+            <name>shouldGroupById</name>
+            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
+        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -111,6 +115,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/publication</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
+            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForPublicationTable"/>
        <error to="Kill"/>
@ -162,6 +167,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/publication</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
+            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml
@ -24,6 +24,10 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
+        <property>
+            <name>shouldGroupById</name>
+            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
+        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -110,6 +114,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/software</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
+            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForSoftwareTable"/>
        <error to="Kill"/>
@ -161,6 +166,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/software</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
+            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java
+++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java
@ -101,7 +101,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
 							"-outputGraphTablePath",
 							"",
 							"-mergeAndGetStrategy",
-							MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name()
+							MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name(),
+							"--shouldGroupById",
+							"true"
 						}));

 			// then
@ -141,7 +143,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
 						"-outputGraphTablePath",
 						outputGraphTableDir.toString(),
 						"-mergeAndGetStrategy",
-						strategy.name()
+						strategy.name(),
+						"--shouldGroupById",
+						"true"
 					});

 			// then
--- a/dhp-workflows/dhp-aggregation/README.md
+++ b/dhp-workflows/dhp-aggregation/README.md
@ -1,29 +1,27 @@
 Description of the Module
 --------------------------
-This module defines a **collector worker application** that runs on Hadoop.
+This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records.
+Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure
+the consistency of the read/write operations on the data, since the MdSM keeps track of the logical-physical mapping
+of each MDStore.

-It is responsible for harvesting metadata using different plugins.
+## Metadata collection

-The collector worker uses a message queue to inform the progress 
-of the harvesting action (using a message queue for sending **ONGOING** messages) furthermore, 
-It gives, at the end of the job, some information about the status 
-of the collection i.e Number of records collected(using a message queue for sending **REPORT** messages).
+The **metadata collection workflow** is responsible for harvesting metadata records exposed through different protocols
+and in different formats, and for storing them on HDFS so that they can be further processed.

-To work the collection worker need some parameter like:
+### Collector Plugins

-* **hdfsPath**: the path where storing the sequential file
-* **apidescriptor**: the JSON encoding of the API Descriptor
-* **namenode**: the Name Node URI
-* **userHDFS**: the user wich create the hdfs seq file
-* **rabbitUser**: the user to connect with RabbitMq for messaging
-* **rabbitPassWord**: the password to connect with RabbitMq for messaging
-* **rabbitHost**: the host of the RabbitMq server
-* **rabbitOngoingQueue**: the name of the ongoing queue
-* **rabbitReportQueue**: the name of the report queue
-* **workflowId**: the identifier of the dnet Workflow
+Different protocols are managed by dedicated Collector plugins, i.e. java programs implementing a defined interface:

-##Plugins
-* OAI Plugin 
+```eu.dnetlib.dhp.collection.plugin.CollectorPlugin```
+
+The list of the supported plugins:
+
+* OAI Plugin: collects from OAI-PMH compatible endpoints
+* MDStore plugin: collects from a given D-Net MetadataStore (identified by mongodb URI, dbName, MDStoreID)
+* MDStore dump plugin: collects from an MDStore dump stored on the HDFS location indicated by the `path` parameter 
+
+# Transformation Plugins
+TODO

-## Usage
-TODO
--- a/dhp-workflows/dhp-aggregation/pom.xml
+++ b/dhp-workflows/dhp-aggregation/pom.xml
@ -7,10 +7,44 @@
        <version>1.2.4-SNAPSHOT</version>
    </parent>
    <artifactId>dhp-aggregation</artifactId>
-    
-  
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>net.alchim31.maven</groupId>
+                <artifactId>scala-maven-plugin</artifactId>
+                <version>${net.alchim31.maven.version}</version>
+                <executions>
+                    <execution>
+                        <id>scala-compile-first</id>
+                        <phase>initialize</phase>
+                        <goals>
+                            <goal>add-source</goal>
+                            <goal>compile</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>scala-test-compile</id>
+                        <phase>process-test-resources</phase>
+                        <goals>
+                            <goal>testCompile</goal>
+                        </goals>
+                    </execution>
+                </executions>
+                <configuration>
+                    <scalaVersion>${scala.version}</scalaVersion>
+                </configuration>
+            </plugin>
+        </plugins>
+
+    </build>
+
    <dependencies>

+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+        </dependency>
+
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
@ -24,19 +58,7 @@
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-common</artifactId>
            <version>${project.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>com.sun.xml.bind</groupId>
-                    <artifactId>jaxb-core</artifactId>
-                </exclusion>
-            </exclusions>
        </dependency>
-        
-         <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-schemas</artifactId>
-        </dependency>
-

        <dependency>
            <groupId>net.sf.saxon</groupId>
@ -57,6 +79,11 @@
            <artifactId>jaxen</artifactId>
        </dependency>

+        <dependency>
+            <groupId>org.json</groupId>
+            <artifactId>json</artifactId>
+        </dependency>
+
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
        <dependency>
            <groupId>org.apache.commons</groupId>
@ -77,8 +104,11 @@
            <artifactId>commons-compress</artifactId>
        </dependency>

-
+        <dependency>
+            <groupId>org.mongodb</groupId>
+            <artifactId>mongo-java-driver</artifactId>
+        </dependency>

    </dependencies>

-</project>
+</project>
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java
@ -75,7 +75,6 @@ public class CollectAndSave implements Serializable {
 			.union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class))
 			.union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class))
 			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
-		;
 	}

 	private static void removeOutputDir(SparkSession spark, String path) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
@ -36,7 +36,7 @@ import scala.Tuple2;
 */
 public class SparkAtomicActionScoreJob implements Serializable {

-	private static String DOI = "doi";
+	private static final String DOI = "doi";
 	private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala
@ -0,0 +1,86 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import org.apache.commons.io.IOUtils
+import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest}
+import org.apache.http.entity.StringEntity
+import org.apache.http.impl.client.HttpClients
+
+import java.io.IOException
+
+/**
+ * Base class for REST clients that expose a paginated HTTP API as an `Iterator[String]`.
+ *
+ * Items are served from `buffer`; as soon as the last buffered item has been handed out,
+ * `getBufferData()` is invoked so that the implementation can load the following page.
+ */
+abstract class AbstractRestClient extends Iterator[String]{
+
+  /** Items of the page currently being iterated. */
+  var buffer: List[String] = List()
+  /** Position in `buffer` of the item returned by the next call to `next()`. */
+  var current_index:Int = 0
+
+  /** Paging state kept by implementations between two requests, when there is any. */
+  var scroll_value: Option[String] = None
+
+  /** Set by implementations once there are no more pages to request. */
+  var complete:Boolean = false
+
+
+  /** Implementations parse a raw response body here and update the iteration state. */
+  def extractInfo(input: String): Unit
+
+  /** Loads the next page; invoked on construction and every time the buffer has been consumed. */
+  protected def getBufferData(): Unit
+
+
+  /**
+   * Executes an HTTP GET.
+   *
+   * @param url the address to request
+   * @return the response body, or an empty string when every attempt failed
+   */
+  def doHTTPGETRequest(url:String): String = {
+    val httpGet = new HttpGet(url)
+    doHTTPRequest(httpGet)
+
+  }
+
+  /**
+   * Executes an HTTP POST, sending `json` (when not null) as an `application/json` body.
+   *
+   * @param url  the address to request
+   * @param json the payload, or null to post without a body
+   * @return the response body, or an empty string when every attempt failed
+   */
+  def doHTTPPOSTRequest(url:String, json:String): String = {
+    val httpPost = new HttpPost(url)
+    if (json != null) {
+      // JSON payloads are UTF-8; the single-argument constructor would encode them as ISO-8859-1
+      val entity = new StringEntity(json, "UTF-8")
+      httpPost.setEntity(entity)
+      httpPost.setHeader("Accept", "application/json")
+      httpPost.setHeader("Content-type", "application/json")
+    }
+    doHTTPRequest(httpPost)
+  }
+
+  def hasNext: Boolean = {
+    buffer.nonEmpty && current_index < buffer.size
+  }
+
+
+  /** Returns the current item and triggers the load of the next page once the buffer is exhausted. */
+  override def next(): String = {
+    val next_item:String = buffer(current_index)
+    current_index = current_index + 1
+    if (current_index == buffer.size)
+      getBufferData()
+    next_item
+  }
+
+
+
+
+  /**
+   * Executes the request, retrying up to 4 times while the server answers with an error status.
+   *
+   * @param r the request to execute
+   * @return the response body decoded as UTF-8, or an empty string when every attempt failed
+   * @throws RuntimeException when the request cannot be executed or the client cannot be closed
+   */
+  private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={
+    val client = HttpClients.createDefault
+    var tries = 4
+    var body: Option[String] = None
+    try {
+      while (body.isEmpty && tries > 0) {
+
+        println(s"requesting ${r.getURI}")
+        val response = client.execute(r)
+        try {
+          val status = response.getStatusLine.getStatusCode
+          println(s"get response with status$status")
+          // every 4xx/5xx answer is a failure, 400 included
+          if (status >= 400)
+            tries -= 1
+          else
+            body = Some(IOUtils.toString(response.getEntity.getContent, "UTF-8"))
+        } finally {
+          // always release the connection: an unclosed response keeps it out of the pool
+          // and would make the following attempts wait for a free connection
+          response.close()
+        }
+      }
+      body.getOrElse("")
+    } catch {
+      case e: Throwable =>
+        throw new RuntimeException("Error on executing request ", e)
+    } finally try client.close()
+    catch {
+      case e: IOException =>
+        throw new RuntimeException("Unable to close client ", e)
+    }
+  }
+
+  // Eagerly fetch the first page. This runs as part of the constructor, so implementations of
+  // getBufferData() must only rely on state that is already initialised at this point.
+  getBufferData()
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala
@ -0,0 +1,31 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import org.json4s.{DefaultFormats, JValue}
+import org.json4s.jackson.JsonMethods.{compact, parse, render}
+
+/**
+ * Iterates over the records returned by the Datacite REST API, one JSON string per record.
+ *
+ * @param timestamp lower bound of the `updated` range used in the query
+ * @param blocks    page size requested to the API
+ * @param until     upper bound of the `updated` range; values not greater than 0 mean open ended (`*`)
+ */
+class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until:Long = -1) extends AbstractRestClient {
+
+  /**
+   * Replaces the buffer with the items found under `data` and records the link to the
+   * following page; when no such link is present the iteration is marked as complete.
+   */
+  override def extractInfo(input: String): Unit = {
+    implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats
+    val json: JValue = parse(input)
+    val records: List[JValue] = (json \ "data").extract[List[JValue]]
+    buffer = records.map(record => compact(render(record)))
+    // a missing or empty `links.next` means this was the last page
+    scroll_value = Option((json \ "links" \ "next").extractOrElse[String](null)).filter(link => link.nonEmpty)
+    if (scroll_value.isEmpty)
+      complete = true
+    current_index = 0
+  }
+
+  /** Builds the URL of the first page of the query. */
+  def get_url():String ={
+    val upperBound = if (until > 0) until.toString else "*"
+    s"https://api.datacite.org/dois?page[cursor]=1&page[size]=$blocks&query=updated:[$timestamp%20TO%20$upperBound]"
+
+  }
+
+  /** Requests the next page (or the first one when no paging link is known yet) unless the iteration is complete. */
+  override def getBufferData(): Unit = {
+    if (!complete) {
+      val url = scroll_value.getOrElse(get_url())
+      extractInfo(doHTTPGETRequest(url))
+    }
+  }
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala
@ -0,0 +1,500 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import com.fasterxml.jackson.databind.ObjectMapper
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
+import eu.dnetlib.dhp.schema.action.AtomicAction
+import eu.dnetlib.dhp.schema.common.ModelConstants
+import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
+import eu.dnetlib.dhp.schema.common.ModelConstants
+import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
+import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
+import eu.dnetlib.dhp.utils.DHPUtils
+import org.apache.commons.lang3.StringUtils
+import org.json4s.DefaultFormats
+import org.json4s.JsonAST.{JField, JObject, JString}
+import org.json4s.jackson.JsonMethods.parse
+
+import java.nio.charset.CodingErrorAction
+import java.text.SimpleDateFormat
+import java.time.LocalDate
+import java.time.format.DateTimeFormatter
+import java.util.{Date, Locale}
+import java.util.regex.Pattern
+import scala.collection.JavaConverters._
+import scala.io.{Codec, Source}
+
+// A Datacite record: DOI, a timestamp, an activity flag and the JSON payload.
+// NOTE(review): not referenced in the visible part of this file - confirm the exact meaning of the fields.
+case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {}
+
+// One entry of a creator's `nameIdentifiers` list.
+case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {}
+
+// Extraction target for the items found under `creators`.
+case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {}
+
+// Extraction target for the items found under `titles`.
+case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {}
+
+// Extraction target for the items found under `subjects`.
+case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {}
+
+// Extraction target for the items found under `descriptions`.
+case class DescriptionType(descriptionType: Option[String], description: Option[String]) {}
+
+// Funding information attached to a record.
+// NOTE(review): presumably mapped from `fundingReferences` - the usage is outside the visible code.
+case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {}
+
+// Extraction target for the items found under `dates`.
+case class DateType(date: Option[String], dateType: Option[String]) {}
+
+// Value type of the map loaded from the `hostedBy_map.json` resource.
+case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {}
+
+object DataciteToOAFTransformation {
+
+  // Codec used when reading the bundled resources: UTF-8, replacing malformed or
+  // unmappable input instead of failing.
+  implicit val codec: Codec = Codec("UTF-8")
+  codec.onMalformedInput(CodingErrorAction.REPLACE)
+  codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
+
+  val DOI_CLASS = "doi"
+  val SUBJ_CLASS = "keywords"
+
+
+  // Lines of the `datacite_filter` resource; a record containing any of them is skipped (see filter_json).
+  val j_filter: List[String] = {
+    val s = Source.fromInputStream(getClass.getResourceAsStream("datacite_filter")).mkString
+    s.lines.toList
+  }
+
+  val mapper = new ObjectMapper()
+  // Entry describing the "unknown repository", built from the ModelConstants defaults.
+  val unknown_repository: HostedByMapType = HostedByMapType(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID, ModelConstants.UNKNOWN_REPOSITORY.getValue, ModelConstants.UNKNOWN_REPOSITORY.getValue, Some(1.0F))
+
+  // DataInfo shared by everything produced by this transformation.
+  // NOTE(review): "0.9" is presumably the trust value - confirm against generateDataInfo.
+  val dataInfo: DataInfo = generateDataInfo("0.9")
+  val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, "Datacite")
+
+  // Content of the `hostedBy_map.json` resource, parsed into a map of HostedByMapType.
+  val hostedByMap: Map[String, HostedByMapType] = {
+    val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString
+    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+    lazy val json: org.json4s.JValue = parse(s)
+    json.extract[Map[String, HostedByMapType]]
+  }
+
+  // Date parsers used by extract_date: every bracketed section is an optional alternative layout.
+  // The English formatter is tried first, the Italian one is the fallback.
+  val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH)
+  val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN)
+
+  // Patterns recognising EC grant agreement URIs, each paired with the identifier prefix of the
+  // corresponding project namespace. Group 2 captures the six digit grant number.
+  val funder_regex: List[(Pattern, String)] = List(
+    (Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"),
+    (Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::")
+
+  )
+
+  // Patterns used by extract_date to locate a date inside a free text value.
+  // They are tried in this order and the first one that matches wins.
+  val Date_regex: List[Pattern] = List(
+    //Y-M-D
+    Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE),
+    //M-D-Y
+    Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE),
+    //D-M-Y
+    Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE),
+    //Y
+    Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE)
+  )
+
+
+  /**
+   * Tells whether a record has to be discarded.
+   *
+   * @param json the raw record
+   * @return true when the record contains at least one of the entries of `j_filter`
+   */
+  def filter_json(json: String): Boolean = {
+    j_filter.exists(blocked => json.contains(blocked))
+  }
+
+  /**
+   * Wraps an OAF entity into an `AtomicAction` and serializes the action to JSON.
+   *
+   * @param item the entity to wrap
+   * @return the pair (canonical class name of the payload, JSON of the atomic action),
+   *         or null when `item` is not one of the supported types
+   */
+  def toActionSet(item: Oaf): (String, String) = {
+    // Serialization goes through the object-level `mapper`: creating a new ObjectMapper
+    // for every single item is expensive and brings nothing, the instance can be shared.
+    item match {
+      case dataset: OafDataset =>
+        val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset]
+        a.setClazz(classOf[OafDataset])
+        a.setPayload(dataset)
+        (dataset.getClass.getCanonicalName, mapper.writeValueAsString(a))
+      case publication: Publication =>
+        val a: AtomicAction[Publication] = new AtomicAction[Publication]
+        a.setClazz(classOf[Publication])
+        a.setPayload(publication)
+        (publication.getClass.getCanonicalName, mapper.writeValueAsString(a))
+      case software: Software =>
+        val a: AtomicAction[Software] = new AtomicAction[Software]
+        a.setClazz(classOf[Software])
+        a.setPayload(software)
+        (software.getClass.getCanonicalName, mapper.writeValueAsString(a))
+      case orp: OtherResearchProduct =>
+        val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct]
+        a.setClazz(classOf[OtherResearchProduct])
+        a.setPayload(orp)
+        (orp.getClass.getCanonicalName, mapper.writeValueAsString(a))
+
+      case relation: Relation =>
+        val a: AtomicAction[Relation] = new AtomicAction[Relation]
+        a.setClazz(classOf[Relation])
+        a.setPayload(relation)
+        (relation.getClass.getCanonicalName, mapper.writeValueAsString(a))
+      case _ =>
+        // unsupported type: callers get null
+        null
+    }
+
+  }
+
+
+  /**
+   * Tells whether an embargo is over.
+   *
+   * @param embargo_end_date the end of the embargo, formatted as yyyy-MM-dd
+   * @return true when today is strictly after the given date
+   */
+  def embargo_end(embargo_end_date: String): Boolean = {
+    val formatter = DateTimeFormatter.ofPattern("[yyyy-MM-dd]")
+    val embargoEnd = LocalDate.parse(embargo_end_date, formatter)
+    LocalDate.now().isAfter(embargoEnd)
+  }
+
+
+  /**
+   * Looks for a date inside a free text value and normalizes it.
+   *
+   * The patterns of `Date_regex` are applied in order and the first match is kept; a bare year
+   * is expanded to the first of January of that year. The match is then parsed with `df_en`
+   * and, if that fails, with `df_it`.
+   *
+   * @param input the text to inspect
+   * @return the date in ISO format (yyyy-MM-dd), or None when nothing is found or parsable
+   */
+  def extract_date(input: String): Option[String] = {
+    val hits: List[Option[String]] = Date_regex.map { pattern =>
+      val matcher = pattern.matcher(input)
+      if (matcher.find()) Some(matcher.group(0)) else None
+    }
+
+    hits.flatten.headOption match {
+      case None => None
+      case Some(found) =>
+        // a four character match can only be a bare year
+        val a_date = if (found.length == 4) s"01-01-$found" else found
+        try {
+          Some(LocalDate.parse(a_date, df_en).toString)
+        } catch {
+          case _: Throwable =>
+            try {
+              Some(LocalDate.parse(a_date, df_it).toString)
+            } catch {
+              case _: Throwable => None
+            }
+        }
+    }
+  }
+
+  /**
+   * Resolves the instance type and the result typology of a record.
+   *
+   * The candidates are looked up in the publication resource vocabulary in this order:
+   * `resourceType`, `schemaOrg`, `resourceTypeGeneral`. Null or empty candidates are ignored and
+   * the first one known to the vocabulary wins.
+   *
+   * @return the pair (instance type, result typology), or null when no candidate is resolved
+   */
+  def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): (Qualifier, Qualifier) = {
+    val candidates: List[String] = List(resourceType, schemaOrg, resourceTypeGeneral)
+
+    // the iterator keeps the lookups lazy: nothing is resolved past the first hit
+    candidates.iterator
+      .filter(candidate => candidate != null && candidate.nonEmpty)
+      .map(candidate => vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, candidate))
+      .find(typeQualifier => typeQualifier != null)
+      .map(typeQualifier => (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid)))
+      .orNull
+  }
+
+
+  /**
+   * Creates the empty result matching the type of a record, already holding one instance
+   * with its instance type set.
+   *
+   * @return a Dataset, Publication, Software or OtherResearchProduct depending on the resolved
+   *         typology, or null when the type cannot be resolved or the typology is not one of
+   *         `dataset`, `publication`, `software`, `other`
+   */
+  def getResult(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): Result = {
+    val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
+    // the typology lookup may fail even when the instance type is known
+    if (typeQualifiers == null || typeQualifiers._2 == null)
+      return null
+
+    val result: Result = typeQualifiers._2.getClassname match {
+      case "dataset" => new OafDataset
+      case "publication" => new Publication
+      case "software" => new Software
+      case "other" => new OtherResearchProduct
+      // without a default an unexpected typology would raise a MatchError instead of yielding null
+      case _ => null
+    }
+
+    if (result != null) {
+      val i = new Instance
+      i.setInstancetype(typeQualifiers._1)
+      result.setInstance(List(i).asJava)
+    }
+    result
+  }
+
+
+  /**
+   * Tells whether a record declares a date of type `available`.
+   *
+   * @param input the raw JSON record
+   * @return true when one of the `dateType` values found under `dates` equals "available", ignoring case
+   */
+  def available_date(input: String): Boolean = {
+
+    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+    val json: org.json4s.JValue = parse(input)
+    val declaredTypes: List[String] = for {
+      JObject(dates) <- json \\ "dates"
+      JField("dateType", JString(dateTypes)) <- dates
+    } yield dateTypes
+
+    declaredTypes.exists(dateType => dateType.equalsIgnoreCase("available"))
+
+  }
+
+
+  /**
+   * As described in ticket #6377: when a result comes from figshare its subjects
+   * have to be removed and the access right of its instances has to be set to OPEN.
+   *
+   * A result is considered as coming from figshare when at least one of its instances
+   * is hosted by "figshare" (case insensitive). Results without instances are left untouched.
+   *
+   * @param r the result to fix, modified in place
+   */
+  def fix_figshare(r: Result): Unit = {
+
+    if (r.getInstance() == null)
+      return
+
+    val instances = r.getInstance().asScala
+    val fromFigshare = instances.exists(inst => inst.getHostedby != null && "figshare".equalsIgnoreCase(inst.getHostedby.getValue))
+    if (fromFigshare) {
+      instances.foreach(inst => inst.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT()))
+      val noSubjects: List[StructuredProperty] = List()
+      r.setSubject(noSubjects.asJava)
+    }
+
+
+  }
+
+  /**
+   * Wraps a date into a StructuredProperty qualified by `q`, without any DataInfo attached.
+   *
+   * @param dt the date value
+   * @param q  the qualifier describing the kind of date
+   */
+  def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = {
+    OafMapperUtils.structuredProperty(dt, q, null)
+  }
+
+  /**
+   * Builds a result-project relation of sub type "outcome".
+   *
+   * @param sourceId the identifier of the source entity
+   * @param targetId the identifier of the target entity
+   * @param relClass the class of the relation
+   * @param cf       the datasource the relation is collected from
+   * @param di       the DataInfo to attach to the relation
+   * @return the new relation
+   */
+  def generateRelation(sourceId: String, targetId: String, relClass: String, cf: KeyValue, di: DataInfo): Relation = {
+
+    val relation = new Relation
+    // typing of the relation
+    relation.setRelType(ModelConstants.RESULT_PROJECT)
+    relation.setSubRelType(ModelConstants.OUTCOME)
+    relation.setRelClass(relClass)
+    // end points
+    relation.setSource(sourceId)
+    relation.setTarget(targetId)
+    // provenance
+    relation.setCollectedfrom(List(cf).asJava)
+    relation.setDataInfo(di)
+    relation
+
+
+  }
+
+  /**
+   * Creates the relations between a result and the EC project referenced by an award URI.
+   *
+   * The URI is checked against the patterns of `funder_regex`; the first one found in it gives
+   * the grant number (group 2) and the prefix of the project identifier.
+   *
+   * @param awardUri the award URI of a funding reference, may be null
+   * @param sourceId the identifier of the result
+   * @return the two relations `isProducedBy` (result to project) and `produces` (project to result),
+   *         or an empty list when the URI is null or is not a recognised grant agreement
+   */
+  def get_projectRelation(awardUri: String, sourceId: String): List[Relation] = {
+    if (awardUri == null)
+      return List()
+
+    funder_regex.iterator
+      .map { case (pattern, prefix) => (pattern.matcher(awardUri), prefix) }
+      .find { case (matcher, _) => matcher.find() }
+      .map { case (matcher, prefix) =>
+        // Read the grant number from the match itself: rewriting the whole URI with
+        // replaceAll("$2") would keep any text surrounding the match and alter the md5.
+        val grantId = matcher.group(2)
+        val targetId = s"$prefix${DHPUtils.md5(grantId)}"
+        List(
+          generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo),
+          generateRelation(targetId, sourceId, "produces", DATACITE_COLLECTED_FROM, dataInfo)
+        )
+      }
+      .getOrElse(List())
+
+  }
+
+
+  def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup): List[Oaf] = {
+    if (filter_json(input))
+      return List()
+
+    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+    lazy val json = parse(input)
+
+    val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
+    val resourceTypeGeneral = (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)
+    val schemaOrg = (json \ "attributes" \ "types" \ "schemaOrg").extractOrElse[String](null)
+
+    val doi = (json \ "attributes" \ "doi").extract[String]
+    if (doi.isEmpty)
+      return List()
+
+    //Mapping type based on vocabularies dnet:publication_resource and dnet:result_typologies
+    val result = getResult(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
+    if (result == null)
+      return List()
+
+
+    val doi_q = OafMapperUtils.qualifier("doi", "doi", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES)
+    val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
+    result.setPid(List(pid).asJava)
+    result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
+    result.setOriginalId(List(doi).asJava)
+
+    val d = new Date(dateOfCollection * 1000)
+    val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
+
+
+    result.setDateofcollection(ISO8601FORMAT.format(d))
+    result.setDateoftransformation(ISO8601FORMAT.format(ts))
+    result.setDataInfo(dataInfo)
+
+    val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List())
+
+
+    val authors = creators.zipWithIndex.map { case (c, idx) =>
+      val a = new Author
+      a.setFullname(c.name.orNull)
+      a.setName(c.givenName.orNull)
+      a.setSurname(c.familyName.orNull)
+      if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
+        a.setPid(c.nameIdentifiers.get.map(ni => {
+          val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(ModelConstants.DNET_PID_TYPES, ni.nameIdentifierScheme.get.toLowerCase()) else null
+          if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) {
+            OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo)
+          }
+          else
+            null
+
+        }
+        )
+          .asJava)
+      }
+      if (c.affiliation.isDefined)
+        a.setAffiliation(c.affiliation.get.filter(af => af.nonEmpty).map(af => OafMapperUtils.field(af, dataInfo)).asJava)
+      a.setRank(idx + 1)
+      a
+    }
+
+
+    val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
+
+    result.setTitle(titles.filter(t => t.title.nonEmpty).map(t => {
+      if (t.titleType.isEmpty) {
+        OafMapperUtils.structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null)
+      } else {
+        OafMapperUtils.structuredProperty(t.title.get, t.titleType.get, t.titleType.get, ModelConstants.DNET_DATACITE_TITLE, ModelConstants.DNET_DATACITE_TITLE, null)
+      }
+    }).asJava)
+
+    if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
+      return List()
+    result.setAuthor(authors.asJava)
+
+    val dates = (json \\ "dates").extract[List[DateType]]
+    val publication_year = (json \\ "publicationYear").extractOrElse[String](null)
+
+    val i_date = dates
+      .filter(d => d.date.isDefined && d.dateType.isDefined)
+      .find(d => d.dateType.get.equalsIgnoreCase("issued"))
+      .map(d => extract_date(d.date.get))
+    val a_date: Option[String] = dates
+      .filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available"))
+      .map(d => extract_date(d.date.get))
+      .find(d => d != null && d.isDefined)
+      .map(d => d.get)
+
+    if (a_date.isDefined) {
+      result.setEmbargoenddate(OafMapperUtils.field(a_date.get, null))
+    }
+    if (i_date.isDefined && i_date.get.isDefined) {
+      result.setDateofacceptance(OafMapperUtils.field(i_date.get.get, null))
+      result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(i_date.get.get, null))
+    }
+    else if (publication_year != null) {
+      result.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null))
+      result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null))
+    }
+
+
+    result.setRelevantdate(dates.filter(d => d.date.isDefined && d.dateType.isDefined)
+      .map(d => (extract_date(d.date.get), d.dateType.get))
+      .filter(d => d._1.isDefined)
+      .map(d => (d._1.get, vocabularies.getTermAsQualifier(ModelConstants.DNET_DATACITE_DATE, d._2.toLowerCase())))
+      .filter(d => d._2 != null)
+      .map(d => generateOAFDate(d._1, d._2)).asJava)
+
+    val subjects = (json \\ "subjects").extract[List[SubjectType]]
+
+    result.setSubject(subjects.filter(s => s.subject.nonEmpty)
+      .map(s =>
+        OafMapperUtils.structuredProperty(s.subject.get, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, null)
+      ).asJava)
+
+
+    result.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
+
+    val descriptions = (json \\ "descriptions").extract[List[DescriptionType]]
+
+    result.setDescription(
+      descriptions
+        .filter(d => d.description.isDefined).
+        map(d =>
+          OafMapperUtils.field(d.description.get, null)
+        ).filter(s => s != null).asJava)
+
+
+    val publisher = (json \\ "publisher").extractOrElse[String](null)
+    if (publisher != null)
+      result.setPublisher(OafMapperUtils.field(publisher, null))
+
+
+    val language: String = (json \\ "language").extractOrElse[String](null)
+
+    if (language != null)
+      result.setLanguage(vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, language))
+
+
+    val instance = result.getInstance().get(0)
+
+    val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String]
+
+    val accessRights: List[String] = for {
+      JObject(rightsList) <- json \\ "rightsList"
+      JField("rightsUri", JString(rightsUri)) <- rightsList
+    } yield rightsUri
+
+    val aRights: Option[AccessRight] = accessRights.map(r => {
+      vocabularies.getSynonymAsQualifier(ModelConstants.DNET_ACCESS_MODES, r)
+    }).find(q => q != null).map(q => {
+      val a = new AccessRight
+      a.setClassid(q.getClassid)
+      a.setClassname(q.getClassname)
+      a.setSchemeid(q.getSchemeid)
+      a.setSchemename(q.getSchemename)
+      a
+    })
+
+
+    val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
+
+    if (client.isDefined) {
+      val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)
+      instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name))
+      instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
+      instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
+      instance.setAccessright(access_rights_qualifier)
+      instance.setPid(result.getPid)
+      val license = accessRights
+        .find(r => r.startsWith("http") && r.matches(".*(/licenses|/publicdomain|unlicense\\.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*"))
+      if (license.isDefined)
+        instance.setLicense(OafMapperUtils.field(license.get, null))
+    }
+
+    val awardUris: List[String] = for {
+      JObject(fundingReferences) <- json \\ "fundingReferences"
+      JField("awardUri", JString(awardUri)) <- fundingReferences
+    } yield awardUri
+
+    val relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null)
+    fix_figshare(result)
+    result.setId(IdentifierFactory.createIdentifier(result))
+    if (result.getId == null)
+      return List()
+    if (relations != null && relations.nonEmpty) {
+      List(result) ::: relations
+    }
+    else
+      List(result)
+  }
+
+  /**
+    * Builds a new DataInfo carrying the given trust value.
+    *
+    * The returned object has the inferred, deletedbyinference and invisible flags
+    * set to false and uses ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER as
+    * provenance action.
+    *
+    * @param trust the value stored through setTrust
+    * @return a freshly allocated DataInfo
+    */
+  def generateDataInfo(trust: String): DataInfo = {
+    val info = new DataInfo
+    info.setTrust(trust)
+    info.setProvenanceaction(ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER)
+    info.setInferred(false)
+    info.setDeletedbyinference(false)
+    info.setInvisible(false)
+    info
+  }
+
+  /**
+    * Derives a datasource identifier from a value containing a "::" separator.
+    *
+    * The input is split with StringUtils.substringBefore / substringAfter on "::";
+    * the leading part is kept as it is, the trailing part is replaced by the result
+    * of DHPUtils.md5, and the whole is prefixed with "10|".
+    *
+    * @param input the identifier to convert
+    * @return "10|" followed by the leading part, "::" and the md5 of the trailing part
+    */
+  def generateDSId(input: String): String = {
+    val nsPrefix = StringUtils.substringBefore(input, "::")
+    val localIdHash = DHPUtils.md5(StringUtils.substringAfter(input, "::"))
+    "10|" + nsPrefix + "::" + localIdHash
+  }
+
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala
@ -0,0 +1,41 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.schema.oaf.Oaf
+import org.apache.hadoop.io.Text
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.hadoop.mapred.SequenceFileOutputFormat
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.io.Source
+
+object ExportActionSetJobNode {
+
+  val log: Logger = LoggerFactory.getLogger(ExportActionSetJobNode.getClass)
+
+  /**
+    * Spark job that reads the Oaf records stored under `sourcePath`, converts each one
+    * with DataciteToOAFTransformation.toActionSet and writes the resulting pairs as a
+    * gzip-compressed Text/Text SequenceFile under `targetPath`.
+    *
+    * Arguments are described by exportDataset_parameters.json; this method reads
+    * `master`, `sourcePath` and `targetPath`.
+    *
+    * @param args command line arguments handed to ArgumentApplicationParser
+    */
+  def main(args: Array[String]): Unit = {
+    val parametersJson = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json")).mkString
+    val parser = new ArgumentApplicationParser(parametersJson)
+    parser.parseArgument(args)
+    val master = parser.get("master")
+    val sourcePath = parser.get("sourcePath")
+    val targetPath = parser.get("targetPath")
+
+    val spark: SparkSession = SparkSession.builder()
+      .config(new SparkConf)
+      .appName(ExportActionSetJobNode.getClass.getSimpleName)
+      .master(master)
+      .getOrCreate()
+
+    // Oaf records are handled through kryo, the converted pairs as plain string tuples
+    implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
+    implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING)
+
+    // conversions yielding null are dropped before writing
+    val actions = spark.read.load(sourcePath).as[Oaf]
+      .map(oaf => DataciteToOAFTransformation.toActionSet(oaf))
+      .filter(action => action != null)
+
+    actions.rdd
+      .map(action => (new Text(action._1), new Text(action._2)))
+      .saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec])
+  }
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala
@ -0,0 +1,47 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord
+import eu.dnetlib.dhp.schema.oaf.Oaf
+import eu.dnetlib.dhp.utils.ISLookupClientFactory
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.io.Source
+
+object GenerateDataciteDatasetSpark {
+
+  val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass)
+
+  /**
+    * Spark job that loads the DataciteType records stored under `sourcePath`, keeps the
+    * active ones, maps each of them to Oaf objects through
+    * DataciteToOAFTransformation.generateOAF and overwrites `targetPath` with the result.
+    *
+    * Arguments are described by generate_dataset_params.json; this method reads
+    * `master`, `sourcePath`, `targetPath` and `isLookupUrl`. The vocabularies passed to
+    * the transformation are loaded from the lookup service before the Spark session is built.
+    *
+    * @param args command line arguments handed to ArgumentApplicationParser
+    */
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf
+    val parametersJson = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString
+    val parser = new ArgumentApplicationParser(parametersJson)
+    parser.parseArgument(args)
+    val master = parser.get("master")
+    val sourcePath = parser.get("sourcePath")
+    val targetPath = parser.get("targetPath")
+    val isLookupUrl: String = parser.get("isLookupUrl")
+    log.info("isLookupUrl: {}", isLookupUrl)
+
+    val vocabularies = VocabularyGroup.loadVocsFromIS(ISLookupClientFactory.getLookUpService(isLookupUrl))
+
+    val spark: SparkSession = SparkSession.builder()
+      .config(conf)
+      .appName(GenerateDataciteDatasetSpark.getClass.getSimpleName)
+      .master(master)
+      .getOrCreate()
+
+    implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord]
+
+    implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
+
+    import spark.implicits._
+
+    val activeRecords = spark.read.load(sourcePath).as[DataciteType]
+      .filter(record => record.isActive)
+
+    // the record timestamp is passed twice to generateOAF, exactly as the two timestamp arguments require
+    val oafItems = activeRecords
+      .flatMap(record => DataciteToOAFTransformation.generateOAF(record.json, record.timestamp, record.timestamp, vocabularies))
+      .filter(item => item != null)
+
+    oafItems.write.mode(SaveMode.Overwrite).save(targetPath)
+  }
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala
@ -0,0 +1,186 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import eu.dnetlib.dhp.actionmanager.datacite.DataciteToOAFTransformation.df_it
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
+import org.apache.hadoop.hdfs.DistributedFileSystem
+import org.apache.hadoop.io.{IntWritable, SequenceFile, Text}
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.expressions.Aggregator
+import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession}
+import org.json4s.DefaultFormats
+import org.json4s.jackson.JsonMethods.parse
+import org.apache.spark.sql.functions.max
+import org.slf4j.{Logger, LoggerFactory}
+
+import java.time.format.DateTimeFormatter._
+import java.time.{LocalDate, LocalDateTime, ZoneOffset}
+import scala.io.Source
+
+object ImportDatacite {
+
+  val log: Logger = LoggerFactory.getLogger(ImportDatacite.getClass)
+
+
+  /**
+    * Parses one JSON record and wraps it into a DataciteType.
+    *
+    * The fields `doi`, `isActive` and `updated` are read from the `attributes` node:
+    * the doi is lower-cased, and `updated` is parsed with ISO_DATE_TIME, interpreted
+    * at UTC and stored as epoch milliseconds divided by 1000. The original string is
+    * kept unchanged in the `json` field.
+    *
+    * @param input the JSON record as a string
+    * @return the DataciteType built from the record
+    */
+  def convertAPIStringToDataciteItem(input: String): DataciteType = {
+    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+    val attributes: org.json4s.JValue = parse(input) \ "attributes"
+
+    val doi = (attributes \ "doi").extract[String].toLowerCase
+    val isActive = (attributes \ "isActive").extract[Boolean]
+    val updated = (attributes \ "updated").extract[String]
+
+    val epochSeconds = LocalDateTime.parse(updated, ISO_DATE_TIME).toInstant(ZoneOffset.UTC).toEpochMilli / 1000
+
+    DataciteType(doi = doi, timestamp = epochSeconds, isActive = isActive, json = input)
+  }
+
+
+  /**
+    * Updates the Datacite dump with the records imported from the Datacite API.
+    *
+    * Steps, in order:
+    *  - reads the job parameters described by import_from_api.json;
+    *  - loads the current dump and takes its highest `timestamp`;
+    *  - unless `skipImport` is "true", calls writeSequenceFile to store the new records
+    *    in a SequenceFile at `targetPath`;
+    *  - when at least one record is available, converts the SequenceFile to a dataset,
+    *    merges it with the dump keeping one record per doi, and replaces the dump with
+    *    the merged result.
+    *
+    * @param args command line arguments handed to ArgumentApplicationParser
+    */
+  def main(args: Array[String]): Unit = {
+
+    val parametersJson = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json")).mkString
+    val parser = new ArgumentApplicationParser(parametersJson)
+    parser.parseArgument(args)
+    val master = parser.get("master")
+
+    val nameNode = parser.get("namenode")
+    log.info(s"namenode is $nameNode")
+
+    val targetPath = parser.get("targetPath")
+    log.info(s"targetPath is $targetPath")
+
+    val dataciteDump = parser.get("dataciteDumpPath")
+    log.info(s"dataciteDump is $dataciteDump")
+
+    val hdfsTargetPath = new Path(targetPath)
+    log.info(s"hdfsTargetPath is $hdfsTargetPath")
+
+    // block size handed to the API importer; 100 when the parameter is not provided
+    val blockSize = Option(parser.get("blocksize")).map(_.toInt).getOrElse(100)
+
+    val skipImport = parser.get("skipImport")
+    log.info(s"skipImport is $skipImport")
+
+    val spark: SparkSession = SparkSession.builder()
+      .appName(ImportDatacite.getClass.getSimpleName)
+      .master(master)
+      .getOrCreate()
+
+    // Hadoop configuration used to write the SequenceFile
+    val hadoopConf = new Configuration
+    hadoopConf.set("fs.defaultFS", nameNode)
+    // Because of Maven
+    hadoopConf.set("fs.hdfs.impl", classOf[DistributedFileSystem].getName)
+    hadoopConf.set("fs.file.impl", classOf[LocalFileSystem].getName)
+
+    val sc: SparkContext = spark.sparkContext
+    sc.setLogLevel("ERROR")
+
+    import spark.implicits._
+
+
+    // Keeps, among the records sharing a key, the one with the greater timestamp;
+    // when the timestamps are equal the second argument is kept.
+    val dataciteAggregator: Aggregator[DataciteType, DataciteType, DataciteType] = new Aggregator[DataciteType, DataciteType, DataciteType] with Serializable {
+
+      override def zero: DataciteType = null
+
+      override def reduce(a: DataciteType, b: DataciteType): DataciteType =
+        if (b == null) a
+        else if (a == null) b
+        else if (a.timestamp > b.timestamp) a
+        else b
+
+      override def merge(a: DataciteType, b: DataciteType): DataciteType = reduce(a, b)
+
+      override def bufferEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]]
+
+      override def outputEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]]
+
+      override def finish(reduction: DataciteType): DataciteType = reduction
+    }
+
+    val dump: Dataset[DataciteType] = spark.read.load(dataciteDump).as[DataciteType]
+    val lastTimestamp = dump.select(max("timestamp")).first().getLong(0)
+
+    println(s"last Timestamp is $lastTimestamp")
+
+    // with skipImport the count is forced to 1 so that the merge below still runs
+    val imported: Long =
+      if ("true".equalsIgnoreCase(skipImport)) 1
+      else writeSequenceFile(hdfsTargetPath, lastTimestamp, hadoopConf, blockSize)
+
+    println(s"Imported from Datacite API $imported documents")
+
+    if (imported > 0) {
+      val importedDatasetPath = s"${targetPath}_dataset"
+      val updatedDumpPath = s"${dataciteDump}_updated"
+
+      val inputRdd: RDD[DataciteType] = sc.sequenceFile(targetPath, classOf[Int], classOf[Text])
+        .map(entry => convertAPIStringToDataciteItem(entry._2.toString))
+      spark.createDataset(inputRdd).write.mode(SaveMode.Overwrite).save(importedDatasetPath)
+
+      val importedRecords: Dataset[DataciteType] = spark.read.load(importedDatasetPath).as[DataciteType]
+
+      dump
+        .union(importedRecords)
+        .groupByKey(_.doi)
+        .agg(dataciteAggregator.toColumn)
+        .map(_._2)
+        .repartition(4000)
+        .write.mode(SaveMode.Overwrite).save(updatedDumpPath)
+
+      // swap the freshly written dump in place of the previous one
+      val fs = FileSystem.get(sc.hadoopConfiguration)
+      val dumpPath = new Path(dataciteDump)
+      fs.delete(dumpPath, true)
+      fs.rename(new Path(updatedDumpPath), dumpPath)
+    }
+  }
+
+  /**
+    * Imports records through DataciteAPIImporter and appends them to a SequenceFile.
+    *
+    * Starting from `timestamp * 1000`, the range up to the current
+    * System.currentTimeMillis() is walked in windows of 50000000; for each window a
+    * new DataciteAPIImporter is created and drained. Every record is appended with a
+    * progressive IntWritable key and a Text value, and flushed right away.
+    *
+    * Failures are handled best-effort: any Throwable is logged and the number of
+    * records appended so far is returned, so a partial import is still reported to the caller.
+    *
+    * @param hdfsTargetPath location of the SequenceFile to create
+    * @param timestamp      starting point of the import; multiplied by 1000 before use
+    * @param conf           Hadoop configuration used to create the writer
+    * @param bs             block size passed to DataciteAPIImporter
+    * @return the number of records appended to the SequenceFile
+    */
+  private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs: Int): Long = {
+    var from: Long = timestamp * 1000
+    val delta: Long = 50000000L
+    val now: Long = System.currentTimeMillis()
+    var i = 0
+    try {
+      val writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(hdfsTargetPath), SequenceFile.Writer.keyClass(classOf[IntWritable]), SequenceFile.Writer.valueClass(classOf[Text]))
+      try {
+        var start: Long = System.currentTimeMillis
+        while (from < now) {
+          val client = new DataciteAPIImporter(from, bs, from + delta)
+          val key: IntWritable = new IntWritable(i)
+          val value: Text = new Text
+          while (client.hasNext) {
+            // the key is the zero-based position of the record, i counts the records written
+            key.set(i)
+            i += 1
+            value.set(client.next())
+            writer.append(key, value)
+            writer.hflush()
+            if (i % 1000 == 0) {
+              val time = (System.currentTimeMillis - start) / 1000.0F
+              println(s"Imported $i in $time seconds")
+              start = System.currentTimeMillis
+            }
+          }
+          println(s"updating from value: $from  -> ${from+delta}")
+          from = from + delta
+        }
+      } catch {
+        case e: Throwable =>
+          // println("Error", e) only printed the tuple (Error,<exception>) and lost the stack trace
+          log.error(s"Error while importing from the Datacite API, $i records written so far", e)
+      } finally writer.close()
+    }
+    catch {
+      case e: Throwable =>
+        log.error("Error", e)
+    }
+    i
+  }
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java
@ -248,7 +248,7 @@ public class PrepareProgramme {
 							parent = parent.substring(parent.lastIndexOf("|") + 1).trim();
 						}
 						if (current.trim().length() > parent.length()
-							&& current.toLowerCase().trim().substring(0, parent.length()).equals(parent)) {
+							&& current.toLowerCase().trim().startsWith(parent)) {
 							current = current.substring(parent.length() + 1);
 							if (current.trim().charAt(0) == '-' || current.trim().charAt(0) == '–') {
 								current = current.trim().substring(1).trim();
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java
@ -18,7 +18,6 @@ import org.slf4j.LoggerFactory;

 import com.fasterxml.jackson.databind.ObjectMapper;

-
 import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
@ -33,7 +32,6 @@ public class PrepareProjects {
 	private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

-
 	public static void main(String[] args) throws Exception {

 		String jsonConfiguration = IOUtils
@ -93,7 +91,7 @@ public class PrepareProjects {
 	}

 	private static FlatMapFunction<Tuple2<ProjectSubset, CSVProject>, CSVProject> getTuple2CSVProjectFlatMapFunction() {
-		return (FlatMapFunction<Tuple2<ProjectSubset, CSVProject>, CSVProject>) value -> {
+		return value -> {
 			Optional<CSVProject> csvProject = Optional.ofNullable(value._2());
 			List<CSVProject> csvProjectList = new ArrayList<>();
 			if (csvProject.isPresent()) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java
@ -1,20 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.project.httpconnector;
-
-import java.util.LinkedList;
-
-public class CollectorPluginErrorLogList extends LinkedList<String> {
-
-	private static final long serialVersionUID = -6925786561303289704L;
-
-	@Override
-	public String toString() {
-		String log = new String();
-		int index = 0;
-		for (String errorMessage : this) {
-			log += String.format("Retry #%s: %s / ", index++, errorMessage);
-		}
-		return log;
-	}
-
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java
@ -1,20 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.project.httpconnector;
-
-public class CollectorServiceException extends Exception {
-
-	private static final long serialVersionUID = 7523999812098059764L;
-
-	public CollectorServiceException(String string) {
-		super(string);
-	}
-
-	public CollectorServiceException(String string, Throwable exception) {
-		super(string, exception);
-	}
-
-	public CollectorServiceException(Throwable exception) {
-		super(exception);
-	}
-
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java
@ -1,240 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.project.httpconnector;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.*;
-import java.security.GeneralSecurityException;
-import java.security.cert.X509Certificate;
-import java.util.List;
-import java.util.Map;
-
-import javax.net.ssl.HttpsURLConnection;
-import javax.net.ssl.SSLContext;
-import javax.net.ssl.TrustManager;
-import javax.net.ssl.X509TrustManager;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.math.NumberUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-/**
- * @author jochen, michele, andrea
- */
-public class HttpConnector {
-
-	private static final Log log = LogFactory.getLog(HttpConnector.class);
-
-	private int maxNumberOfRetry = 6;
-	private int defaultDelay = 120; // seconds
-	private int readTimeOut = 120; // seconds
-
-	private String responseType = null;
-
-	private String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
-
-	public HttpConnector() {
-		CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
-	}
-
-	/**
-	 * Given the URL returns the content via HTTP GET
-	 *
-	 * @param requestUrl the URL
-	 * @return the content of the downloaded resource
-	 * @throws CollectorServiceException when retrying more than maxNumberOfRetry times
-	 */
-	public String getInputSource(final String requestUrl) throws CollectorServiceException {
-		return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
-	}
-
-	/**
-	 * Given the URL returns the content as a stream via HTTP GET
-	 *
-	 * @param requestUrl the URL
-	 * @return the content of the downloaded resource as InputStream
-	 * @throws CollectorServiceException when retrying more than maxNumberOfRetry times
-	 */
-	public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorServiceException {
-		return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
-	}
-
-	private String attemptDownlaodAsString(final String requestUrl, final int retryNumber,
-		final CollectorPluginErrorLogList errorList)
-		throws CollectorServiceException {
-		try {
-			InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
-			try {
-				return IOUtils.toString(s);
-			} catch (IOException e) {
-				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
-				Thread.sleep(defaultDelay * 1000);
-				errorList.add(e.getMessage());
-				return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList);
-			} finally {
-				IOUtils.closeQuietly(s);
-			}
-		} catch (InterruptedException e) {
-			throw new CollectorServiceException(e);
-		}
-	}
-
-	private InputStream attemptDownload(final String requestUrl, final int retryNumber,
-		final CollectorPluginErrorLogList errorList)
-		throws CollectorServiceException {
-
-		if (retryNumber > maxNumberOfRetry) {
-			throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList);
-		}
-
-		log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
-		try {
-			InputStream input = null;
-
-			try {
-				final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
-				urlConn.setInstanceFollowRedirects(false);
-				urlConn.setReadTimeout(readTimeOut * 1000);
-				urlConn.addRequestProperty("User-Agent", userAgent);
-
-				if (log.isDebugEnabled()) {
-					logHeaderFields(urlConn);
-				}
-
-				int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
-				if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
-					log.warn("waiting and repeating request after " + retryAfter + " sec.");
-					Thread.sleep(retryAfter * 1000);
-					errorList.add("503 Service Unavailable");
-					urlConn.disconnect();
-					return attemptDownload(requestUrl, retryNumber + 1, errorList);
-				} else if ((urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM)
-					|| (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP)) {
-					final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
-					log.debug("The requested url has been moved to " + newUrl);
-					errorList
-						.add(
-							String
-								.format(
-									"%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(),
-									newUrl));
-					urlConn.disconnect();
-					return attemptDownload(newUrl, retryNumber + 1, errorList);
-				} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
-					log
-						.error(
-							String
-								.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
-					Thread.sleep(defaultDelay * 1000);
-					errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
-					urlConn.disconnect();
-					return attemptDownload(requestUrl, retryNumber + 1, errorList);
-				} else {
-					input = urlConn.getInputStream();
-					responseType = urlConn.getContentType();
-					return input;
-				}
-			} catch (IOException e) {
-				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
-				Thread.sleep(defaultDelay * 1000);
-				errorList.add(e.getMessage());
-				return attemptDownload(requestUrl, retryNumber + 1, errorList);
-			}
-		} catch (InterruptedException e) {
-			throw new CollectorServiceException(e);
-		}
-	}
-
-	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
-		log.debug("StatusCode: " + urlConn.getResponseMessage());
-
-		for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
-			if (e.getKey() != null) {
-				for (String v : e.getValue()) {
-					log.debug("  key: " + e.getKey() + " - value: " + v);
-				}
-			}
-		}
-	}
-
-	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
-		for (String key : headerMap.keySet()) {
-			if ((key != null) && key.toLowerCase().equals("retry-after") && (headerMap.get(key).size() > 0)
-				&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
-				return Integer
-					.parseInt(headerMap.get(key).get(0)) + 10;
-			}
-		}
-		return -1;
-	}
-
-	private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorServiceException {
-		for (String key : headerMap.keySet()) {
-			if ((key != null) && key.toLowerCase().equals("location") && (headerMap.get(key).size() > 0)) {
-				return headerMap.get(key).get(0);
-			}
-		}
-		throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING");
-	}
-
-	/**
-	 * register for https scheme; this is a workaround and not intended for the use in trusted environments
-	 */
-	public void initTrustManager() {
-		final X509TrustManager tm = new X509TrustManager() {
-
-			@Override
-			public void checkClientTrusted(final X509Certificate[] xcs, final String string) {
-			}
-
-			@Override
-			public void checkServerTrusted(final X509Certificate[] xcs, final String string) {
-			}
-
-			@Override
-			public X509Certificate[] getAcceptedIssuers() {
-				return null;
-			}
-		};
-		try {
-			final SSLContext ctx = SSLContext.getInstance("TLS");
-			ctx.init(null, new TrustManager[] {
-				tm
-			}, null);
-			HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
-		} catch (GeneralSecurityException e) {
-			log.fatal(e);
-			throw new IllegalStateException(e);
-		}
-	}
-
-	public int getMaxNumberOfRetry() {
-		return maxNumberOfRetry;
-	}
-
-	public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
-		this.maxNumberOfRetry = maxNumberOfRetry;
-	}
-
-	public int getDefaultDelay() {
-		return defaultDelay;
-	}
-
-	public void setDefaultDelay(final int defaultDelay) {
-		this.defaultDelay = defaultDelay;
-	}
-
-	public int getReadTimeOut() {
-		return readTimeOut;
-	}
-
-	public void setReadTimeOut(final int readTimeOut) {
-		this.readTimeOut = readTimeOut;
-	}
-
-	public String getResponseType() {
-		return responseType;
-	}
-
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java
@ -17,8 +17,8 @@ import org.apache.hadoop.fs.Path;

 import com.fasterxml.jackson.databind.ObjectMapper;

-import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.collection.HttpConnector2;

 /**
 * Applies the parsing of a csv file and writes the Serialization of it in hdfs
@ -28,7 +28,7 @@ public class ReadCSV implements Closeable {
 	private final Configuration conf;
 	private final BufferedWriter writer;
 	private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-	private String csvFile;
+	private final String csvFile;

 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -74,7 +74,7 @@ public class ReadCSV implements Closeable {
 		throws Exception {
 		this.conf = new Configuration();
 		this.conf.set("fs.defaultFS", hdfsNameNode);
-		HttpConnector httpConnector = new HttpConnector();
+		HttpConnector2 httpConnector = new HttpConnector2();
 		FileSystem fileSystem = FileSystem.get(this.conf);
 		Path hdfsWritePath = new Path(hdfsPath);
 		FSDataOutputStream fsDataOutputStream = null;
@ -85,7 +85,6 @@ public class ReadCSV implements Closeable {

 		this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
 		this.csvFile = httpConnector.getInputSource(fileURL);
-		;
 	}

 	protected void write(final Object p) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java
@ -14,19 +14,18 @@ import org.apache.hadoop.fs.Path;

 import com.fasterxml.jackson.databind.ObjectMapper;

-import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.collection.HttpConnector2;

 /**
 * Applies the parsing of an excel file and writes the Serialization of it in hdfs
 */
-
 public class ReadExcel implements Closeable {
 	private static final Log log = LogFactory.getLog(ReadCSV.class);
 	private final Configuration conf;
 	private final BufferedWriter writer;
 	private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-	private InputStream excelFile;
+	private final InputStream excelFile;

 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -73,7 +72,7 @@ public class ReadExcel implements Closeable {
 		throws Exception {
 		this.conf = new Configuration();
 		this.conf.set("fs.defaultFS", hdfsNameNode);
-		HttpConnector httpConnector = new HttpConnector();
+		HttpConnector2 httpConnector = new HttpConnector2();
 		FileSystem fileSystem = FileSystem.get(this.conf);
 		Path hdfsWritePath = new Path(hdfsPath);
 		FSDataOutputStream fsDataOutputStream = null;
@ -84,7 +83,6 @@ public class ReadExcel implements Closeable {

 		this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
 		this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
-		;
 	}

 	protected void write(final Object p) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
@ -3,11 +3,11 @@ package eu.dnetlib.dhp.actionmanager.ror;

 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;

 import java.io.InputStream;
 import java.util.ArrayList;
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationCounter.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationCounter.java
@ -0,0 +1,45 @@
+
+package eu.dnetlib.dhp.aggregation.common;
+
+import java.io.Serializable;
+
+import org.apache.spark.util.LongAccumulator;
+
+/**
+ * Serializable bundle of three {@link LongAccumulator}s: total, error and processed items.
+ */
+public class AggregationCounter implements Serializable {
+
+	/** Accumulator returned by {@link #getTotalItems()}. */
+	private LongAccumulator totalItems;
+
+	/** Accumulator returned by {@link #getErrorItems()}. */
+	private LongAccumulator errorItems;
+
+	/** Accumulator returned by {@link #getProcessedItems()}. */
+	private LongAccumulator processedItems;
+
+	/**
+	 * Creates an empty counter: every accumulator is {@code null} until the matching setter is called.
+	 */
+	public AggregationCounter() {
+		super();
+	}
+
+	/**
+	 * Creates a counter holding the given accumulators.
+	 *
+	 * @param totalItems     accumulator for the total items
+	 * @param errorItems     accumulator for the error items
+	 * @param processedItems accumulator for the processed items
+	 */
+	public AggregationCounter(
+		final LongAccumulator totalItems,
+		final LongAccumulator errorItems,
+		final LongAccumulator processedItems) {
+		super();
+		this.totalItems = totalItems;
+		this.errorItems = errorItems;
+		this.processedItems = processedItems;
+	}
+
+	// --- getters ---
+
+	public LongAccumulator getTotalItems() {
+		return this.totalItems;
+	}
+
+	public LongAccumulator getErrorItems() {
+		return this.errorItems;
+	}
+
+	public LongAccumulator getProcessedItems() {
+		return this.processedItems;
+	}
+
+	// --- setters ---
+
+	public void setTotalItems(final LongAccumulator totalItems) {
+		this.totalItems = totalItems;
+	}
+
+	public void setErrorItems(final LongAccumulator errorItems) {
+		this.errorItems = errorItems;
+	}
+
+	public void setProcessedItems(final LongAccumulator processedItems) {
+		this.processedItems = processedItems;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java
@ -0,0 +1,47 @@
+
+package eu.dnetlib.dhp.aggregation.common;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Objects;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.message.MessageSender;
+import eu.dnetlib.dhp.utils.DHPUtils;
+
+/**
+ * Insertion-ordered map of report entries that can forward progress notifications and, on
+ * {@link #close()}, the collected report through a {@link MessageSender}.
+ *
+ * <p>When built with the no-arg constructor no sender is available: both {@link #ongoing(Long, Long)}
+ * and {@link #close()} then become no-ops, while the map can still be filled and inspected.
+ */
+public class AggregatorReport extends LinkedHashMap<String, String> implements Closeable {
+
+	private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
+
+	/** Sender used to deliver messages; {@code null} when the report was created without one. */
+	private MessageSender messageSender;
+
+	/**
+	 * Creates a report that is not bound to any message sender.
+	 */
+	public AggregatorReport() {
+	}
+
+	/**
+	 * Creates a report delivering its messages through the given sender.
+	 *
+	 * @param messageSender the sender to use, may be {@code null}
+	 * @throws IOException declared for interface compatibility; not thrown by this constructor
+	 */
+	public AggregatorReport(MessageSender messageSender) throws IOException {
+		this.messageSender = messageSender;
+	}
+
+	/**
+	 * Sends an "ongoing" progress message, if a sender is available.
+	 *
+	 * @param current the current progress value
+	 * @param total   the expected total, as provided by the caller
+	 */
+	public void ongoing(Long current, Long total) {
+		// same guard used by close(): a report built without a sender must not fail with a NullPointerException
+		if (Objects.nonNull(messageSender)) {
+			messageSender.sendMessage(current, total);
+		}
+	}
+
+	/**
+	 * Logs every entry and, if a sender is available, sends a one-entry report: the key is the lower-cased
+	 * simple class name, the value is the JSON serialization of this map's values (keys are not included).
+	 *
+	 * @throws IOException if the values cannot be serialized
+	 */
+	@Override
+	public void close() throws IOException {
+		if (Objects.nonNull(messageSender)) {
+			log.info("closing report: ");
+			this.forEach((k, v) -> log.info("{} - {}", k, v));
+
+			Map<String, String> m = new HashMap<>();
+			m.put(getClass().getSimpleName().toLowerCase(), DHPUtils.MAPPER.writeValueAsString(values()));
+			messageSender.sendReport(m);
+		}
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReporterCallback.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReporterCallback.java
@ -0,0 +1,10 @@
+
+package eu.dnetlib.dhp.aggregation.common;
+
+/**
+ * Callback supplying the progress figures of a running job; {@code ReportingJob.schedule} polls it
+ * and forwards both values to the report's {@code ongoing} method.
+ */
+public interface ReporterCallback {
+
+	/**
+	 * @return the current progress value
+	 */
+	Long getCurrent();
+
+	/**
+	 * @return the expected total; implementations may return {@code null} (as {@code CollectorWorker} does)
+	 */
+	Long getTotal();
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java
@ -0,0 +1,41 @@
+
+package eu.dnetlib.dhp.aggregation.common;
+
+import java.util.TimerTask;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Base class for jobs that periodically push their advancement to an {@link AggregatorReport},
+ * using a private single-thread scheduled executor.
+ */
+public abstract class ReportingJob {
+
+	/**
+	 * Period (seconds) between two consecutive ongoing messages reporting the collection task advancement
+	 */
+	public static final int ONGOING_REPORT_FREQUENCY = 5;
+
+	/**
+	 * Delay (seconds) before the first ongoing message reporting the collection task advancement is sent
+	 */
+	public static final int INITIAL_DELAY = 2;
+
+	/** Scheduler running the periodic reporting task. */
+	private final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
+
+	/** Report receiving the ongoing messages. */
+	protected final AggregatorReport report;
+
+	public ReportingJob(AggregatorReport report) {
+		this.report = report;
+	}
+
+	/**
+	 * Starts the periodic reporting: at a fixed rate the callback is polled and its values are
+	 * passed to {@link AggregatorReport#ongoing(Long, Long)}.
+	 *
+	 * @param callback source of the current/total values
+	 */
+	protected void schedule(final ReporterCallback callback) {
+		final Runnable sendOngoing = () -> report.ongoing(callback.getCurrent(), callback.getTotal());
+		executor.scheduleAtFixedRate(sendOngoing, INITIAL_DELAY, ONGOING_REPORT_FREQUENCY, TimeUnit.SECONDS);
+	}
+
+	/**
+	 * Shuts the scheduler down, stopping the periodic reporting.
+	 */
+	protected void shutdown() {
+		executor.shutdown();
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java
@ -0,0 +1,136 @@
+
+package eu.dnetlib.dhp.aggregation.mdstore;
+
+import static eu.dnetlib.dhp.common.Constants.*;
+import static eu.dnetlib.dhp.utils.DHPUtils.*;
+
+import java.net.URI;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.rest.DNetRestClient;
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
+
+/**
+ * Command line node performing one action ({@link MDAction}) against the MDStore manager REST endpoints.
+ *
+ * <p>The action and the manager base URI are read from the arguments described by
+ * {@code /eu/dnetlib/dhp/collection/mdstore_action_parameters.json}. NEW_VERSION and READ_LOCK publish the
+ * returned {@link MDStoreVersion} (as JSON) through {@code populateOOZIEEnv}; COMMIT, ROLLBACK and
+ * READ_UNLOCK take a JSON-serialized {@link MDStoreVersion} as input.
+ */
+public class MDStoreActionNode {
+	private static final Logger log = LoggerFactory.getLogger(MDStoreActionNode.class);
+
+	enum MDAction {
+		NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK
+	}
+
+	public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion";
+
+	public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s";
+	public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort";
+
+	public static final String READ_LOCK_URL = "%s/mdstore/%s/startReading";
+	public static final String READ_UNLOCK_URL = "%s/version/%s/endReading";
+
+	private static final String MDSTOREVERSIONPARAM = "mdStoreVersion";
+	private static final String MDSTOREREADLOCKPARAM = "mdStoreReadLockVersion";
+
+	/**
+	 * Parses the arguments and executes the requested action.
+	 *
+	 * @param args the command line arguments
+	 * @throws IllegalArgumentException when a required argument is missing/blank or the action is not managed
+	 * @throws Exception on argument parsing, JSON, HDFS or REST failures
+	 */
+	public static void main(String[] args) throws Exception {
+		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					MDStoreActionNode.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/collection/mdstore_action_parameters.json")));
+		argumentParser.parseArgument(args);
+
+		log.info("Java Xmx: {}m", Runtime.getRuntime().maxMemory() / (1024 * 1024));
+
+		final MDAction action = MDAction.valueOf(argumentParser.get("action"));
+		log.info("Current action is {}", action);
+
+		final String mdStoreManagerURI = argumentParser.get("mdStoreManagerURI");
+		log.info("mdStoreManagerURI is {}", mdStoreManagerURI);
+
+		switch (action) {
+			case NEW_VERSION: {
+				final String mdStoreID = getMdStoreId(argumentParser);
+				final MDStoreVersion currentVersion = DNetRestClient
+					.doGET(String.format(NEW_VERSION_URI, mdStoreManagerURI, mdStoreID), MDStoreVersion.class);
+				populateOOZIEEnv(MDSTOREVERSIONPARAM, MAPPER.writeValueAsString(currentVersion));
+				break;
+			}
+			case COMMIT: {
+
+				final String hdfsuri = argumentParser.get("namenode");
+				if (StringUtils.isBlank(hdfsuri)) {
+					throw new IllegalArgumentException("missing or empty argument namenode");
+				}
+				final MDStoreVersion mdStoreVersion = readMDStoreVersion(argumentParser.get("mdStoreVersion"));
+				Path hdfstoreSizepath = new Path(mdStoreVersion.getHdfsPath() + MDSTORE_SIZE_PATH);
+
+				try (
+					FileSystem fs = FileSystem.get(URI.create(hdfsuri), getHadoopConfiguration(hdfsuri));
+					FSDataInputStream inputStream = fs.open(hdfstoreSizepath)) {
+
+					// tolerate surrounding whitespace (e.g. a trailing newline) in the size file
+					final long mdStoreSize = Long.parseLong(IOUtils.toString(inputStream).trim());
+
+					// the size file is re-created (emptied) once read, as before; the returned stream is now
+					// closed explicitly instead of being left open
+					// NOTE(review): confirm that emptying the size file at commit time is intended
+					fs.create(hdfstoreSizepath).close();
+					DNetRestClient
+						.doGET(
+							String.format(COMMIT_VERSION_URL, mdStoreManagerURI, mdStoreVersion.getId(), mdStoreSize));
+				}
+
+				break;
+			}
+			case ROLLBACK: {
+				final MDStoreVersion mdStoreVersion = readMDStoreVersion(argumentParser.get("mdStoreVersion"));
+				DNetRestClient.doGET(String.format(ROLLBACK_VERSION_URL, mdStoreManagerURI, mdStoreVersion.getId()));
+				break;
+			}
+
+			case READ_LOCK: {
+				final String mdStoreID = getMdStoreId(argumentParser);
+				final MDStoreVersion currentVersion = DNetRestClient
+					.doGET(String.format(READ_LOCK_URL, mdStoreManagerURI, mdStoreID), MDStoreVersion.class);
+				populateOOZIEEnv(MDSTOREREADLOCKPARAM, MAPPER.writeValueAsString(currentVersion));
+				break;
+			}
+			case READ_UNLOCK: {
+				final MDStoreVersion mdStoreVersion = readMDStoreVersion(argumentParser.get("readMDStoreId"));
+				DNetRestClient.doGET(String.format(READ_UNLOCK_URL, mdStoreManagerURI, mdStoreVersion.getId()));
+				break;
+			}
+
+			default:
+				throw new IllegalArgumentException("invalid action");
+		}
+
+	}
+
+	/**
+	 * Reads the mandatory {@code mdStoreID} argument, rejecting missing or blank values.
+	 */
+	private static String getMdStoreId(final ArgumentApplicationParser argumentParser) {
+		final String mdStoreID = argumentParser.get("mdStoreID");
+		if (StringUtils.isBlank(mdStoreID)) {
+			throw new IllegalArgumentException("missing or empty argument mdStoreId");
+		}
+		return mdStoreID;
+	}
+
+	/**
+	 * Deserializes a JSON-encoded {@link MDStoreVersion}, rejecting versions with a blank id.
+	 */
+	private static MDStoreVersion readMDStoreVersion(final String mdStoreVersion_params) throws Exception {
+		final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
+
+		if (StringUtils.isBlank(mdStoreVersion.getId())) {
+			throw new IllegalArgumentException(
+				"invalid MDStoreVersion value current is " + mdStoreVersion_params);
+		}
+		return mdStoreVersion;
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorException.java
@ -1,16 +1,16 @@

-package eu.dnetlib.dhp.collection.worker;
+package eu.dnetlib.dhp.collection;

-public class DnetCollectorException extends Exception {
+public class CollectorException extends Exception {

 	/** */
 	private static final long serialVersionUID = -290723075076039757L;

-	public DnetCollectorException() {
+	public CollectorException() {
 		super();
 	}

-	public DnetCollectorException(
+	public CollectorException(
 		final String message,
 		final Throwable cause,
 		final boolean enableSuppression,
@ -18,15 +18,15 @@ public class DnetCollectorException extends Exception {
 		super(message, cause, enableSuppression, writableStackTrace);
 	}

-	public DnetCollectorException(final String message, final Throwable cause) {
+	public CollectorException(final String message, final Throwable cause) {
 		super(message, cause);
 	}

-	public DnetCollectorException(final String message) {
+	public CollectorException(final String message) {
 		super(message);
 	}

-	public DnetCollectorException(final Throwable cause) {
+	public CollectorException(final Throwable cause) {
 		super(cause);
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
@ -0,0 +1,134 @@
+
+package eu.dnetlib.dhp.collection;
+
+import static eu.dnetlib.dhp.common.Constants.SEQUENCE_FILE_NAME;
+
+import java.io.IOException;
+import java.util.Optional;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.DeflateCodec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
+import eu.dnetlib.dhp.aggregation.common.ReportingJob;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin;
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
+
+/**
+ * Collects the records exposed by the API described by an {@link ApiDescriptor} and stores them in a
+ * sequence file under the HDFS path of the given {@link MDStoreVersion}, periodically reporting the
+ * number of collected records through the {@link AggregatorReport}.
+ */
+public class CollectorWorker extends ReportingJob {
+
+	private static final Logger log = LoggerFactory.getLogger(CollectorWorker.class);
+
+	private final ApiDescriptor api;
+
+	private final FileSystem fileSystem;
+
+	private final MDStoreVersion mdStoreVersion;
+
+	private final HttpClientParams clientParams;
+
+	public CollectorWorker(
+		final ApiDescriptor api,
+		final FileSystem fileSystem,
+		final MDStoreVersion mdStoreVersion,
+		final HttpClientParams clientParams,
+		final AggregatorReport report) {
+		super(report);
+		this.api = api;
+		this.fileSystem = fileSystem;
+		this.mdStoreVersion = mdStoreVersion;
+		this.clientParams = clientParams;
+	}
+
+	/**
+	 * Runs the collection: every record returned by the plugin is appended to the sequence file
+	 * {@code mdStoreVersion.getHdfsPath() + SEQUENCE_FILE_NAME}, keyed by an incrementing integer.
+	 *
+	 * <p>Any failure is recorded in the report (exception class name, message) and rethrown wrapped in a
+	 * {@link CollectorException}. In every case the periodic reporting is stopped and a last ongoing
+	 * message is sent with current and total both set to the number of collected records.
+	 *
+	 * @throws UnknownCollectorPluginException when no plugin can be selected for the API
+	 * @throws CollectorException when the collection or the writing fails
+	 * @throws IOException declared by the signature
+	 */
+	public void collect() throws UnknownCollectorPluginException, CollectorException, IOException {
+
+		final String outputPath = mdStoreVersion.getHdfsPath() + SEQUENCE_FILE_NAME;
+		log.info("outputPath path is {}", outputPath);
+
+		final CollectorPlugin plugin = getCollectorPlugin();
+		final AtomicInteger counter = new AtomicInteger(0);
+
+		scheduleReport(counter);
+
+		try (SequenceFile.Writer writer = SequenceFile
+			.createWriter(
+				fileSystem.getConf(),
+				SequenceFile.Writer.file(new Path(outputPath)),
+				SequenceFile.Writer.keyClass(IntWritable.class),
+				SequenceFile.Writer.valueClass(Text.class),
+				SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
+			// key and value instances are reused for every appended record
+			final IntWritable key = new IntWritable(counter.get());
+			final Text value = new Text();
+			plugin
+				.collect(api, report)
+				.forEach(
+					content -> {
+						key.set(counter.getAndIncrement());
+						value.set(content);
+						try {
+							writer.append(key, value);
+						} catch (Throwable e) {
+							throw new RuntimeException(e);
+						}
+					});
+		} catch (Throwable e) {
+			report.put(e.getClass().getName(), e.getMessage());
+			throw new CollectorException(e);
+		} finally {
+			shutdown();
+			report.ongoing(counter.longValue(), counter.longValue());
+		}
+	}
+
+	/**
+	 * Schedules the periodic report of the counter value; the total is unknown, hence {@code null}.
+	 */
+	private void scheduleReport(AtomicInteger counter) {
+		schedule(new ReporterCallback() {
+			@Override
+			public Long getCurrent() {
+				return counter.longValue();
+			}
+
+			@Override
+			public Long getTotal() {
+				return null;
+			}
+		});
+	}
+
+	/**
+	 * Selects the plugin implementation from the API protocol and, for the {@code other} protocol, from the
+	 * {@code other_plugin_type} API parameter.
+	 *
+	 * <p>Protocol or plugin names that do not match any enum constant are still rejected by
+	 * {@code valueOf} with an {@link IllegalArgumentException}.
+	 *
+	 * @throws UnknownCollectorPluginException when the protocol/plugin is not managed, or when the
+	 *         {@code other_plugin_type} parameter is missing for the {@code other} protocol
+	 */
+	private CollectorPlugin getCollectorPlugin() throws UnknownCollectorPluginException {
+
+		switch (CollectorPlugin.NAME.valueOf(api.getProtocol())) {
+			case oai:
+				return new OaiCollectorPlugin(clientParams);
+			case rest_json2xml:
+				return new RestCollectorPlugin(clientParams);
+			case other: {
+				// a missing params map or parameter is reported with the declared exception instead of
+				// an unchecked Optional.get() failure
+				final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
+					.ofNullable(api.getParams())
+					.map(params -> params.get("other_plugin_type"))
+					.map(CollectorPlugin.NAME.OTHER_NAME::valueOf)
+					.orElseThrow(
+						() -> new UnknownCollectorPluginException(
+							"missing api parameter other_plugin_type for protocol: " + api.getProtocol()));
+
+				switch (plugin) {
+					case mdstore_mongodb_dump:
+						return new MongoDbDumpCollectorPlugin(fileSystem);
+					case mdstore_mongodb:
+						return new MDStoreCollectorPlugin();
+					default:
+						throw new UnknownCollectorPluginException("plugin is not managed: " + plugin);
+				}
+			}
+			default:
+				throw new UnknownCollectorPluginException("protocol is not managed: " + api.getProtocol());
+		}
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java
@ -0,0 +1,135 @@
+
+package eu.dnetlib.dhp.collection;
+
+import static eu.dnetlib.dhp.common.Constants.*;
+import static eu.dnetlib.dhp.utils.DHPUtils.*;
+
+import java.io.IOException;
+import java.util.Optional;
+
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.message.MessageSender;
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
+
+/**
+ * CollectorWorkerApplication is the main class responsible for starting the metadata collection process and storing
+ * the outcomes into HDFS. The application is executed on the hadoop cluster: when invoked in the context of the
+ * metadata collection oozie workflow, it receives all the input parameters necessary to instantiate the specific
+ * collection plugin and its specific configuration
+ *
+ * @author Sandro La Bruzzo, Claudio Atzori
+ */
+public class CollectorWorkerApplication {
+
+	private static final Logger log = LoggerFactory.getLogger(CollectorWorkerApplication.class);
+
+	private final FileSystem fileSystem;
+
+	public CollectorWorkerApplication(FileSystem fileSystem) {
+		this.fileSystem = fileSystem;
+	}
+
+	/**
+	 * Reads the job arguments, builds the HTTP client parameters, the API descriptor and the file system,
+	 * then runs the collection.
+	 *
+	 * @param args the command line arguments
+	 */
+	public static void main(final String[] args)
+		throws ParseException, IOException, UnknownCollectorPluginException, CollectorException {
+
+		final String parametersJson = IOUtils
+			.toString(
+				CollectorWorkerApplication.class
+					.getResourceAsStream(
+						"/eu/dnetlib/dhp/collection/collector_worker_input_parameter.json"));
+		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(parametersJson);
+		argumentParser.parseArgument(args);
+
+		log.info("Java Xmx: {}m", Runtime.getRuntime().maxMemory() / (1024 * 1024));
+
+		final String hdfsuri = argumentParser.get("namenode");
+		log.info("hdfsURI is {}", hdfsuri);
+
+		final String apiDescriptor = argumentParser.get("apidescriptor");
+		log.info("apiDescriptor is {}", apiDescriptor);
+
+		final String mdStoreVersion = argumentParser.get("mdStoreVersion");
+		log.info("mdStoreVersion is {}", mdStoreVersion);
+
+		final String dnetMessageManagerURL = argumentParser.get(DNET_MESSAGE_MGR_URL);
+		log.info("dnetMessageManagerURL is {}", dnetMessageManagerURL);
+
+		final String workflowId = argumentParser.get("workflowId");
+		log.info("workflowId is {}", workflowId);
+
+		final HttpClientParams clientParams = getClientParams(argumentParser);
+
+		final ApiDescriptor api = MAPPER.readValue(apiDescriptor, ApiDescriptor.class);
+		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
+
+		final CollectorWorkerApplication application = new CollectorWorkerApplication(fileSystem);
+		application.run(mdStoreVersion, clientParams, api, dnetMessageManagerURL, workflowId);
+	}
+
+	/**
+	 * Deserializes the MDStore version, creates the message sender and runs a {@link CollectorWorker};
+	 * the report is closed when the collection ends, whether it succeeded or not.
+	 */
+	protected void run(String mdStoreVersion, HttpClientParams clientParams, ApiDescriptor api,
+		String dnetMessageManagerURL, String workflowId)
+		throws IOException, CollectorException, UnknownCollectorPluginException {
+
+		final MDStoreVersion version = MAPPER.readValue(mdStoreVersion, MDStoreVersion.class);
+		final MessageSender sender = new MessageSender(dnetMessageManagerURL, workflowId);
+
+		try (AggregatorReport report = new AggregatorReport(sender)) {
+			final CollectorWorker worker = new CollectorWorker(api, fileSystem, version, clientParams, report);
+			worker.collect();
+		}
+	}
+
+	/**
+	 * Builds the HTTP client parameters from the job arguments, falling back to the defaults declared by
+	 * {@link HttpClientParams} for every argument that is not provided.
+	 */
+	private static HttpClientParams getClientParams(ArgumentApplicationParser argumentParser) {
+		final HttpClientParams params = new HttpClientParams();
+
+		params.setMaxNumberOfRetry(intArgument(argumentParser, MAX_NUMBER_OF_RETRY, HttpClientParams._maxNumberOfRetry));
+		log.info("maxNumberOfRetry is {}", params.getMaxNumberOfRetry());
+
+		params.setRequestDelay(intArgument(argumentParser, REQUEST_DELAY, HttpClientParams._requestDelay));
+		log.info("requestDelay is {}", params.getRequestDelay());
+
+		params.setRetryDelay(intArgument(argumentParser, RETRY_DELAY, HttpClientParams._retryDelay));
+		log.info("retryDelay is {}", params.getRetryDelay());
+
+		params.setConnectTimeOut(intArgument(argumentParser, CONNECT_TIMEOUT, HttpClientParams._connectTimeOut));
+		log.info("connectTimeOut is {}", params.getConnectTimeOut());
+
+		params.setReadTimeOut(intArgument(argumentParser, READ_TIMEOUT, HttpClientParams._readTimeOut));
+		log.info("readTimeOut is {}", params.getReadTimeOut());
+
+		return params;
+	}
+
+	/**
+	 * Returns the named argument parsed as an integer, or the given default when the argument is absent.
+	 */
+	private static int intArgument(ArgumentApplicationParser argumentParser, String name, int defaultValue) {
+		return Optional
+			.ofNullable(argumentParser.get(name))
+			.map(Integer::parseInt)
+			.orElse(defaultValue);
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
@ -1,28 +1,26 @@

 package eu.dnetlib.dhp.collection;

+import static eu.dnetlib.dhp.common.Constants.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import static eu.dnetlib.dhp.utils.DHPUtils.*;

 import java.io.ByteArrayInputStream;
+import java.io.IOException;
 import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
-import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;

-import org.apache.commons.cli.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoder;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SparkSession;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.*;
+import org.apache.spark.sql.expressions.Aggregator;
 import org.apache.spark.util.LongAccumulator;
 import org.dom4j.Document;
 import org.dom4j.Node;
@ -30,19 +28,172 @@ import org.dom4j.io.SAXReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import com.fasterxml.jackson.databind.ObjectMapper;
-
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
-import eu.dnetlib.dhp.model.mdstore.Provenance;
-import eu.dnetlib.message.Message;
-import eu.dnetlib.message.MessageManager;
-import eu.dnetlib.message.MessageType;
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.schema.mdstore.Provenance;
+import scala.Tuple2;

 public class GenerateNativeStoreSparkJob {

 	private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class);

+	/**
+	 * Reads the job arguments described by {@code generate_native_input_parameters.json} and runs
+	 * {@code createNativeMDStore} within a Spark session.
+	 *
+	 * @param args the command line arguments
+	 * @throws NumberFormatException when {@code dateOfCollection} is not a valid long
+	 * @throws Exception on argument parsing, JSON or Spark failures
+	 */
+	public static void main(String[] args) throws Exception {
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					GenerateNativeStoreSparkJob.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/collection/generate_native_input_parameters.json")));
+		parser.parseArgument(args);
+
+		final String provenanceArgument = parser.get("provenance");
+		log.info("Provenance is {}", provenanceArgument);
+		final Provenance provenance = MAPPER.readValue(provenanceArgument, Provenance.class);
+
+		final String dateOfCollectionArgs = parser.get("dateOfCollection");
+		log.info("dateOfCollection is {}", dateOfCollectionArgs);
+		// Long.valueOf replaces the deprecated boxing constructor new Long(String); parsing rules are the same
+		final Long dateOfCollection = Long.valueOf(dateOfCollectionArgs);
+
+		String mdStoreVersion = parser.get("mdStoreVersion");
+		log.info("mdStoreVersion is {}", mdStoreVersion);
+
+		final MDStoreVersion currentVersion = MAPPER.readValue(mdStoreVersion, MDStoreVersion.class);
+
+		String readMdStoreVersionParam = parser.get("readMdStoreVersion");
+		log.info("readMdStoreVersion is {}", readMdStoreVersionParam);
+
+		// a blank readMdStoreVersion means there is no previous version to merge with
+		final MDStoreVersion readMdStoreVersion = StringUtils.isBlank(readMdStoreVersionParam) ? null
+			: MAPPER.readValue(readMdStoreVersionParam, MDStoreVersion.class);
+
+		final String xpath = parser.get("xpath");
+		log.info("xpath is {}", xpath);
+
+		final String encoding = parser.get("encoding");
+		log.info("encoding is {}", encoding);
+
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		SparkConf conf = new SparkConf();
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> createNativeMDStore(
+				spark, provenance, dateOfCollection, xpath, encoding, currentVersion, readMdStoreVersion));
+	}
+
+	/**
+	 * Builds the native MDStore for the current version: the sequence file found under the version path is
+	 * parsed into {@link MetadataRecord}s, which are saved under the version's data path; the number of
+	 * saved records is then written to the version's size file.
+	 *
+	 * @param spark            the Spark session
+	 * @param provenance       provenance passed to {@code parseRecord} for every record
+	 * @param dateOfCollection collection date passed to {@code parseRecord} for every record
+	 * @param xpath            xpath passed to {@code parseRecord}
+	 * @param encoding         encoding passed to {@code parseRecord}
+	 * @param currentVersion   the version being written
+	 * @param readVersion      the version to merge with; {@code null} disables the incremental mode
+	 * @throws IOException declared by the signature
+	 */
+	private static void createNativeMDStore(SparkSession spark,
+		Provenance provenance,
+		Long dateOfCollection,
+		String xpath,
+		String encoding,
+		MDStoreVersion currentVersion,
+		MDStoreVersion readVersion) throws IOException {
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		// accumulators handed to parseRecord
+		final LongAccumulator totalItems = sc.sc().longAccumulator(CONTENT_TOTALITEMS);
+		final LongAccumulator invalidRecords = sc.sc().longAccumulator(CONTENT_INVALIDRECORDS);
+
+		final String seqFilePath = currentVersion.getHdfsPath() + SEQUENCE_FILE_NAME;
+		// parse every sequence file value, drop the records parseRecord rejected (null) and the duplicates
+		final JavaRDD<MetadataRecord> nativeStore = sc
+			.sequenceFile(seqFilePath, IntWritable.class, Text.class)
+			.map(
+				item -> parseRecord(
+					item._2().toString(),
+					xpath,
+					encoding,
+					provenance,
+					dateOfCollection,
+					totalItems,
+					invalidRecords))
+			.filter(Objects::nonNull)
+			.distinct();
+
+		final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
+		final Dataset<MetadataRecord> mdstore = spark.createDataset(nativeStore.rdd(), encoder);
+
+		final String targetPath = currentVersion.getHdfsPath() + MDSTORE_DATA_PATH;
+
+		if (readVersion != null) { // INCREMENTAL MODE
+			log.info("updating {} incrementally with {}", targetPath, readVersion.getHdfsPath());
+			Dataset<MetadataRecord> currentMdStoreVersion = spark
+				.read()
+				.load(readVersion.getHdfsPath() + MDSTORE_DATA_PATH)
+				.as(encoder);
+			TypedColumn<MetadataRecord, MetadataRecord> aggregator = new MDStoreAggregator().toColumn();
+
+			// merge the previous version with the new records: one record is kept for each id,
+			// chosen by MDStoreAggregator
+			final Dataset<MetadataRecord> map = currentMdStoreVersion
+				.union(mdstore)
+				.groupByKey(
+					(MapFunction<MetadataRecord, String>) MetadataRecord::getId,
+					Encoders.STRING())
+				.agg(aggregator)
+				.map((MapFunction<Tuple2<String, MetadataRecord>, MetadataRecord>) Tuple2::_2, encoder);
+
+			// logs up to 100 ids of the merged dataset
+			// NOTE(review): looks like a debugging aid that evaluates the merge before the save does -
+			// confirm whether it is still needed
+			map.select("id").takeAsList(100).forEach(s -> log.info(s.toString()));
+
+			saveDataset(map, targetPath);
+
+		} else {
+			saveDataset(mdstore, targetPath);
+		}
+
+		// the size is computed by reading back what has just been saved
+		final Long total = spark.read().load(targetPath).count();
+		log.info("collected {} records for datasource '{}'", total, provenance.getDatasourceName());
+
+		writeHdfsFile(
+			spark.sparkContext().hadoopConfiguration(), total.toString(),
+			currentVersion.getHdfsPath() + MDSTORE_SIZE_PATH);
+	}
+
+	/**
+	 * Typed aggregator reducing a group of {@link MetadataRecord}s to the one with the greatest
+	 * date of collection; {@code null} acts as the neutral element.
+	 */
+	public static class MDStoreAggregator extends Aggregator<MetadataRecord, MetadataRecord, MetadataRecord> {
+
+		/** The initial buffer is empty, represented by {@code null}. */
+		@Override
+		public MetadataRecord zero() {
+			return null;
+		}
+
+		@Override
+		public MetadataRecord reduce(MetadataRecord b, MetadataRecord a) {
+			return getLatestRecord(b, a);
+		}
+
+		@Override
+		public MetadataRecord merge(MetadataRecord b, MetadataRecord a) {
+			return getLatestRecord(b, a);
+		}
+
+		/**
+		 * Picks the record with the strictly greater date of collection; on a tie, or when the second
+		 * record is {@code null}, the first one is kept.
+		 */
+		private MetadataRecord getLatestRecord(MetadataRecord first, MetadataRecord second) {
+			if (first == null) {
+				return second;
+			}
+			if (second == null) {
+				return first;
+			}
+			if (second.getDateOfCollection() > first.getDateOfCollection()) {
+				return second;
+			}
+			return first;
+		}
+
+		@Override
+		public MetadataRecord finish(MetadataRecord r) {
+			return r;
+		}
+
+		@Override
+		public Encoder<MetadataRecord> bufferEncoder() {
+			final Encoder<MetadataRecord> beanEncoder = Encoders.bean(MetadataRecord.class);
+			return beanEncoder;
+		}
+
+		@Override
+		public Encoder<MetadataRecord> outputEncoder() {
+			final Encoder<MetadataRecord> beanEncoder = Encoders.bean(MetadataRecord.class);
+			return beanEncoder;
+		}
+
+	}
+
 	public static MetadataRecord parseRecord(
 		final String input,
 		final String xpath,
@ -64,112 +215,11 @@ public class GenerateNativeStoreSparkJob {
 					invalidRecords.add(1);
 				return null;
 			}
-			return new MetadataRecord(originalIdentifier, encoding, provenance, input, dateOfCollection);
+			return new MetadataRecord(originalIdentifier, encoding, provenance, document.asXML(), dateOfCollection);
 		} catch (Throwable e) {
-			if (invalidRecords != null)
-				invalidRecords.add(1);
-			e.printStackTrace();
+			invalidRecords.add(1);
 			return null;
 		}
 	}

-	public static void main(String[] args) throws Exception {
-
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					GenerateNativeStoreSparkJob.class
-						.getResourceAsStream(
-							"/eu/dnetlib/dhp/collection/collection_input_parameters.json")));
-		parser.parseArgument(args);
-		final ObjectMapper jsonMapper = new ObjectMapper();
-		final Provenance provenance = jsonMapper.readValue(parser.get("provenance"), Provenance.class);
-		final long dateOfCollection = new Long(parser.get("dateOfCollection"));
-
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
-		final Map<String, String> ongoingMap = new HashMap<>();
-		final Map<String, String> reportMap = new HashMap<>();
-
-		final boolean test = parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
-
-		SparkConf conf = new SparkConf();
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> {
-				final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-				final JavaPairRDD<IntWritable, Text> inputRDD = sc
-					.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
-
-				final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems");
-				final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords");
-
-				final MessageManager manager = new MessageManager(
-					parser.get("rabbitHost"),
-					parser.get("rabbitUser"),
-					parser.get("rabbitPassword"),
-					false,
-					false,
-					null);
-
-				final JavaRDD<MetadataRecord> mappeRDD = inputRDD
-					.map(
-						item -> parseRecord(
-							item._2().toString(),
-							parser.get("xpath"),
-							parser.get("encoding"),
-							provenance,
-							dateOfCollection,
-							totalItems,
-							invalidRecords))
-					.filter(Objects::nonNull)
-					.distinct();
-
-				ongoingMap.put("ongoing", "0");
-				if (!test) {
-					manager
-						.sendMessage(
-							new Message(
-								parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap),
-							parser.get("rabbitOngoingQueue"),
-							true,
-							false);
-				}
-
-				final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
-				final Dataset<MetadataRecord> mdstore = spark.createDataset(mappeRDD.rdd(), encoder);
-				final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords");
-				mdStoreRecords.add(mdstore.count());
-				ongoingMap.put("ongoing", "" + totalItems.value());
-				if (!test) {
-					manager
-						.sendMessage(
-							new Message(
-								parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap),
-							parser.get("rabbitOngoingQueue"),
-							true,
-							false);
-				}
-				mdstore.write().format("parquet").save(parser.get("output"));
-				reportMap.put("inputItem", "" + totalItems.value());
-				reportMap.put("invalidRecords", "" + invalidRecords.value());
-				reportMap.put("mdStoreSize", "" + mdStoreRecords.value());
-				if (!test) {
-					manager
-						.sendMessage(
-							new Message(parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap),
-							parser.get("rabbitReportQueue"),
-							true,
-							false);
-					manager.close();
-				}
-			});
-
-	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java
@ -0,0 +1,94 @@
+
+package eu.dnetlib.dhp.collection;
+
+/**
+ * Bundles the http connection parameters driving the client behaviour.
+ */
+public class HttpClientParams {
+
+	// Defaults
+	public static int _maxNumberOfRetry = 3;
+	public static int _requestDelay = 0; // milliseconds
+	public static int _retryDelay = 10; // seconds
+	public static int _connectTimeOut = 10; // seconds
+	public static int _readTimeOut = 30; // seconds
+
+	/**
+	 * Maximum number of allowed retries before failing
+	 */
+	private int maxNumberOfRetry;
+
+	/**
+	 * Delay between request (Milliseconds)
+	 */
+	private int requestDelay;
+
+	/**
+	 * Time to wait after a failure before retrying (Seconds)
+	 */
+	private int retryDelay;
+
+	/**
+	 * Connect timeout (Seconds)
+	 */
+	private int connectTimeOut;
+
+	/**
+	 * Read timeout (Seconds)
+	 */
+	private int readTimeOut;
+
+	public HttpClientParams() {
+		this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut);
+	}
+
+	public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
+		int readTimeOut) {
+		this.maxNumberOfRetry = maxNumberOfRetry;
+		this.requestDelay = requestDelay;
+		this.retryDelay = retryDelay;
+		this.connectTimeOut = connectTimeOut;
+		this.readTimeOut = readTimeOut;
+	}
+
+	public int getMaxNumberOfRetry() {
+		return maxNumberOfRetry;
+	}
+
+	public void setMaxNumberOfRetry(int maxNumberOfRetry) {
+		this.maxNumberOfRetry = maxNumberOfRetry;
+	}
+
+	public int getRequestDelay() {
+		return requestDelay;
+	}
+
+	public void setRequestDelay(int requestDelay) {
+		this.requestDelay = requestDelay;
+	}
+
+	public int getRetryDelay() {
+		return retryDelay;
+	}
+
+	public void setRetryDelay(int retryDelay) {
+		this.retryDelay = retryDelay;
+	}
+
+	public void setConnectTimeOut(int connectTimeOut) {
+		this.connectTimeOut = connectTimeOut;
+	}
+
+	public int getConnectTimeOut() {
+		return connectTimeOut;
+	}
+
+	public int getReadTimeOut() {
+		return readTimeOut;
+	}
+
+	public void setReadTimeOut(int readTimeOut) {
+		this.readTimeOut = readTimeOut;
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java
@ -0,0 +1,259 @@
+
+package eu.dnetlib.dhp.collection;
+
+import static eu.dnetlib.dhp.utils.DHPUtils.*;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.*;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.http.HttpHeaders;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+
+/**
+ * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
+ *
+ * @author jochen, michele, andrea, alessia, claudio
+ */
+public class HttpConnector2 {
+
+	private static final Logger log = LoggerFactory.getLogger(HttpConnector2.class);
+
+	private static final String REPORT_PREFIX = "http:";
+
+	private HttpClientParams clientParams;
+
+	private String responseType = null;
+
+	private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
+
+	public HttpConnector2() {
+		this(new HttpClientParams());
+	}
+
+	public HttpConnector2(HttpClientParams clientParams) {
+		this.clientParams = clientParams;
+		CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
+	}
+
+	/**
+	 * @see HttpConnector2#getInputSource(java.lang.String, AggregatorReport)
+	 */
+	public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorException {
+		return IOUtils.toInputStream(getInputSource(requestUrl));
+	}
+
+	/**
+	 * @see HttpConnector2#getInputSource(java.lang.String, AggregatorReport)
+	 */
+	public String getInputSource(final String requestUrl) throws CollectorException {
+		return attemptDownloadAsString(requestUrl, 1, new AggregatorReport());
+	}
+
+	/**
+	 * Given the URL returns the content via HTTP GET
+	 *
+	 * @param requestUrl the URL
+	 * @param report the list of errors
+	 * @return the content of the downloaded resource
+	 * @throws CollectorException when retrying more than maxNumberOfRetry times
+	 */
+	public String getInputSource(final String requestUrl, AggregatorReport report)
+		throws CollectorException {
+		return attemptDownloadAsString(requestUrl, 1, report);
+	}
+
+	private String attemptDownloadAsString(final String requestUrl, final int retryNumber,
+		final AggregatorReport report) throws CollectorException {
+
+		try (InputStream s = attemptDownload(requestUrl, retryNumber, report)) {
+			return IOUtils.toString(s);
+		} catch (IOException e) {
+			log.error(e.getMessage(), e);
+			throw new CollectorException(e);
+		}
+	}
+
+	/**
+	 * Performs a single HTTP GET attempt and, depending on the outcome, recursively retries.
+	 *
+	 * <ul>
+	 * <li>2xx: the response stream is returned and the response content type is recorded;</li>
+	 * <li>3xx: redirects are followed manually (automatic redirects are disabled), each one counting as an attempt;</li>
+	 * <li>4xx: the error is added to the report and the request is not repeated;</li>
+	 * <li>5xx: only 503 is retried, honouring the Retry-After header when available;</li>
+	 * <li>socket errors and timeouts: retried after retryDelay * retryNumber seconds.</li>
+	 * </ul>
+	 *
+	 * @param requestUrl the URL to download
+	 * @param retryNumber the ordinal number of this attempt, starting from 1
+	 * @param report collects the errors and redirects met while downloading
+	 * @return the stream over the content of the downloaded resource
+	 * @throws CollectorException when the maximum number of attempts is exceeded or on non recoverable errors
+	 * @throws IOException on I/O errors other than those handled with a retry
+	 */
+	private InputStream attemptDownload(final String requestUrl, final int retryNumber,
+		final AggregatorReport report) throws CollectorException, IOException {
+
+		if (retryNumber > getClientParams().getMaxNumberOfRetry()) {
+			final String msg = String
+				.format(
+					"Max number of retries (%s/%s) exceeded, failing.",
+					retryNumber, getClientParams().getMaxNumberOfRetry());
+			log.error(msg);
+			throw new CollectorException(msg);
+		}
+
+		log.info("Request attempt {} [{}]", retryNumber, requestUrl);
+
+		InputStream input = null;
+
+		try {
+			if (getClientParams().getRequestDelay() > 0) {
+				// requestDelay is already expressed in milliseconds
+				backoffAndSleep(getClientParams().getRequestDelay());
+			}
+			final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
+			urlConn.setInstanceFollowRedirects(false);
+			urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
+			urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000);
+			urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
+
+			if (log.isDebugEnabled()) {
+				logHeaderFields(urlConn);
+			}
+
+			// expressed in seconds, -1 when the header is missing or not usable
+			int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
+			if (is2xx(urlConn.getResponseCode())) {
+				input = urlConn.getInputStream();
+				responseType = urlConn.getContentType();
+				return input;
+			}
+			if (is3xx(urlConn.getResponseCode())) {
+				// REDIRECTS
+				final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
+				log.info(String.format("The requested url has been moved to %s", newUrl));
+				report
+					.put(
+						REPORT_PREFIX + urlConn.getResponseCode(),
+						String.format("Moved to: %s", newUrl));
+				urlConn.disconnect();
+				if (retryAfter > 0) {
+					// retryAfter is in seconds while backoffAndSleep expects milliseconds
+					backoffAndSleep(retryAfter * 1000);
+				}
+				return attemptDownload(newUrl, retryNumber + 1, report);
+			}
+			if (is4xx(urlConn.getResponseCode())) {
+				// CLIENT ERROR, DO NOT RETRY
+				report
+					.put(
+						REPORT_PREFIX + urlConn.getResponseCode(),
+						String
+							.format(
+								"%s error: %s", requestUrl, urlConn.getResponseMessage()));
+				throw new CollectorException("4xx error: request will not be repeated. " + report);
+			}
+			if (is5xx(urlConn.getResponseCode())) {
+				// SERVER SIDE ERRORS RETRY ONLY on 503
+				switch (urlConn.getResponseCode()) {
+					case HttpURLConnection.HTTP_UNAVAILABLE:
+						if (retryAfter > 0) {
+							log
+								.warn(
+									requestUrl + " - waiting and repeating request after suggested retry-after "
+										+ retryAfter + " sec.");
+							backoffAndSleep(retryAfter * 1000);
+						} else {
+							log
+								.warn(
+									requestUrl + " - waiting and repeating request after default delay of "
+										+ getClientParams().getRetryDelay() + " sec.");
+							backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
+						}
+						report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
+						urlConn.disconnect();
+						return attemptDownload(requestUrl, retryNumber + 1, report);
+					default:
+						report
+							.put(
+								REPORT_PREFIX + urlConn.getResponseCode(),
+								String
+									.format(
+										"%s Error: %s", requestUrl, urlConn.getResponseMessage()));
+						throw new CollectorException(urlConn.getResponseCode() + " error " + report);
+				}
+			}
+			throw new CollectorException(
+				String
+					.format(
+						"Unexpected status code: %s errors: %s", urlConn.getResponseCode(),
+						MAPPER.writeValueAsString(report)));
+		} catch (MalformedURLException | UnknownHostException e) {
+			// not recoverable: repeating the request cannot help
+			log.error(e.getMessage(), e);
+			report.put(e.getClass().getName(), e.getMessage());
+			throw new CollectorException(e.getMessage(), e);
+		} catch (SocketTimeoutException | SocketException e) {
+			// possibly transient: wait longer at each attempt, then retry
+			log.error(e.getMessage(), e);
+			report.put(e.getClass().getName(), e.getMessage());
+			backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000);
+			return attemptDownload(requestUrl, retryNumber + 1, report);
+		}
+	}
+
+	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
+		log.debug("StatusCode: " + urlConn.getResponseMessage());
+
+		for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
+			if (e.getKey() != null) {
+				for (String v : e.getValue()) {
+					log.debug("  key: " + e.getKey() + " - value: " + v);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Suspends the current thread for the given amount of time.
+	 *
+	 * @param sleepTimeMs the time to sleep, in milliseconds
+	 * @throws CollectorException when the thread is interrupted while sleeping
+	 */
+	private void backoffAndSleep(int sleepTimeMs) throws CollectorException {
+		log.info("I'm going to sleep for {}ms", sleepTimeMs);
+		try {
+			Thread.sleep(sleepTimeMs);
+		} catch (InterruptedException e) {
+			log.error(e.getMessage(), e);
+			// restore the interrupt flag, cleared by Thread.sleep, so that the callers can still observe it
+			Thread.currentThread().interrupt();
+			throw new CollectorException(e);
+		}
+	}
+
+	/**
+	 * Extracts the delay suggested by the server via the Retry-After response header.
+	 *
+	 * @param headerMap the response headers
+	 * @return the suggested delay in seconds, increased by 10, or -1 when the header is missing or its value is
+	 * not an integer (e.g. when it is expressed as an HTTP date)
+	 */
+	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
+		for (Map.Entry<String, List<String>> header : headerMap.entrySet()) {
+			final String key = header.getKey();
+			final List<String> values = header.getValue();
+			if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (values != null)
+				&& !values.isEmpty() && (values.get(0) != null)) {
+				try {
+					return Integer.parseInt(values.get(0).trim()) + 10;
+				} catch (NumberFormatException e) {
+					// decimal, hexadecimal, suffixed or date values cannot be used as a delay in seconds
+					log.warn("Ignoring unsupported Retry-After value: {}", values.get(0));
+				}
+			}
+		}
+		return -1;
+	}
+
+	private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorException {
+		for (String key : headerMap.keySet()) {
+			if ((key != null) && key.equalsIgnoreCase(HttpHeaders.LOCATION) && (headerMap.get(key).size() > 0)) {
+				return headerMap.get(key).get(0);
+			}
+		}
+		throw new CollectorException("The requested url has been MOVED, but 'location' param is MISSING");
+	}
+
+	private boolean is2xx(final int statusCode) {
+		return statusCode >= 200 && statusCode <= 299;
+	}
+
+	private boolean is4xx(final int statusCode) {
+		return statusCode >= 400 && statusCode <= 499;
+	}
+
+	private boolean is3xx(final int statusCode) {
+		return statusCode >= 300 && statusCode <= 399;
+	}
+
+	private boolean is5xx(final int statusCode) {
+		return statusCode >= 500 && statusCode <= 599;
+	}
+
+	public String getResponseType() {
+		return responseType;
+	}
+
+	public HttpClientParams getClientParams() {
+		return clientParams;
+	}
+
+	public void setClientParams(HttpClientParams clientParams) {
+		this.clientParams = clientParams;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/JsonUtils.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/JsonUtils.java
@ -0,0 +1,84 @@
+
+package eu.dnetlib.dhp.collection;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+public class JsonUtils {
+
+	private static final Log log = LogFactory.getLog(JsonUtils.class);
+
+	public static final String wrapName = "recordWrap";
+
+	/**
+	 * convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to ''
+	 * check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names
+	 * and work-around for the JSON to XML converting of org.json.XML-package.
+	 *
+	 * known bugs:     doesn't prevent     "key name":" ["sexy name",": penari","erotic dance"],
+	 *
+	 * @param jsonInput
+	 * @return convertedJsonKeynameOutput
+	 */
+	public String syntaxConvertJsonKeyNames(String jsonInput) {
+
+		log.trace("before convertJsonKeyNames: " + jsonInput);
+		// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
+		// replace ' 's in JSON Names with '_'
+		while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
+		}
+
+		// replace forward-slash (sign '/' ) in JSON Names with '_'
+		while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":");
+		}
+
+		// replace '(' in JSON Names with ''
+		while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":");
+		}
+
+		// replace ')' in JSON Names with ''
+		while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":");
+		}
+
+		// add prefix of startNumbers in JSON Keynames with 'n_'
+		while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":");
+		}
+		// add prefix of only numbers in JSON Keynames with 'm_'
+		while (jsonInput.matches(".*\"([0-9]+)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":");
+		}
+
+		// replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with ''
+		while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":");
+		}
+
+		// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames.
+		// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) {
+		// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":");
+		// }
+
+		// replace '=' in JSON Keynames with '-'
+		while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":");
+		}
+
+		log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
+		return jsonInput;
+	}
+
+	public String convertToXML(final String jsonRecord) {
+		String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
+		org.json.JSONObject jsonObject = new org.json.JSONObject(syntaxConvertJsonKeyNames(jsonRecord));
+		resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
+		log.trace("before inputStream: " + resultXml);
+		resultXml = XmlCleaner.cleanAllEntities(resultXml);
+		log.trace("after cleaning: " + resultXml);
+		return resultXml;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/UnknownCollectorPluginException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/UnknownCollectorPluginException.java
@ -0,0 +1,32 @@
+
+package eu.dnetlib.dhp.collection;
+
+public class UnknownCollectorPluginException extends Exception {
+
+	/** */
+	private static final long serialVersionUID = -290723075076039757L;
+
+	public UnknownCollectorPluginException() {
+		super();
+	}
+
+	public UnknownCollectorPluginException(
+		final String message,
+		final Throwable cause,
+		final boolean enableSuppression,
+		final boolean writableStackTrace) {
+		super(message, cause, enableSuppression, writableStackTrace);
+	}
+
+	public UnknownCollectorPluginException(final String message, final Throwable cause) {
+		super(message, cause);
+	}
+
+	public UnknownCollectorPluginException(final String message) {
+		super(message);
+	}
+
+	public UnknownCollectorPluginException(final Throwable cause) {
+		super(cause);
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
@ -1,5 +1,5 @@

-package eu.dnetlib.dhp.collection.worker.utils;
+package eu.dnetlib.dhp.collection;

 import java.util.HashMap;
 import java.util.HashSet;
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
@ -3,10 +3,21 @@ package eu.dnetlib.dhp.collection.plugin;

 import java.util.stream.Stream;

-import eu.dnetlib.collector.worker.model.ApiDescriptor;
-import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.collection.CollectorException;

 public interface CollectorPlugin {

-	Stream<String> collect(ApiDescriptor api) throws DnetCollectorException;
+	enum NAME {
+		oai, other, rest_json2xml;
+
+		public enum OTHER_NAME {
+			mdstore_mongodb_dump, mdstore_mongodb
+		}
+
+	}
+
+	Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException;
+
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java
@ -0,0 +1,60 @@
+
+package eu.dnetlib.dhp.collection.plugin.mongodb;
+
+import java.util.Optional;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.bson.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.mongodb.MongoClient;
+import com.mongodb.MongoClientURI;
+import com.mongodb.client.MongoCollection;
+
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.collection.CollectorException;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.common.MdstoreClient;
+
+public class MDStoreCollectorPlugin implements CollectorPlugin {
+
+	private static final Logger log = LoggerFactory.getLogger(MDStoreCollectorPlugin.class);
+
+	public static final String MONGODB_DBNAME = "mongodb_dbname";
+	public static final String MDSTORE_ID = "mdstore_id";
+
+	/**
+	 * Streams the records stored in a mongodb-backed mdstore.
+	 *
+	 * The mongodb base url is read from the api descriptor baseUrl, while the database name and the mdstore
+	 * identifier are read from the api parameters {@link #MONGODB_DBNAME} and {@link #MDSTORE_ID}.
+	 *
+	 * @param api the descriptor providing the baseUrl and the parameters
+	 * @param report the aggregation report (not used by this implementation)
+	 * @return the stream of the values found in the 'body' field of each document in the mdstore collection
+	 * @throws CollectorException when the baseUrl or one of the expected parameters is missing
+	 */
+	@Override
+	public Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException {
+
+		final String mongoBaseUrl = Optional
+			.ofNullable(api.getBaseUrl())
+			.orElseThrow(
+				() -> new CollectorException(
+					"missing mongodb baseUrl, expected in eu.dnetlib.dhp.collection.ApiDescriptor.baseUrl"));
+		log.info("mongoBaseUrl: {}", mongoBaseUrl);
+
+		final String dbName = Optional
+			.ofNullable(api.getParams().get(MONGODB_DBNAME))
+			.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MONGODB_DBNAME)));
+		log.info("dbName: {}", dbName);
+
+		final String mdId = Optional
+			.ofNullable(api.getParams().get(MDSTORE_ID))
+			.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MDSTORE_ID)));
+		log.info("mdId: {}", mdId);
+
+		// NOTE(review): the client is not closed in this method - confirm who owns its lifecycle
+		final MdstoreClient client = new MdstoreClient(mongoBaseUrl, dbName);
+		final MongoCollection<Document> mdstore = client.mdStore(mdId);
+		// the collection size is used to build a SIZED spliterator over the cursor
+		long size = mdstore.count();
+
+		return StreamSupport
+			.stream(
+				Spliterators.spliterator(mdstore.find().iterator(), size, Spliterator.SIZED), false)
+			.map(doc -> doc.getString("body"));
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
@ -0,0 +1,54 @@
+
+package eu.dnetlib.dhp.collection.plugin.mongodb;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.util.Optional;
+import java.util.stream.Stream;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.collection.CollectorException;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.utils.DHPUtils;
+
+public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
+
+	public static final String PATH_PARAM = "path";
+	public static final String BODY_JSONPATH = "$.body";
+
+	public FileSystem fileSystem;
+
+	public MongoDbDumpCollectorPlugin(FileSystem fileSystem) {
+		this.fileSystem = fileSystem;
+	}
+
+	/**
+	 * Streams the records stored in a gzipped mongodb dump, made of one JSON document per line.
+	 *
+	 * @param api the descriptor providing the {@link #PATH_PARAM} parameter, i.e. the location of the dump
+	 * @param report the aggregation report (not used by this implementation)
+	 * @return the stream of the values found at {@link #BODY_JSONPATH} in each line of the dump; the caller is
+	 * in charge of consuming it, as the underlying file is read lazily
+	 * @throws CollectorException when the path parameter is missing, the path does not exist or cannot be opened
+	 */
+	@Override
+	public Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException {
+
+		final Path path = Optional
+			.ofNullable(api.getParams().get(PATH_PARAM))
+			.map(Path::new)
+			.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", PATH_PARAM)));
+
+		try {
+			if (!fileSystem.exists(path)) {
+				throw new CollectorException("path does not exist: " + path.toString());
+			}
+
+			// JSON is UTF-8 encoded: do not depend on the default charset of the JVM running the collection
+			return new BufferedReader(
+				new InputStreamReader(new GZIPInputStream(fileSystem.open(path)), Charset.forName("UTF-8")))
+					.lines()
+					.map(s -> DHPUtils.getJPathString(BODY_JSONPATH, s));
+
+		} catch (IOException e) {
+			throw new CollectorException(e);
+		}
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
@ -13,9 +13,11 @@ import com.google.common.base.Splitter;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;

-import eu.dnetlib.collector.worker.model.ApiDescriptor;
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.collection.CollectorException;
+import eu.dnetlib.dhp.collection.HttpClientParams;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
-import eu.dnetlib.dhp.collection.worker.DnetCollectorException;

 public class OaiCollectorPlugin implements CollectorPlugin {

@ -26,8 +28,15 @@ public class OaiCollectorPlugin implements CollectorPlugin {

 	private OaiIteratorFactory oaiIteratorFactory;

+	private HttpClientParams clientParams;
+
+	public OaiCollectorPlugin(HttpClientParams clientParams) {
+		this.clientParams = clientParams;
+	}
+
 	@Override
-	public Stream<String> collect(final ApiDescriptor api) throws DnetCollectorException {
+	public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report)
+		throws CollectorException {
 		final String baseUrl = api.getBaseUrl();
 		final String mdFormat = api.getParams().get(FORMAT_PARAM);
 		final String setParam = api.getParams().get(OAI_SET_PARAM);
@ -46,26 +55,26 @@ public class OaiCollectorPlugin implements CollectorPlugin {
 		}

 		if (baseUrl == null || baseUrl.isEmpty()) {
-			throw new DnetCollectorException("Param 'baseurl' is null or empty");
+			throw new CollectorException("Param 'baseurl' is null or empty");
 		}

 		if (mdFormat == null || mdFormat.isEmpty()) {
-			throw new DnetCollectorException("Param 'mdFormat' is null or empty");
+			throw new CollectorException("Param 'mdFormat' is null or empty");
 		}

 		if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
-			throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
+			throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
 		}

 		if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
-			throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
+			throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
 		}

 		final Iterator<Iterator<String>> iters = sets
 			.stream()
 			.map(
 				set -> getOaiIteratorFactory()
-					.newIterator(baseUrl, mdFormat, set, fromDate, untilDate))
+					.newIterator(baseUrl, mdFormat, set, fromDate, untilDate, getClientParams(), report))
 			.iterator();

 		return StreamSupport
@ -79,4 +88,12 @@ public class OaiCollectorPlugin implements CollectorPlugin {
 		}
 		return oaiIteratorFactory;
 	}
+
+	public HttpClientParams getClientParams() {
+		return clientParams;
+	}
+
+	public void setClientParams(HttpClientParams clientParams) {
+		this.clientParams = clientParams;
+	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
@ -1,7 +1,9 @@

 package eu.dnetlib.dhp.collection.plugin.oai;

+import java.io.IOException;
 import java.io.StringReader;
+import java.io.StringWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.URLEncoder;
 import java.util.Iterator;
@ -9,24 +11,28 @@ import java.util.Queue;
 import java.util.concurrent.PriorityBlockingQueue;

 import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
+import org.dom4j.DocumentHelper;
 import org.dom4j.Node;
+import org.dom4j.io.OutputFormat;
 import org.dom4j.io.SAXReader;
+import org.dom4j.io.XMLWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

-import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
-import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
-import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.collection.CollectorException;
+import eu.dnetlib.dhp.collection.HttpConnector2;
+import eu.dnetlib.dhp.collection.XmlCleaner;

 public class OaiIterator implements Iterator<String> {

-	private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on
-	// 11/24/08 5:02 PM
+	private static final Logger log = LoggerFactory.getLogger(OaiIterator.class);
+
+	private final static String REPORT_PREFIX = "oai:";

 	private final Queue<String> queue = new PriorityBlockingQueue<>();
-	private final SAXReader reader = new SAXReader();

 	private final String baseUrl;
 	private final String set;
@ -35,7 +41,8 @@ public class OaiIterator implements Iterator<String> {
 	private final String untilDate;
 	private String token;
 	private boolean started;
-	private final HttpConnector httpConnector;
+	private final HttpConnector2 httpConnector;
+	private final AggregatorReport report;

 	public OaiIterator(
 		final String baseUrl,
@ -43,7 +50,8 @@ public class OaiIterator implements Iterator<String> {
 		final String set,
 		final String fromDate,
 		final String untilDate,
-		final HttpConnector httpConnector) {
+		final HttpConnector2 httpConnector,
+		final AggregatorReport report) {
 		this.baseUrl = baseUrl;
 		this.mdFormat = mdFormat;
 		this.set = set;
@ -51,6 +59,7 @@ public class OaiIterator implements Iterator<String> {
 		this.untilDate = untilDate;
 		this.started = false;
 		this.httpConnector = httpConnector;
+		this.report = report;
 	}

 	private void verifyStarted() {
@ -58,7 +67,7 @@ public class OaiIterator implements Iterator<String> {
 			this.started = true;
 			try {
 				this.token = firstPage();
-			} catch (final DnetCollectorException e) {
+			} catch (final CollectorException e) {
 				throw new RuntimeException(e);
 			}
 		}
@ -80,7 +89,7 @@ public class OaiIterator implements Iterator<String> {
 			while (queue.isEmpty() && token != null && !token.isEmpty()) {
 				try {
 					token = otherPages(token);
-				} catch (final DnetCollectorException e) {
+				} catch (final CollectorException e) {
 					throw new RuntimeException(e);
 				}
 			}
@ -92,7 +101,7 @@ public class OaiIterator implements Iterator<String> {
 	public void remove() {
 	}

-	private String firstPage() throws DnetCollectorException {
+	private String firstPage() throws CollectorException {
 		try {
 			String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8");
 			if (set != null && !set.isEmpty()) {
@ -108,7 +117,8 @@ public class OaiIterator implements Iterator<String> {

 			return downloadPage(url);
 		} catch (final UnsupportedEncodingException e) {
-			throw new DnetCollectorException(e);
+			report.put(e.getClass().getName(), e.getMessage());
+			throw new CollectorException(e);
 		}
 	}

@ -126,32 +136,35 @@ public class OaiIterator implements Iterator<String> {
 		return result.trim();
 	}

-	private String otherPages(final String resumptionToken) throws DnetCollectorException {
+	private String otherPages(final String resumptionToken) throws CollectorException {
 		try {
 			return downloadPage(
 				baseUrl
 					+ "?verb=ListRecords&resumptionToken="
 					+ URLEncoder.encode(resumptionToken, "UTF-8"));
 		} catch (final UnsupportedEncodingException e) {
-			throw new DnetCollectorException(e);
+			report.put(e.getClass().getName(), e.getMessage());
+			throw new CollectorException(e);
 		}
 	}

-	private String downloadPage(final String url) throws DnetCollectorException {
+	private String downloadPage(final String url) throws CollectorException {

-		final String xml = httpConnector.getInputSource(url);
+		final String xml = httpConnector.getInputSource(url, report);
 		Document doc;
 		try {
-			doc = reader.read(new StringReader(xml));
+			doc = DocumentHelper.parseText(xml);
 		} catch (final DocumentException e) {
-			log.warn("Error parsing xml, I try to clean it: " + xml, e);
+			log.warn("Error parsing xml, I try to clean it. {}", e.getMessage());
+			report.put(e.getClass().getName(), e.getMessage());
 			final String cleaned = XmlCleaner.cleanAllEntities(xml);
 			try {
-				doc = reader.read(new StringReader(cleaned));
+				doc = DocumentHelper.parseText(cleaned); // parse the cleaned text: the raw xml already failed above
 			} catch (final DocumentException e1) {
 				final String resumptionToken = extractResumptionToken(xml);
 				if (resumptionToken == null) {
-					throw new DnetCollectorException("Error parsing cleaned document:" + cleaned, e1);
+					report.put(e1.getClass().getName(), e1.getMessage());
+					throw new CollectorException("Error parsing cleaned document:\n" + cleaned, e1);
 				}
 				return resumptionToken;
 			}
@ -159,19 +172,35 @@ public class OaiIterator implements Iterator<String> {

 		final Node errorNode = doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
 		if (errorNode != null) {
-			final String code = errorNode.valueOf("@code");
-			if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
-				log.warn("noRecordsMatch for oai call: " + url);
+			final String code = errorNode.valueOf("@code").trim();
+			if ("noRecordsMatch".equalsIgnoreCase(code)) {
+				final String msg = "noRecordsMatch for oai call : " + url;
+				log.warn(msg);
+				report.put(REPORT_PREFIX + code, msg);
 				return null;
 			} else {
-				throw new DnetCollectorException(code + " - " + errorNode.getText());
+				final String msg = code + " - " + errorNode.getText();
+				report.put(REPORT_PREFIX + "error", msg);
+				throw new CollectorException(msg);
 			}
 		}

 		for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
-			queue.add(((Node) o).asXML());
+			final StringWriter sw = new StringWriter();
+			final XMLWriter writer = new XMLWriter(sw, OutputFormat.createPrettyPrint());
+			try {
+				writer.write((Node) o);
+				queue.add(sw.toString());
+			} catch (IOException e) {
+				report.put(e.getClass().getName(), e.getMessage());
+				throw new CollectorException("Error parsing XML record:\n" + ((Node) o).asXML(), e);
+			}
 		}

 		return doc.valueOf("//*[local-name()='resumptionToken']");
 	}
+
+	public AggregatorReport getReport() {
+		return report;
+	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
@ -3,24 +3,28 @@ package eu.dnetlib.dhp.collection.plugin.oai;

 import java.util.Iterator;

-import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.collection.HttpClientParams;
+import eu.dnetlib.dhp.collection.HttpConnector2;

+/**
+ * Factory for {@link OaiIterator} instances; every iterator it creates is handed the same, lazily created
+ * {@link HttpConnector2}.
+ */
 public class OaiIteratorFactory {

-	private HttpConnector httpConnector;
+	// created on the first call of getHttpConnector(...) and reused afterwards
+	private HttpConnector2 httpConnector;

+	/**
+	 * Builds a new {@link OaiIterator}, forwarding all arguments to its constructor together with the shared connector.
+	 *
+	 * @param clientParams settings used to create the connector; only taken into account when the connector does
+	 *                     not exist yet (see {@code getHttpConnector})
+	 * @param report       report object passed through to the iterator
+	 * @return the newly created iterator
+	 */
 	public Iterator<String> newIterator(
 		final String baseUrl,
 		final String mdFormat,
 		final String set,
 		final String fromDate,
-		final String untilDate) {
-		return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector());
+		final String untilDate,
+		final HttpClientParams clientParams,
+		final AggregatorReport report) {
+		return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector(clientParams), report);
 	}

+	/**
+	 * Returns the cached connector, creating it first when it is still {@code null}.
+	 * NOTE(review): once the connector exists, the {@code clientParams} of later calls are ignored; the lazy
+	 * initialisation is not synchronized.
+	 */
-	private HttpConnector getHttpConnector() {
+	private HttpConnector2 getHttpConnector(HttpClientParams clientParams) {
 		if (httpConnector == null)
-			httpConnector = new HttpConnector();
+			httpConnector = new HttpConnector2(clientParams);
 		return httpConnector;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
@ -0,0 +1,105 @@
+
+package eu.dnetlib.dhp.collection.plugin.rest;
+
+import java.util.Optional;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.collection.CollectorException;
+import eu.dnetlib.dhp.collection.HttpClientParams;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+
+/**
+ * TODO: delegate HTTP requests to the common HttpConnector2 implementation.
+ *
+ * @author 	js, Andreas Czerniak
+ * @date 	2020-04-09
+ *
+ */
+public class RestCollectorPlugin implements CollectorPlugin {
+
+	/** Page size used when the API descriptor carries no (non-blank) 'resultSizeValue'. */
+	public static final String RESULT_SIZE_VALUE_DEFAULT = "100";
+
+	private final HttpClientParams clientParams;
+
+	public RestCollectorPlugin(HttpClientParams clientParams) {
+		this.clientParams = clientParams;
+	}
+
+	/**
+	 * Reads the REST settings from the API descriptor, checks the mandatory ones and exposes a
+	 * {@link RestIterator} built from them as a sequential, ordered stream.
+	 *
+	 * @param api    descriptor providing the base URL and the parameter map
+	 * @param report aggregation report (not used by this implementation)
+	 * @return the stream of records produced by the iterator
+	 * @throws CollectorException when baseUrl, resumptionType, resumptionParam, resultFormatValue, queryParams or
+	 *                            entityXpath is blank
+	 */
+	@Override
+	public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException {
+		final String baseUrl = api.getBaseUrl();
+
+		final String resumptionType = param(api, "resumptionType");
+		final String resumptionParam = param(api, "resumptionParam");
+		final String resumptionXpath = param(api, "resumptionXpath");
+		final String resultTotalXpath = param(api, "resultTotalXpath");
+		final String resultFormatParam = param(api, "resultFormatParam");
+		final String resultFormatValue = param(api, "resultFormatValue");
+		final String resultSizeParam = param(api, "resultSizeParam");
+		final String queryParams = param(api, "queryParams");
+		final String entityXpath = param(api, "entityXpath");
+		final String authMethod = param(api, "authMethod");
+		final String authToken = param(api, "authToken");
+
+		// fall back to the default page size when the parameter is missing or blank
+		final String configuredSize = param(api, "resultSizeValue");
+		final String resultSizeValue = StringUtils.isNotBlank(configuredSize)
+			? configuredSize
+			: RESULT_SIZE_VALUE_DEFAULT;
+
+		failIfBlank(baseUrl, "Param 'baseUrl' is null or empty");
+		failIfBlank(resumptionType, "Param 'resumptionType' is null or empty");
+		failIfBlank(resumptionParam, "Param 'resumptionParam' is null or empty");
+		failIfBlank(resultFormatValue, "Param 'resultFormatValue' is null or empty");
+		failIfBlank(queryParams, "Param 'queryParams' is null or empty");
+		failIfBlank(entityXpath, "Param 'entityXpath' is null or empty");
+
+		// the output format defaults to the (lower-cased) request format
+		final String resultOutputFormat = Optional
+			.ofNullable(param(api, "resultOutputFormat"))
+			.map(String::toLowerCase)
+			.filter(StringUtils::isNotBlank)
+			.orElseGet(resultFormatValue::toLowerCase);
+
+		final RestIterator records = new RestIterator(
+			getClientParams(),
+			baseUrl,
+			resumptionType,
+			resumptionParam,
+			resumptionXpath,
+			resultTotalXpath,
+			resultFormatParam,
+			resultFormatValue,
+			resultSizeParam,
+			resultSizeValue,
+			queryParams,
+			entityXpath,
+			authMethod,
+			authToken,
+			resultOutputFormat);
+
+		final Spliterator<String> spliterator = Spliterators.spliteratorUnknownSize(records, Spliterator.ORDERED);
+		return StreamSupport.stream(spliterator, false);
+	}
+
+	public HttpClientParams getClientParams() {
+		return clientParams;
+	}
+
+	/** Looks up a single entry of the descriptor's parameter map. */
+	private static String param(final ApiDescriptor api, final String name) {
+		return api.getParams().get(name);
+	}
+
+	/** Throws a CollectorException carrying the given message when the value is blank. */
+	private static void failIfBlank(final String value, final String message) throws CollectorException {
+		if (StringUtils.isBlank(value)) {
+			throw new CollectorException(message);
+		}
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
@ -0,0 +1,411 @@
+
+package eu.dnetlib.dhp.collection.plugin.rest;
+
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.Iterator;
+import java.util.Queue;
+import java.util.concurrent.PriorityBlockingQueue;
+
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.xpath.*;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.http.HttpHeaders;
+import org.apache.http.entity.ContentType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+
+import eu.dnetlib.dhp.collection.CollectorException;
+import eu.dnetlib.dhp.collection.HttpClientParams;
+import eu.dnetlib.dhp.collection.JsonUtils;
+
+/**
+ * log.info(...) equal to  log.trace(...) in the application-logs
+ * <p>
+ * Known bug: with resumptionType 'discover', collecting fails when (resultTotal % resultSizeValue) == 0; as a workaround, choose a different resultSizeValue.
+ *
+ * @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak
+ * @date 2020-04-09
+ *
+ */
+public class RestIterator implements Iterator<String> {
+
+	private static final Logger log = LoggerFactory.getLogger(RestIterator.class);
+	public static final String UTF_8 = "UTF-8";
+
+	private final HttpClientParams clientParams;
+
+	private final String BASIC = "basic";
+
+	private final JsonUtils jsonUtils;
+
+	private final String baseUrl;
+	private final String resumptionType;
+	private final String resumptionParam;
+	private final String resultFormatValue;
+	private String queryParams;
+	private final int resultSizeValue;
+	private int resumptionInt = 0; // integer resumption token (first record to harvest)
+	private int resultTotal = -1;
+	private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest
+																	// or token scanned from results)
+	private InputStream resultStream;
+	private Transformer transformer;
+	private XPath xpath;
+	private String query;
+	private XPathExpression xprResultTotalPath;
+	private XPathExpression xprResumptionPath;
+	private XPathExpression xprEntity;
+	private final String queryFormat;
+	private final String querySize;
+	private final String authMethod;
+	private final String authToken;
+	private final Queue<String> recordQueue = new PriorityBlockingQueue<String>();
+	private int discoverResultSize = 0;
+	private int pagination = 1;
+	/*
+	 * While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in
+	 * json. useful for cases when the target API expects a resultFormatValue != json, but the results are returned in
+	 * json. An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format.
+	 */
+	private final String resultOutputFormat;
+
+	/**
+	 * Creates the iterator (compatible to version 1.3.33), compiles the XPath expressions and prepares the first
+	 * query URL; no HTTP request is issued here.
+	 *
+	 * @param resultSizeValueStr page size, must be parseable as an integer
+	 * @param resultFormatParam  name of the format request parameter; when blank no format is appended to the query
+	 * @param resultSizeParam    name of the size request parameter; when blank no size is appended to the query
+	 * @throws NumberFormatException when {@code resultSizeValueStr} is not an integer
+	 * @throws IllegalStateException when the transformer or any of the XPath expressions cannot be initialised;
+	 *                               the original failure is attached as the cause
+	 */
+	public RestIterator(
+		final HttpClientParams clientParams,
+		final String baseUrl,
+		final String resumptionType,
+		final String resumptionParam,
+		final String resumptionXpath,
+		final String resultTotalXpath,
+		final String resultFormatParam,
+		final String resultFormatValue,
+		final String resultSizeParam,
+		final String resultSizeValueStr,
+		final String queryParams,
+		final String entityXpath,
+		final String authMethod,
+		final String authToken,
+		final String resultOutputFormat) {
+
+		this.clientParams = clientParams;
+		this.jsonUtils = new JsonUtils();
+		this.baseUrl = baseUrl;
+		this.resumptionType = resumptionType;
+		this.resumptionParam = resumptionParam;
+		this.resultFormatValue = resultFormatValue;
+		this.resultSizeValue = Integer.parseInt(resultSizeValueStr);
+		this.queryParams = queryParams;
+		this.authMethod = authMethod;
+		this.authToken = authToken;
+		this.resultOutputFormat = resultOutputFormat;
+
+		queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue
+			: "";
+		querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
+
+		try {
+			initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
+		} catch (Exception e) {
+			// keep the original exception as cause so that the stack trace of the real failure is not lost
+			throw new IllegalStateException("xml transformation init failed: " + e.getMessage(), e);
+		}
+		initQueue();
+	}
+
+	private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
+		throws TransformerConfigurationException, XPathExpressionException {
+		transformer = TransformerFactory.newInstance().newTransformer();
+		transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+		transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
+		xpath = XPathFactory.newInstance().newXPath();
+		xprResultTotalPath = xpath.compile(resultTotalXpath);
+		xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
+		xprEntity = xpath.compile(entityXpath);
+	}
+
+	private void initQueue() {
+		query = baseUrl + "?" + queryParams + querySize + queryFormat;
+		log.info("REST calls starting with " + query);
+	}
+
+	/**
+	 * Hook called by {@link #hasNext()} when the iteration is exhausted. Currently a no-op: nothing is closed here.
+	 */
+	private void disconnect() {
+		// TODO close inputstream
+	}
+
+	/**
+	 * Reports whether more work is pending: either records are still queued or a further page URL is waiting to be
+	 * requested. When neither is the case, {@code disconnect()} is invoked before returning {@code false}.
+	 *
+	 * @see java.util.Iterator#hasNext()
+	 */
+	@Override
+	public boolean hasNext() {
+		final boolean exhausted = recordQueue.isEmpty() && query.isEmpty();
+		if (exhausted) {
+			disconnect();
+		}
+		return !exhausted;
+	}
+
+	/**
+	 * Hands out the next queued record, downloading further pages for as long as the queue is empty and a next
+	 * query URL is available. Returns {@code null} when the queue is still empty once no query is left.
+	 *
+	 * @see java.util.Iterator#next()
+	 */
+	@Override
+	public String next() {
+		synchronized (recordQueue) {
+			for (;;) {
+				final boolean nothingToFetch = !recordQueue.isEmpty() || query.isEmpty();
+				if (nothingToFetch) {
+					return recordQueue.poll();
+				}
+				try {
+					query = downloadPage(query);
+				} catch (CollectorException e) {
+					log.debug("CollectorPlugin.next()-Exception: " + e);
+					throw new RuntimeException(e);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Downloads one result page, adds the records it contains to the record queue and computes the URL of the
+	 * following page according to the configured resumption type.
+	 * <p>
+	 * The HTTP response stream is closed before the method returns. Every failure raised while downloading, parsing
+	 * or evaluating the resumption logic (including the CollectorException of the 'discover' mode) is rethrown as
+	 * IllegalStateException with the original exception attached as cause.
+	 *
+	 * @param query URL of the page to download
+	 * @return URL of the next page, or an empty string when no further page has to be requested
+	 * @throws CollectorException    declared for callers of this method
+	 * @throws IllegalStateException when the page cannot be collected or the result total cannot be parsed
+	 */
+	private String downloadPage(String query) throws CollectorException {
+		String resultJson;
+		String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
+		String nextQuery = "";
+		String emptyXml = resultXml + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";
+		Node resultNode = null;
+		NodeList nodeList = null;
+		String qUrlArgument = "";
+		int urlOldResumptionSize = 0;
+		// initialised to null so that the finally block can tell whether a stream was actually opened
+		InputStream theHttpInputStream = null;
+
+		// check if cursor=* is initial set otherwise add it to the queryParam URL
+		if (resumptionType.equalsIgnoreCase("deep-cursor")) {
+			log.debug("check resumptionType deep-cursor and check cursor=*?" + query);
+			if (!query.contains("&cursor=")) {
+				query += "&cursor=*";
+			}
+		}
+
+		try {
+			log.info("requestig URL [{}]", query);
+
+			URL qUrl = new URL(query);
+			log.debug("authMethod :" + authMethod);
+			if ("bearer".equalsIgnoreCase(this.authMethod)) {
+				log.trace("authMethod before inputStream: " + resultXml);
+				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
+				conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken);
+				conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
+				conn.setRequestMethod("GET");
+				theHttpInputStream = conn.getInputStream();
+			} else if (BASIC.equalsIgnoreCase(this.authMethod)) {
+				log.trace("authMethod before inputStream: " + resultXml);
+				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
+				conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken);
+				conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
+				conn.setRequestMethod("GET");
+				theHttpInputStream = conn.getInputStream();
+			} else {
+				theHttpInputStream = qUrl.openStream();
+			}
+
+			resultStream = theHttpInputStream;
+			if ("json".equals(resultOutputFormat)) {
+				// JSON responses are converted to XML so that the same XPath based processing applies
+				resultJson = IOUtils.toString(resultStream, StandardCharsets.UTF_8);
+				resultXml = jsonUtils.convertToXML(resultJson);
+				resultStream = IOUtils.toInputStream(resultXml, UTF_8);
+			}
+
+			if (!(emptyXml).equalsIgnoreCase(resultXml)) {
+				resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
+				nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
+				log.debug("nodeList.length: " + nodeList.getLength());
+				for (int i = 0; i < nodeList.getLength(); i++) {
+					StringWriter sw = new StringWriter();
+					transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
+					String toEnqueue = sw.toString();
+					if (StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) {
+						log.warn("The following record resulted in empty item for the feeding queue: " + resultXml);
+					} else {
+						recordQueue.add(toEnqueue);
+					}
+				}
+			} else {
+				log.warn("resultXml is equal with emptyXml");
+			}
+
+			resumptionInt += resultSizeValue;
+
+			switch (resumptionType.toLowerCase()) {
+				case "scan": // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items
+					resumptionStr = xprResumptionPath.evaluate(resultNode);
+					break;
+
+				case "count": // begin at one step for all records, iterate over items
+					resumptionStr = Integer.toString(resumptionInt);
+					break;
+
+				case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808)
+					if (resultSizeValue < 2) {
+						throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2");
+					}
+					qUrlArgument = qUrl.getQuery();
+					String[] arrayQUrlArgument = qUrlArgument.split("&");
+					for (String arrayUrlArgStr : arrayQUrlArgument) {
+						if (arrayUrlArgStr.startsWith(resumptionParam)) {
+							String[] resumptionKeyValue = arrayUrlArgStr.split("=");
+							if (isInteger(resumptionKeyValue[1])) {
+								urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
+								log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize);
+							} else {
+								log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]);
+							}
+						}
+					}
+
+					if (((emptyXml).equalsIgnoreCase(resultXml))
+						|| ((nodeList != null) && (nodeList.getLength() < resultSizeValue))) {
+						// resumptionStr = "";
+						if (nodeList != null) {
+							discoverResultSize += nodeList.getLength();
+						}
+						resultTotal = discoverResultSize;
+					} else {
+						resumptionStr = Integer.toString(resumptionInt);
+						resultTotal = resumptionInt + 1;
+						if (nodeList != null) {
+							discoverResultSize += nodeList.getLength();
+						}
+					}
+					log.info("discoverResultSize:  {}", discoverResultSize);
+					break;
+
+				case "pagination":
+				case "page": // pagination, iterate over page numbers
+					pagination += 1;
+					if (nodeList != null) {
+						discoverResultSize += nodeList.getLength();
+					} else {
+						resultTotal = discoverResultSize;
+						pagination = discoverResultSize;
+					}
+					resumptionInt = pagination;
+					resumptionStr = Integer.toString(resumptionInt);
+					break;
+
+				case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor in
+									// solr)
+					// isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode:
+					// deep-cursor, Param 'resultSizeValue' is less than 2");}
+
+					resumptionStr = encodeValue(xprResumptionPath.evaluate(resultNode));
+					queryParams = queryParams.replace("&cursor=*", "");
+
+					// terminating if length of nodeList is 0
+					if ((nodeList != null) && (nodeList.getLength() < discoverResultSize)) {
+						resumptionInt += (nodeList.getLength() + 1 - resultSizeValue);
+					} else {
+						resumptionInt += (nodeList.getLength() - resultSizeValue); // subtract the resultSizeValue
+																					// because the iteration is over
+																					// real length and the
+																					// resultSizeValue is added before
+																					// the switch()
+					}
+
+					discoverResultSize = nodeList.getLength();
+
+					log
+						.debug(
+							"downloadPage().deep-cursor: resumptionStr=" + resumptionStr + " ; queryParams="
+								+ queryParams + " resumptionLengthIncreased: " + resumptionInt);
+
+					break;
+
+				default: // otherwise: abort
+					// resultTotal = resumptionInt;
+					break;
+			}
+
+		} catch (Exception e) {
+			log.error(e.getMessage(), e);
+			// chain the original exception so that its type and stack trace stay available to callers
+			throw new IllegalStateException("collection failed: " + e.getMessage(), e);
+		} finally {
+			// release the HTTP response stream; the parsed DOM (resultNode) used below does not depend on it
+			if (theHttpInputStream != null) {
+				try {
+					theHttpInputStream.close();
+				} catch (java.io.IOException ignored) {
+					// best effort: a failure while closing must not mask the outcome of the download
+				}
+			}
+		}
+
+		try {
+			if (resultTotal == -1) {
+				resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
+				if (resumptionType.equalsIgnoreCase("page") && !BASIC.equalsIgnoreCase(authMethod)) {
+					resultTotal += 1;
+				} // to correct the upper bound
+				log.info("resultTotal was -1 is now: " + resultTotal);
+			}
+		} catch (Exception e) {
+			log.error(e.getMessage(), e);
+			throw new IllegalStateException("downloadPage resultTotal couldn't parse: " + e.getMessage(), e);
+		}
+		log.debug("resultTotal: " + resultTotal);
+		log.debug("resInt: " + resumptionInt);
+		if (resumptionInt <= resultTotal) {
+			nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr
+				+ queryFormat;
+		} else {
+			nextQuery = "";
+			// if (resumptionType.toLowerCase().equals("deep-cursor")) { resumptionInt -= 1; } // correct the
+			// resumptionInt and prevent a NullPointer Exception at mdStore
+		}
+		log.debug("nextQueryUrl: " + nextQuery);
+		return nextQuery;
+
+	}
+
+	/**
+	 * Checks whether the given text can be parsed by {@link Integer#parseInt(String)}.
+	 *
+	 * @param s the text to check
+	 * @return {@code true} when parsing succeeds, {@code false} when it raises a NumberFormatException
+	 */
+	private boolean isInteger(String s) {
+		try {
+			Integer.parseInt(s);
+			return true;
+		} catch (NumberFormatException notANumber) {
+			return false;
+		}
+	}
+
+	/**
+	 * URL-encodes a value using the UTF-8 encoding scheme.
+	 *
+	 * @param value the text to encode
+	 * @return the encoded text
+	 * @throws RuntimeException wrapping the UnsupportedEncodingException when the encoding is not available
+	 */
+	private String encodeValue(String value) {
+		try {
+			return URLEncoder.encode(value, StandardCharsets.UTF_8.toString());
+		} catch (UnsupportedEncodingException ex) {
+			// wrap the exception itself: its own cause is normally null, which would discard all failure details
+			throw new RuntimeException(ex);
+		}
+	}
+
+	/**
+	 * @return the {@code resultFormatValue} this iterator was constructed with
+	 */
+	public String getResultFormatValue() {
+		return resultFormatValue;
+	}
+
+	/**
+	 * @return the {@code resultOutputFormat} this iterator was constructed with; a value of "json" makes
+	 *         {@code downloadPage} convert the response to XML before evaluating it
+	 */
+	public String getResultOutputFormat() {
+		return resultOutputFormat;
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorker.java
@ -1,139 +0,0 @@
-
-package eu.dnetlib.dhp.collection.worker;
-
-import java.io.IOException;
-import java.net.URI;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.collector.worker.model.ApiDescriptor;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
-import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
-import eu.dnetlib.message.Message;
-import eu.dnetlib.message.MessageManager;
-import eu.dnetlib.message.MessageType;
-
-public class DnetCollectorWorker {
-
-	private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorker.class);
-
-	private final CollectorPluginFactory collectorPluginFactory;
-
-	private final ArgumentApplicationParser argumentParser;
-
-	private final MessageManager manager;
-
-	public DnetCollectorWorker(
-		final CollectorPluginFactory collectorPluginFactory,
-		final ArgumentApplicationParser argumentParser,
-		final MessageManager manager)
-		throws DnetCollectorException {
-		this.collectorPluginFactory = collectorPluginFactory;
-		this.argumentParser = argumentParser;
-		this.manager = manager;
-	}
-
-	public void collect() throws DnetCollectorException {
-		try {
-			final ObjectMapper jsonMapper = new ObjectMapper();
-			final ApiDescriptor api = jsonMapper.readValue(argumentParser.get("apidescriptor"), ApiDescriptor.class);
-
-			final CollectorPlugin plugin = collectorPluginFactory.getPluginByProtocol(api.getProtocol());
-
-			final String hdfsuri = argumentParser.get("namenode");
-
-			// ====== Init HDFS File System Object
-			Configuration conf = new Configuration();
-			// Set FileSystem URI
-			conf.set("fs.defaultFS", hdfsuri);
-			// Because of Maven
-			conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
-			conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
-
-			System.setProperty("HADOOP_USER_NAME", argumentParser.get("userHDFS"));
-			System.setProperty("hadoop.home.dir", "/");
-			// Get the filesystem - HDFS
-			FileSystem.get(URI.create(hdfsuri), conf);
-			Path hdfswritepath = new Path(argumentParser.get("hdfsPath"));
-
-			log.info("Created path " + hdfswritepath.toString());
-
-			final Map<String, String> ongoingMap = new HashMap<>();
-			final Map<String, String> reportMap = new HashMap<>();
-			final AtomicInteger counter = new AtomicInteger(0);
-			try (SequenceFile.Writer writer = SequenceFile
-				.createWriter(
-					conf,
-					SequenceFile.Writer.file(hdfswritepath),
-					SequenceFile.Writer.keyClass(IntWritable.class),
-					SequenceFile.Writer.valueClass(Text.class))) {
-				final IntWritable key = new IntWritable(counter.get());
-				final Text value = new Text();
-				plugin
-					.collect(api)
-					.forEach(
-						content -> {
-							key.set(counter.getAndIncrement());
-							value.set(content);
-							if (counter.get() % 10 == 0) {
-								try {
-									ongoingMap.put("ongoing", "" + counter.get());
-									log
-										.debug(
-											"Sending message: "
-												+ manager
-													.sendMessage(
-														new Message(
-															argumentParser.get("workflowId"),
-															"Collection",
-															MessageType.ONGOING,
-															ongoingMap),
-														argumentParser.get("rabbitOngoingQueue"),
-														true,
-														false));
-								} catch (Exception e) {
-									log.error("Error on sending message ", e);
-								}
-							}
-							try {
-								writer.append(key, value);
-							} catch (IOException e) {
-								throw new RuntimeException(e);
-							}
-						});
-			}
-			ongoingMap.put("ongoing", "" + counter.get());
-			manager
-				.sendMessage(
-					new Message(
-						argumentParser.get("workflowId"), "Collection", MessageType.ONGOING, ongoingMap),
-					argumentParser.get("rabbitOngoingQueue"),
-					true,
-					false);
-			reportMap.put("collected", "" + counter.get());
-			manager
-				.sendMessage(
-					new Message(
-						argumentParser.get("workflowId"), "Collection", MessageType.REPORT, reportMap),
-					argumentParser.get("rabbitOngoingQueue"),
-					true,
-					false);
-			manager.close();
-		} catch (Throwable e) {
-			throw new DnetCollectorException("Error on collecting ", e);
-		}
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
@ -1,49 +0,0 @@
-
-package eu.dnetlib.dhp.collection.worker;
-
-import org.apache.commons.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
-import eu.dnetlib.message.MessageManager;
-
-/**
- * DnetCollectortWorkerApplication is the main class responsible to start the Dnet Collection into HDFS. This module
- * will be executed on the hadoop cluster and taking in input some parameters that tells it which is the right collector
- * plugin to use and where store the data into HDFS path
- *
- * @author Sandro La Bruzzo
- */
-public class DnetCollectorWorkerApplication {
-
-	private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
-
-	private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
-
-	private static ArgumentApplicationParser argumentParser;
-
-	/** @param args */
-	public static void main(final String[] args) throws Exception {
-
-		argumentParser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					DnetCollectorWorker.class
-						.getResourceAsStream(
-							"/eu/dnetlib/collector/worker/collector_parameter.json")));
-		argumentParser.parseArgument(args);
-		log.info("hdfsPath =" + argumentParser.get("hdfsPath"));
-		log.info("json = " + argumentParser.get("apidescriptor"));
-		final MessageManager manager = new MessageManager(
-			argumentParser.get("rabbitHost"),
-			argumentParser.get("rabbitUser"),
-			argumentParser.get("rabbitPassword"),
-			false,
-			false,
-			null);
-		final DnetCollectorWorker worker = new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager);
-		worker.collect();
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
@ -1,19 +0,0 @@
-
-package eu.dnetlib.dhp.collection.worker.utils;
-
-import java.util.LinkedList;
-
-public class CollectorPluginErrorLogList extends LinkedList<String> {
-
-	private static final long serialVersionUID = -6925786561303289704L;
-
-	@Override
-	public String toString() {
-		String log = "";
-		int index = 0;
-		for (final String errorMessage : this) {
-			log += String.format("Retry #%s: %s / ", index++, errorMessage);
-		}
-		return log;
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java
@ -1,20 +0,0 @@
-
-package eu.dnetlib.dhp.collection.worker.utils;
-
-import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
-import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
-import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
-
-public class CollectorPluginFactory {
-
-	public CollectorPlugin getPluginByProtocol(final String protocol) throws DnetCollectorException {
-		if (protocol == null)
-			throw new DnetCollectorException("protocol cannot be null");
-		switch (protocol.toLowerCase().trim()) {
-			case "oai":
-				return new OaiCollectorPlugin();
-			default:
-				throw new DnetCollectorException("UNknown protocol");
-		}
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java
@ -1,244 +0,0 @@
-
-package eu.dnetlib.dhp.collection.worker.utils;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.*;
-import java.security.GeneralSecurityException;
-import java.security.cert.X509Certificate;
-import java.util.List;
-import java.util.Map;
-
-import javax.net.ssl.HttpsURLConnection;
-import javax.net.ssl.SSLContext;
-import javax.net.ssl.TrustManager;
-import javax.net.ssl.X509TrustManager;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang.math.NumberUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
-
-public class HttpConnector {
-
-	private static final Log log = LogFactory.getLog(HttpConnector.class);
-
-	private int maxNumberOfRetry = 6;
-	private int defaultDelay = 120; // seconds
-	private int readTimeOut = 120; // seconds
-
-	private String responseType = null;
-
-	private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
-
-	public HttpConnector() {
-		CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
-	}
-
-	/**
-	 * Given the URL returns the content via HTTP GET
-	 *
-	 * @param requestUrl the URL
-	 * @return the content of the downloaded resource
-	 * @throws DnetCollectorException when retrying more than maxNumberOfRetry times
-	 */
-	public String getInputSource(final String requestUrl) throws DnetCollectorException {
-		return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
-	}
-
-	/**
-	 * Given the URL returns the content as a stream via HTTP GET
-	 *
-	 * @param requestUrl the URL
-	 * @return the content of the downloaded resource as InputStream
-	 * @throws DnetCollectorException when retrying more than maxNumberOfRetry times
-	 */
-	public InputStream getInputSourceAsStream(final String requestUrl) throws DnetCollectorException {
-		return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
-	}
-
-	private String attemptDownlaodAsString(
-		final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
-		throws DnetCollectorException {
-		try {
-			final InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
-			try {
-				return IOUtils.toString(s);
-			} catch (final IOException e) {
-				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
-				Thread.sleep(defaultDelay * 1000);
-				errorList.add(e.getMessage());
-				return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList);
-			} finally {
-				IOUtils.closeQuietly(s);
-			}
-		} catch (final InterruptedException e) {
-			throw new DnetCollectorException(e);
-		}
-	}
-
-	private InputStream attemptDownload(
-		final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
-		throws DnetCollectorException {
-
-		if (retryNumber > maxNumberOfRetry) {
-			throw new DnetCollectorException("Max number of retries exceeded. Cause: \n " + errorList);
-		}
-
-		log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
-		try {
-			InputStream input = null;
-
-			try {
-				final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
-				urlConn.setInstanceFollowRedirects(false);
-				urlConn.setReadTimeout(readTimeOut * 1000);
-				urlConn.addRequestProperty("User-Agent", userAgent);
-
-				if (log.isDebugEnabled()) {
-					logHeaderFields(urlConn);
-				}
-
-				final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
-				if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
-					log.warn("waiting and repeating request after " + retryAfter + " sec.");
-					Thread.sleep(retryAfter * 1000);
-					errorList.add("503 Service Unavailable");
-					urlConn.disconnect();
-					return attemptDownload(requestUrl, retryNumber + 1, errorList);
-				} else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM
-					|| urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
-					final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
-					log.debug("The requested url has been moved to " + newUrl);
-					errorList
-						.add(
-							String
-								.format(
-									"%s %s. Moved to: %s",
-									urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
-					urlConn.disconnect();
-					return attemptDownload(newUrl, retryNumber + 1, errorList);
-				} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
-					log
-						.error(
-							String
-								.format(
-									"HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
-					Thread.sleep(defaultDelay * 1000);
-					errorList
-						.add(
-							String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
-					urlConn.disconnect();
-					return attemptDownload(requestUrl, retryNumber + 1, errorList);
-				} else {
-					input = urlConn.getInputStream();
-					responseType = urlConn.getContentType();
-					return input;
-				}
-			} catch (final IOException e) {
-				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
-				Thread.sleep(defaultDelay * 1000);
-				errorList.add(e.getMessage());
-				return attemptDownload(requestUrl, retryNumber + 1, errorList);
-			}
-		} catch (final InterruptedException e) {
-			throw new DnetCollectorException(e);
-		}
-	}
-
-	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
-		log.debug("StatusCode: " + urlConn.getResponseMessage());
-
-		for (final Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
-			if (e.getKey() != null) {
-				for (final String v : e.getValue()) {
-					log.debug("  key: " + e.getKey() + " - value: " + v);
-				}
-			}
-		}
-	}
-
-	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
-		for (final String key : headerMap.keySet()) {
-			if (key != null
-				&& key.toLowerCase().equals("retry-after")
-				&& headerMap.get(key).size() > 0
-				&& NumberUtils.isNumber(headerMap.get(key).get(0))) {
-				return Integer.parseInt(headerMap.get(key).get(0)) + 10;
-			}
-		}
-		return -1;
-	}
-
-	private String obtainNewLocation(final Map<String, List<String>> headerMap)
-		throws DnetCollectorException {
-		for (final String key : headerMap.keySet()) {
-			if (key != null && key.toLowerCase().equals("location") && headerMap.get(key).size() > 0) {
-				return headerMap.get(key).get(0);
-			}
-		}
-		throw new DnetCollectorException(
-			"The requested url has been MOVED, but 'location' param is MISSING");
-	}
-
-	/**
-	 * register for https scheme; this is a workaround and not intended for the use in trusted environments
-	 */
-	public void initTrustManager() {
-		final X509TrustManager tm = new X509TrustManager() {
-
-			@Override
-			public void checkClientTrusted(final X509Certificate[] xcs, final String string) {
-			}
-
-			@Override
-			public void checkServerTrusted(final X509Certificate[] xcs, final String string) {
-			}
-
-			@Override
-			public X509Certificate[] getAcceptedIssuers() {
-				return null;
-			}
-		};
-		try {
-			final SSLContext ctx = SSLContext.getInstance("TLS");
-			ctx.init(null, new TrustManager[] {
-				tm
-			}, null);
-			HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
-		} catch (final GeneralSecurityException e) {
-			log.fatal(e);
-			throw new IllegalStateException(e);
-		}
-	}
-
-	public int getMaxNumberOfRetry() {
-		return maxNumberOfRetry;
-	}
-
-	public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
-		this.maxNumberOfRetry = maxNumberOfRetry;
-	}
-
-	public int getDefaultDelay() {
-		return defaultDelay;
-	}
-
-	public void setDefaultDelay(final int defaultDelay) {
-		this.defaultDelay = defaultDelay;
-	}
-
-	public int getReadTimeOut() {
-		return readTimeOut;
-	}
-
-	public void setReadTimeOut(final int readTimeOut) {
-		this.readTimeOut = readTimeOut;
-	}
-
-	public String getResponseType() {
-		return responseType;
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/DnetTransformationException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/DnetTransformationException.java
@ -0,0 +1,61 @
+
+package eu.dnetlib.dhp.transformation;
+
+/**
+ * Checked exception for errors in the {@code eu.dnetlib.dhp.transformation} package.
+ *
+ * <p>Every constructor delegates directly to the {@link Exception} constructor with the same signature.
+ */
+public class DnetTransformationException extends Exception {
+
+	private static final long serialVersionUID = 1L;
+
+	/** Creates an exception with neither a detail message nor a cause. */
+	public DnetTransformationException() {
+		super();
+	}
+
+	/**
+	 * Creates an exception with explicit control over suppression and stack-trace writability.
+	 *
+	 * @param message the detail message
+	 * @param cause the underlying cause
+	 * @param enableSuppression whether suppression is enabled
+	 * @param writableStackTrace whether the stack trace is writable
+	 */
+	public DnetTransformationException(
+		final String message,
+		final Throwable cause,
+		final boolean enableSuppression,
+		final boolean writableStackTrace) {
+		super(message, cause, enableSuppression, writableStackTrace);
+	}
+
+	/**
+	 * Creates an exception with a detail message and a cause.
+	 *
+	 * @param message the detail message
+	 * @param cause the underlying cause
+	 */
+	public DnetTransformationException(final String message, final Throwable cause) {
+		super(message, cause);
+	}
+
+	/**
+	 * Creates an exception with a detail message only.
+	 *
+	 * @param message the detail message
+	 */
+	public DnetTransformationException(final String message) {
+		super(message);
+	}
+
+	/**
+	 * Creates an exception that wraps a cause.
+	 *
+	 * @param cause the underlying cause
+	 */
+	public DnetTransformationException(final Throwable cause) {
+		super(cause);
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java
@ -1,74 +0,0 @@
-
-package eu.dnetlib.dhp.transformation;
-
-import java.io.ByteArrayInputStream;
-import java.io.StringWriter;
-import java.util.Map;
-
-import javax.xml.transform.stream.StreamSource;
-
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.util.LongAccumulator;
-
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
-import eu.dnetlib.dhp.transformation.functions.Cleaner;
-import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
-import net.sf.saxon.s9api.*;
-
-public class TransformFunction implements MapFunction<MetadataRecord, MetadataRecord> {
-
-	private final LongAccumulator totalItems;
-	private final LongAccumulator errorItems;
-	private final LongAccumulator transformedItems;
-	private final String transformationRule;
-	private final Cleaner cleanFunction;
-
-	private final long dateOfTransformation;
-
-	public TransformFunction(
-		LongAccumulator totalItems,
-		LongAccumulator errorItems,
-		LongAccumulator transformedItems,
-		final String transformationRule,
-		long dateOfTransformation,
-		final Map<String, Vocabulary> vocabularies)
-		throws Exception {
-		this.totalItems = totalItems;
-		this.errorItems = errorItems;
-		this.transformedItems = transformedItems;
-		this.transformationRule = transformationRule;
-		this.dateOfTransformation = dateOfTransformation;
-		cleanFunction = new Cleaner(vocabularies);
-	}
-
-	@Override
-	public MetadataRecord call(MetadataRecord value) {
-		totalItems.add(1);
-		try {
-			Processor processor = new Processor(false);
-			processor.registerExtensionFunction(cleanFunction);
-			final XsltCompiler comp = processor.newXsltCompiler();
-			XsltExecutable xslt = comp
-				.compile(new StreamSource(new ByteArrayInputStream(transformationRule.getBytes())));
-			XdmNode source = processor
-				.newDocumentBuilder()
-				.build(new StreamSource(new ByteArrayInputStream(value.getBody().getBytes())));
-			XsltTransformer trans = xslt.load();
-			trans.setInitialContextNode(source);
-			final StringWriter output = new StringWriter();
-			Serializer out = processor.newSerializer(output);
-			out.setOutputProperty(Serializer.Property.METHOD, "xml");
-			out.setOutputProperty(Serializer.Property.INDENT, "yes");
-			trans.setDestination(out);
-			trans.transform();
-			final String xml = output.toString();
-			value.setBody(xml);
-			value.setDateOfTransformation(dateOfTransformation);
-			transformedItems.add(1);
-			return value;
-		} catch (Throwable e) {
-			errorItems.add(1);
-			return null;
-		}
-	}
-}
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`{"id":"50\|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10\|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}`
				`@ -0,0 +1 @@`
				{"id":"50\|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10\|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]}