[scholexplorer model update] dropping result.pid, all of them are moved as instance.alternateIdentifier(s)

Merge branch 'stable_ids' into scholexplorer_model_update
2021-05-27 17:25:01 +02:00 · 2021-05-27 17:20:32 +02:00 · 2021-05-27 17:17:27 +02:00 · 2021-05-27 15:10:51 +02:00 · 2021-05-27 12:22:47 +02:00 · 2021-05-26 18:20:23 +02:00
465 changed files with 33318 additions and 9377 deletions
--- a/.gitignore
+++ b/.gitignore
@ -7,6 +7,8 @@
 *.iws
 *~
 .vscode
 .metals
 .bloop
 .classpath
 /*/.classpath
 /*/*/.classpath
@ -24,4 +26,5 @@
 spark-warehouse
 /**/job-override.properties
 /**/*.log
 /**/.factorypath
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@ -7,6 +7,7 @@
 		<artifactId>dhp</artifactId>
 		<version>1.2.4-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 	<artifactId>dhp-common</artifactId>
@ -53,11 +54,6 @@
 			<groupId>com.fasterxml.jackson.core</groupId>
 			<artifactId>jackson-databind</artifactId>
 		</dependency>
 		<!-- https://mvnrepository.com/artifact/com.rabbitmq/amqp-client -->
 		<dependency>
 			<groupId>com.rabbitmq</groupId>
 			<artifactId>amqp-client</artifactId>
 		</dependency>
 		<dependency>
 			<groupId>net.sf.saxon</groupId>
 			<artifactId>Saxon-HE</artifactId>
@ -98,6 +94,16 @@
 			<artifactId>dnet-pace-core</artifactId>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.httpcomponents</groupId>
 			<artifactId>httpclient</artifactId>
 		</dependency>
 		<dependency>
 			<groupId>org.mongodb</groupId>
 			<artifactId>mongo-java-driver</artifactId>
 		</dependency>
 		<dependency>
 			<groupId>eu.dnetlib.dhp</groupId>
 			<artifactId>dhp-schemas</artifactId>
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java
@ -0,0 +1,14 @@
 package eu.dnetlib.dhp.application;
 import java.io.*;
 import java.util.Map;
 import java.util.Properties;
 import org.apache.hadoop.conf.Configuration;
 import com.google.common.collect.Maps;
 /**
  * Holder class in the {@code eu.dnetlib.dhp.application} package.
  * NOTE(review): the class declares no members in this revision although the file imports I/O, Properties,
  * Hadoop Configuration and Guava Maps — confirm whether helpers are still to be added or the class can go.
  */
 public class ApplicationUtils {
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java
@ -1,10 +1,7 @@
 package eu.dnetlib.dhp.application;
-import java.io.ByteArrayInputStream;
+import java.io.*;
 import java.io.ByteArrayOutputStream;
 import java.io.Serializable;
 import java.io.StringWriter;
 import java.util.*;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
@ -12,17 +9,21 @@ import java.util.zip.GZIPOutputStream;
 import org.apache.commons.cli.*;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.io.IOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 public class ArgumentApplicationParser implements Serializable {
 	private static final Logger log = LoggerFactory.getLogger(ArgumentApplicationParser.class);
 	private final Options options = new Options();
 	private final Map<String, String> objectMap = new HashMap<>();
 	private final List<String> compressedValues = new ArrayList<>();
-	public ArgumentApplicationParser(final String json_configuration) throws Exception {
+	public ArgumentApplicationParser(final String json_configuration) throws IOException {
 		final ObjectMapper mapper = new ObjectMapper();
 		final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
 		createOptionMap(configuration);
@ -33,7 +34,6 @@ public class ArgumentApplicationParser implements Serializable {
 	}
 	private void createOptionMap(final OptionsParameter[] configuration) {
 		Arrays
 			.stream(configuration)
 			.map(
@ -47,10 +47,6 @@ public class ArgumentApplicationParser implements Serializable {
 					return o;
 				})
 			.forEach(options::addOption);
 		// HelpFormatter formatter = new HelpFormatter();
 		// formatter.printHelp("myapp", null, options, null, true);
 	}
 	public static String decompressValue(final String abstractCompressed) {
@ -61,7 +57,7 @@ public class ArgumentApplicationParser implements Serializable {
 			IOUtils.copy(gis, stringWriter);
 			return stringWriter.toString();
 		} catch (Throwable e) {
-			System.out.println("Wrong value to decompress:" + abstractCompressed);
+			log.error("Wrong value to decompress:" + abstractCompressed);
 			throw new RuntimeException(e);
 		}
 	}
@ -74,7 +70,7 @@ public class ArgumentApplicationParser implements Serializable {
 		return java.util.Base64.getEncoder().encodeToString(out.toByteArray());
 	}
-	public void parseArgument(final String[] args) throws Exception {
+	public void parseArgument(final String[] args) throws ParseException {
 		CommandLineParser parser = new BasicParser();
 		CommandLine cmd = parser.parse(options, args);
 		Arrays
--- a/dhp-common/src/main/java/eu/dnetlib/collector/worker/model/ApiDescriptor.java
+++ b/dhp-common/src/main/java/eu/dnetlib/collector/worker/model/ApiDescriptor.java
@ -1,5 +1,5 @@
-package eu.dnetlib.collector.worker.model;
+package eu.dnetlib.dhp.collection;
 import java.util.HashMap;
 import java.util.Map;
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
@ -27,4 +27,26 @@ public class Constants {
 		coarCodeLabelMap.put("c_f1cf", "EMBARGO");
 	}
 	// Path fragments: each value below starts with '/'.
 	// NOTE(review): presumably appended to a base directory by the callers — confirm at the usage sites.
 	public static final String SEQUENCE_FILE_NAME = "/sequence_file";
 	public static final String REPORT_FILE_NAME = "/report";
 	public static final String MDSTORE_DATA_PATH = "/store";
 	public static final String MDSTORE_SIZE_PATH = "/size";
 	// Plain string keys.
 	// NOTE(review): they look like job/workflow parameter names — verify against the code that reads them.
 	public static final String COLLECTION_MODE = "collectionMode";
 	public static final String METADATA_ENCODING = "metadataEncoding";
 	public static final String OOZIE_WF_PATH = "oozieWfPath";
 	public static final String DNET_MESSAGE_MGR_URL = "dnetMessageManagerURL";
 	public static final String MAX_NUMBER_OF_RETRY = "maxNumberOfRetry";
 	public static final String REQUEST_DELAY = "requestDelay";
 	public static final String RETRY_DELAY = "retryDelay";
 	public static final String CONNECT_TIMEOUT = "connectTimeOut";
 	public static final String READ_TIMEOUT = "readTimeOut";
 	public static final String FROM_DATE_OVERRIDE = "fromDateOverride";
 	public static final String UNTIL_DATE_OVERRIDE = "untilDateOverride";
 	// Record-counter labels.
 	// NOTE(review): CONTENT_TRANSFORMEDRECORDS maps to "transformedItems", which does not follow the capitalised
 	// "TotalItems"/"InvalidRecords" pattern; left as is because consumers may depend on the exact value.
 	public static final String CONTENT_TOTALITEMS = "TotalItems";
 	public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
 	public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
@ -14,7 +14,7 @@ public class DbClient implements Closeable {
 	private static final Log log = LogFactory.getLog(DbClient.class);
-	private Connection connection;
+	private final Connection connection;
 	public DbClient(final String address, final String login, final String password) {
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java
@ -100,7 +100,7 @@ public class MakeTarArchive implements Serializable {
 			BufferedInputStream bis = new BufferedInputStream(is);
 			int count;
-			byte data[] = new byte[1024];
+			byte[] data = new byte[1024];
 			while ((count = bis.read(data, 0, data.length)) != -1) {
 				ar.write(data, 0, count);
 			}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/MdstoreClient.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/MdstoreClient.java
@ -1,39 +1,60 @@
-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common;
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Optional;
 import java.util.stream.StreamSupport;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.bson.Document;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.google.common.collect.Iterables;
 import com.mongodb.BasicDBObject;
 import com.mongodb.MongoClient;
 import com.mongodb.MongoClientURI;
 import com.mongodb.QueryBuilder;
 import com.mongodb.client.MongoCollection;
 import com.mongodb.client.MongoDatabase;
 public class MdstoreClient implements Closeable {
 	private static final Logger log = LoggerFactory.getLogger(MdstoreClient.class);
 	private final MongoClient client;
 	private final MongoDatabase db;
 	private static final String COLL_METADATA = "metadata";
 	private static final String COLL_METADATA_MANAGER = "metadataManager";
 	private static final Log log = LogFactory.getLog(MdstoreClient.class);
 	public MdstoreClient(final String baseUrl, final String dbName) {
 		this.client = new MongoClient(new MongoClientURI(baseUrl));
 		this.db = getDb(client, dbName);
 	}
 	/**
 	 * Resolves the collection currently associated with a metadata store.
 	 * The manager collection is queried by {@code mdId}; the {@code currentId} field of the first match names the
 	 * collection that is returned.
 	 *
 	 * @param mdId identifier matched against the {@code mdId} field of the manager collection
 	 * @return the collection named by the {@code currentId} field of the first matching document
 	 * @throws IllegalArgumentException when no document matches or the match carries no {@code currentId}
 	 */
 	public MongoCollection<Document> mdStore(final String mdId) {
 		final BasicDBObject byMdId = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
 		log.info("querying current mdId: {}", byMdId.toJson());
 		final Optional<String> resolved = Optional
 			.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(byMdId))
 			.map(matches -> matches.first())
 			.map(manager -> manager.getString("currentId"));
 		if (!resolved.isPresent()) {
 			throw new IllegalArgumentException("cannot find current mdstore id for: " + mdId);
 		}
 		final String currentId = resolved.get();
 		log.info("currentId: {}", currentId);
 		return getColl(db, currentId, true);
 	}
 	public Map<String, String> validCollections(
 		final String mdFormat, final String mdLayout, final String mdInterpretation) {
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
@ -13,9 +13,9 @@ import okio.Source;
 public class InputStreamRequestBody extends RequestBody {
-	private InputStream inputStream;
+	private final InputStream inputStream;
-	private MediaType mediaType;
+	private final MediaType mediaType;
-	private long lenght;
+	private final long lenght;
 	public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java
@ -0,0 +1,72 @@
 package eu.dnetlib.dhp.common.rest;
 import java.util.Arrays;
 import java.util.stream.Collectors;
 import org.apache.commons.io.IOUtils;
 import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpPost;
 import org.apache.http.client.methods.HttpUriRequest;
 import org.apache.http.entity.StringEntity;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 /**
  * Static helper for simple HTTP GET/POST calls whose payloads are (de)serialised as JSON with Jackson.
  * Each call creates its own default {@link CloseableHttpClient}; client and response are released before the
  * call returns.
  */
 public class DNetRestClient {
 	private static final Logger log = LoggerFactory.getLogger(DNetRestClient.class);
 	private static final ObjectMapper mapper = new ObjectMapper();
 	/** Charset used to encode JSON request bodies and to decode response bodies. */
 	private static final String JSON_CHARSET = "UTF-8";
 	/**
 	 * Performs a GET and maps the JSON response body onto {@code clazz}.
 	 *
 	 * @param url   target address
 	 * @param clazz type the response body is deserialised into
 	 * @return the deserialised response
 	 * @throws Exception on I/O failure or when the body cannot be mapped onto {@code clazz}
 	 */
 	public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
 		final HttpGet httpGet = new HttpGet(url);
 		return doHTTPRequest(httpGet, clazz);
 	}
 	/**
 	 * Performs a GET and returns the raw response body.
 	 *
 	 * @param url target address
 	 * @return the response body as text
 	 * @throws Exception on I/O failure
 	 */
 	public static String doGET(final String url) throws Exception {
 		final HttpGet httpGet = new HttpGet(url);
 		return doHTTPRequest(httpGet);
 	}
 	/**
 	 * Performs a POST; when {@code objParam} is not null it is sent as the JSON request body.
 	 *
 	 * @param url      target address
 	 * @param objParam object serialised as request body, or {@code null} for a body-less POST
 	 * @return the response body as text
 	 * @throws Exception on serialisation or I/O failure
 	 */
 	public static <V> String doPOST(final String url, V objParam) throws Exception {
 		final HttpPost httpPost = new HttpPost(url);
 		if (objParam != null) {
 			// StringEntity(String) alone would encode as ISO-8859-1 and mangle non-latin characters in the JSON
 			final StringEntity entity = new StringEntity(mapper.writeValueAsString(objParam), JSON_CHARSET);
 			httpPost.setEntity(entity);
 			httpPost.setHeader("Accept", "application/json");
 			httpPost.setHeader("Content-type", "application/json");
 		}
 		return doHTTPRequest(httpPost);
 	}
 	/**
 	 * Performs a POST and maps the JSON response body onto {@code clazz}.
 	 *
 	 * @param url      target address
 	 * @param objParam object serialised as request body, or {@code null} for a body-less POST
 	 * @param clazz    type the response body is deserialised into
 	 * @return the deserialised response
 	 * @throws Exception on serialisation, I/O or mapping failure
 	 */
 	public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws Exception {
 		return mapper.readValue(doPOST(url, objParam), clazz);
 	}
 	/**
 	 * Executes the request and returns the response body as text.
 	 * Client and response are opened in a try-with-resources so the connection is released even when reading
 	 * the body fails.
 	 */
 	private static String doHTTPRequest(final HttpUriRequest r) throws Exception {
 		log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI());
 		log
 			.info(
 				"request headers: {}",
 				Arrays
 					.stream(r.getAllHeaders())
 					.map(h -> h.getName() + ":" + h.getValue())
 					.collect(Collectors.joining(",")));
 		try (CloseableHttpClient client = HttpClients.createDefault();
 			CloseableHttpResponse response = client.execute(r)) {
 			// decode explicitly instead of relying on the platform default charset
 			return IOUtils.toString(response.getEntity().getContent(), JSON_CHARSET);
 		}
 	}
 	/** Executes the request and maps the JSON response body onto {@code clazz}. */
 	private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {
 		return mapper.readValue(doHTTPRequest(r), clazz);
 	}
 }
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java
@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common.vocabulary;
 import java.io.Serializable;
 import java.util.HashMap;
@ -10,8 +10,8 @@ import org.apache.commons.lang3.StringUtils;
 import com.google.common.collect.Maps;
 import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 public class Vocabulary implements Serializable {
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java
@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common.vocabulary;
 import java.io.Serializable;
 import java.util.*;
@ -7,8 +7,8 @@ import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;
 import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -67,6 +67,10 @@ public class VocabularyGroup implements Serializable {
 	private final Map<String, Vocabulary> vocs = new HashMap<>();
 	/**
 	 * Lists the keys under which vocabularies are registered.
 	 *
 	 * @return the key set of the backing map (a live view, not a copy)
 	 */
 	public Set<String> vocabularyNames() {
 		final Set<String> registeredKeys = vocs.keySet();
 		return registeredKeys;
 	}
 	/**
 	 * Registers a new, empty vocabulary; the registry key is the lower-cased id while the vocabulary itself keeps
 	 * the id as given.
 	 *
 	 * @param id   vocabulary identifier
 	 * @param name vocabulary name
 	 */
 	public void addVocabulary(final String id, final String name) {
 		final String key = id.toLowerCase();
 		final Vocabulary vocabulary = new Vocabulary(id, name);
 		vocs.put(key, vocabulary);
 	}
@ -118,7 +122,31 @@ public class VocabularyGroup implements Serializable {
 		return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
 	}
 	/**
 	 * getSynonymAsQualifierCaseSensitive
 	 *
 	 * Variant of the synonym lookup that addresses the vocabulary by its id exactly as given (no lower-casing).
 	 * A blank vocabulary id yields the "unknown" qualifier built from two empty strings.
 	 * NOTE(review): addVocabulary registers keys lower-cased, so a mixed-case vocId finds nothing here and the
 	 * dereference fails — confirm that callers only pass ids in the registered form.
 	 */
 	public Qualifier getSynonymAsQualifierCaseSensitive(final String vocId, final String syn) {
 		return StringUtils.isBlank(vocId)
 			? OafMapperUtils.unknown("", "")
 			: vocs.get(vocId).getSynonymAsQualifier(syn);
 	}
 	/**
 	 * termExists
 	 *
 	 * Checks whether a term id is known to a vocabulary, addressing the vocabulary by its lower-cased id.
 	 *
 	 * @param vocId vocabulary identifier
 	 * @param id    term identifier
 	 * @return true when the vocabulary exists and contains the term
 	 */
 	public boolean termExists(final String vocId, final String id) {
 		return termExists(vocId, id, Boolean.FALSE);
 	}
 	/**
 	 * Checks whether a term id is known to a vocabulary.
 	 *
 	 * @param vocId         vocabulary identifier
 	 * @param id            term identifier
 	 * @param caseSensitive when {@code TRUE} the vocabulary is looked up with {@code vocId} exactly as given,
 	 *                      otherwise (including {@code null}) with its lower-cased form
 	 * @return true when the vocabulary is found under the selected key and contains the term; false otherwise
 	 */
 	public boolean termExists(final String vocId, final String id, final Boolean caseSensitive) {
 		if (!vocabularyExists(vocId)) {
 			return false;
 		}
 		final String key = Boolean.TRUE.equals(caseSensitive) ? vocId : vocId.toLowerCase();
 		// keys are registered lower-cased, so an exact-case lookup can miss: report "not found" instead of an NPE
 		final Vocabulary voc = vocs.get(key);
 		return voc != null && voc.termExists(id);
 	}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java
@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common.vocabulary;
 import java.io.Serializable;
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java
@ -0,0 +1,64 @@
 package eu.dnetlib.dhp.message;
 import java.io.Serializable;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.Map;
 /**
  * Serializable message made of a type, the id of the workflow it refers to and a string-to-string body.
  */
 public class Message implements Serializable {
 	private static final long serialVersionUID = 401753881204524893L;
 	/** Body key under which the current progress value is stored. */
 	public static final String CURRENT_PARAM = "current";
 	/** Body key under which the total value is stored. */
 	public static final String TOTAL_PARAM = "total";
 	private MessageType messageType;
 	private String workflowId;
 	private Map<String, String> body;
 	/** Creates an empty message: type, workflow id and body stay {@code null} until set. */
 	public Message() {
 	}
 	/**
 	 * Creates a message with an empty, insertion-ordered body.
 	 *
 	 * @param messageType message type
 	 * @param workflowId  id of the workflow the message refers to
 	 */
 	public Message(final MessageType messageType, final String workflowId) {
 		this(messageType, workflowId, new LinkedHashMap<>());
 	}
 	/**
 	 * Creates a fully specified message.
 	 *
 	 * @param messageType message type
 	 * @param workflowId  id of the workflow the message refers to
 	 * @param body        message body; the map is stored as is, not copied
 	 */
 	public Message(final MessageType messageType, final String workflowId, final Map<String, String> body) {
 		this.messageType = messageType;
 		this.workflowId = workflowId;
 		this.body = body;
 	}
 	public MessageType getMessageType() {
 		return messageType;
 	}
 	public void setMessageType(MessageType messageType) {
 		this.messageType = messageType;
 	}
 	public String getWorkflowId() {
 		return workflowId;
 	}
 	public void setWorkflowId(final String workflowId) {
 		this.workflowId = workflowId;
 	}
 	/** @return the body map itself (not a copy); {@code null} when the no-arg constructor was used and no body set */
 	public Map<String, String> getBody() {
 		return body;
 	}
 	public void setBody(final Map<String, String> body) {
 		this.body = body;
 	}
 	@Override
 	public String toString() {
 		return String.format("Message [type=%s, workflowId=%s, body=%s]", messageType, workflowId, body);
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/MessageSender.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/MessageSender.java
@ -0,0 +1,94 @@
 package eu.dnetlib.dhp.message;
 import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import org.apache.http.client.config.RequestConfig;
 import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpPut;
 import org.apache.http.entity.ContentType;
 import org.apache.http.entity.StringEntity;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 /**
  * Sends {@link Message}s to an HTTP endpoint with a PUT request carrying the message as JSON.
  * Delivery is best effort: requests are submitted to an executor and failures are logged, never propagated.
  * NOTE(review): the executor is never shut down by this class — confirm the owning process handles that.
  */
 public class MessageSender {
 	private static final Logger log = LoggerFactory.getLogger(MessageSender.class);
 	private static final int SOCKET_TIMEOUT_MS = 2000;
 	private static final int CONNECTION_REQUEST_TIMEOUT_MS = 2000;
 	private static final int CONNECTION_TIMEOUT_MS = 2000;
 	private final ObjectMapper objectMapper = new ObjectMapper();
 	private final String dnetMessageEndpoint;
 	private final String workflowId;
 	private final ExecutorService executorService = Executors.newCachedThreadPool();
 	/**
 	 * @param dnetMessageEndpoint URL the messages are PUT to
 	 * @param workflowId          workflow id stamped on the messages built by this sender
 	 */
 	public MessageSender(final String dnetMessageEndpoint, final String workflowId) {
 		this.workflowId = workflowId;
 		this.dnetMessageEndpoint = dnetMessageEndpoint;
 	}
 	/** Queues the message for asynchronous delivery. */
 	public void sendMessage(final Message message) {
 		executorService.submit(() -> _sendMessage(message));
 	}
 	/**
 	 * Queues an ONGOING message reporting progress.
 	 *
 	 * @param current current progress value; omitted from the body when {@code null}
 	 * @param total   total value; omitted from the body when {@code null}
 	 */
 	public void sendMessage(final Long current, final Long total) {
 		sendMessage(createOngoingMessage(current, total));
 	}
 	/** Queues a REPORT message whose body is the given map. */
 	public void sendReport(final Map<String, String> report) {
 		sendMessage(new Message(MessageType.REPORT, workflowId, report));
 	}
 	/** Builds the ONGOING message; both values are optional and only the non-null ones are added. */
 	private Message createOngoingMessage(final Long current, final Long total) {
 		final Message m = new Message(MessageType.ONGOING, workflowId);
 		// guarded like total: a null boxed value must not fail in the caller's thread
 		if (current != null) {
 			m.getBody().put(Message.CURRENT_PARAM, current.toString());
 		}
 		if (total != null) {
 			m.getBody().put(Message.TOTAL_PARAM, total.toString());
 		}
 		return m;
 	}
 	/**
 	 * Serialises the message and PUTs it to the endpoint. Request construction sits inside the try block so
 	 * that a malformed endpoint is logged like any other delivery failure instead of vanishing in the
 	 * executor's Future.
 	 */
 	private void _sendMessage(final Message message) {
 		try {
 			final String json = objectMapper.writeValueAsString(message);
 			final HttpPut req = new HttpPut(dnetMessageEndpoint);
 			req.setEntity(new StringEntity(json, ContentType.APPLICATION_JSON));
 			final RequestConfig requestConfig = RequestConfig
 				.custom()
 				.setConnectTimeout(CONNECTION_TIMEOUT_MS)
 				.setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT_MS)
 				.setSocketTimeout(SOCKET_TIMEOUT_MS)
 				.build();
 			// the response is bound only so that try-with-resources closes it
 			try (final CloseableHttpClient client = HttpClients
 				.custom()
 				.setDefaultRequestConfig(requestConfig)
 				.build();
 				final CloseableHttpResponse response = client.execute(req)) {
 				log.debug("Sent Message to {}", dnetMessageEndpoint);
 				log.debug("MESSAGE:{}", message);
 			}
 		} catch (final Exception e) {
 			// best effort: log and carry on; JVM Errors are deliberately not swallowed here
 			log.error("Error sending message to {}, message content: {}", dnetMessageEndpoint, message, e);
 		}
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/MessageType.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/MessageType.java
@ -0,0 +1,21 @@
 package eu.dnetlib.dhp.message;
 import java.io.Serializable;
 import java.util.Optional;
 import org.apache.commons.lang3.StringUtils;
 /** Kinds of message exchanged with the message manager. */
 public enum MessageType implements Serializable {
 	ONGOING, REPORT;
 	/**
 	 * Parses a message type from its name, ignoring case.
 	 * Declared static so that no existing constant is needed to call it; invocations through an instance
 	 * reference still compile.
 	 *
 	 * @param value name of the type, e.g. {@code "ongoing"} or {@code "REPORT"}
 	 * @return the matching constant
 	 * @throws IllegalArgumentException when {@code value} is {@code null} or matches no constant
 	 */
 	public static MessageType from(String value) {
 		if (value == null) {
 			throw new IllegalArgumentException("unknown message type: " + value);
 		}
 		try {
 			// Locale.ROOT keeps the upper-casing stable regardless of the JVM default locale
 			return valueOf(value.toUpperCase(java.util.Locale.ROOT));
 		} catch (final IllegalArgumentException e) {
 			throw new IllegalArgumentException("unknown message type: " + value, e);
 		}
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java
@ -1,121 +0,0 @@
 package eu.dnetlib.dhp.model.mdstore;
 import java.io.Serializable;
 import eu.dnetlib.dhp.utils.DHPUtils;
 /**
  * This class models a record inside the new Metadata store collection on HDFS.
  * Two records are equal when their identifiers match ignoring case; {@link #hashCode()} follows the same rule.
  */
 public class MetadataRecord implements Serializable {
 	/** The D-Net Identifier associated to the record */
 	private String id;
 	/** The original Identifier of the record */
 	private String originalId;
 	/** The encoding of the record, should be JSON or XML */
 	private String encoding;
 	/**
 	 * The information about the provenance of the record see @{@link Provenance} for the model of this information
 	 */
 	private Provenance provenance;
 	/** The content of the metadata */
 	private String body;
 	/** the date when the record has been stored */
 	private long dateOfCollection;
 	/** the date when the record has been transformed */
 	private long dateOfTransformation;
 	/** Creates an empty record whose collection date is the current time; the id stays {@code null}. */
 	public MetadataRecord() {
 		this.dateOfCollection = System.currentTimeMillis();
 	}
 	/**
 	 * Creates a record and derives its identifier from the original id and the provenance namespace prefix.
 	 *
 	 * @param originalId       original identifier of the record
 	 * @param encoding         encoding of the body
 	 * @param provenance       provenance of the record; must not be {@code null}, its nsPrefix is read here
 	 * @param body             content of the metadata
 	 * @param dateOfCollection collection date
 	 */
 	public MetadataRecord(
 		String originalId,
 		String encoding,
 		Provenance provenance,
 		String body,
 		long dateOfCollection) {
 		this.originalId = originalId;
 		this.encoding = encoding;
 		this.provenance = provenance;
 		this.body = body;
 		this.dateOfCollection = dateOfCollection;
 		this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix());
 	}
 	public String getId() {
 		return id;
 	}
 	public void setId(String id) {
 		this.id = id;
 	}
 	public String getOriginalId() {
 		return originalId;
 	}
 	public void setOriginalId(String originalId) {
 		this.originalId = originalId;
 	}
 	public String getEncoding() {
 		return encoding;
 	}
 	public void setEncoding(String encoding) {
 		this.encoding = encoding;
 	}
 	public Provenance getProvenance() {
 		return provenance;
 	}
 	public void setProvenance(Provenance provenance) {
 		this.provenance = provenance;
 	}
 	public String getBody() {
 		return body;
 	}
 	public void setBody(String body) {
 		this.body = body;
 	}
 	public long getDateOfCollection() {
 		return dateOfCollection;
 	}
 	public void setDateOfCollection(long dateOfCollection) {
 		this.dateOfCollection = dateOfCollection;
 	}
 	public long getDateOfTransformation() {
 		return dateOfTransformation;
 	}
 	public void setDateOfTransformation(long dateOfTransformation) {
 		this.dateOfTransformation = dateOfTransformation;
 	}
 	/**
 	 * Compares records by identifier, ignoring case. Records without identifier are equal only to other
 	 * records without identifier.
 	 */
 	@Override
 	public boolean equals(Object o) {
 		if (this == o) {
 			return true;
 		}
 		if (!(o instanceof MetadataRecord)) {
 			return false;
 		}
 		final String otherId = ((MetadataRecord) o).getId();
 		return id == null ? otherId == null : id.equalsIgnoreCase(otherId);
 	}
 	/**
 	 * Hash of the identifier with every character case-folded the way {@link String#equalsIgnoreCase} folds
 	 * it, so that records considered equal always share the same hash; 0 when the identifier is missing.
 	 */
 	@Override
 	public int hashCode() {
 		if (id == null) {
 			return 0;
 		}
 		int hash = 0;
 		for (int i = 0; i < id.length(); i++) {
 			hash = 31 * hash + Character.toLowerCase(Character.toUpperCase(id.charAt(i)));
 		}
 		return hash;
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java
@ -1,52 +0,0 @@
 package eu.dnetlib.dhp.model.mdstore;
 import java.io.Serializable;
 /**
  * @author Sandro La Bruzzo
  *         <p>
  *         The Provenance class models the provenance of a record in the metadata store: it carries the
  *         identifier and the name of the datasource that provides the record, plus its namespace prefix.
  */
 public class Provenance implements Serializable {
 	/** Identifier of the datasource. */
 	private String datasourceId;
 	/** Name of the datasource. */
 	private String datasourceName;
 	/** Namespace prefix. */
 	private String nsPrefix;
 	/** Creates an empty provenance; every field is {@code null} until set. */
 	public Provenance() {
 	}
 	/**
 	 * Creates a fully populated provenance.
 	 *
 	 * @param datasourceId   identifier of the datasource
 	 * @param datasourceName name of the datasource
 	 * @param nsPrefix       namespace prefix
 	 */
 	public Provenance(final String datasourceId, final String datasourceName, final String nsPrefix) {
 		this.nsPrefix = nsPrefix;
 		this.datasourceName = datasourceName;
 		this.datasourceId = datasourceId;
 	}
 	/** @return the identifier of the datasource */
 	public String getDatasourceId() {
 		return this.datasourceId;
 	}
 	/** @param datasourceId the identifier of the datasource */
 	public void setDatasourceId(final String datasourceId) {
 		this.datasourceId = datasourceId;
 	}
 	/** @return the name of the datasource */
 	public String getDatasourceName() {
 		return this.datasourceName;
 	}
 	/** @param datasourceName the name of the datasource */
 	public void setDatasourceName(final String datasourceName) {
 		this.datasourceName = datasourceName;
 	}
 	/** @return the namespace prefix */
 	public String getNsPrefix() {
 		return this.nsPrefix;
 	}
 	/** @param nsPrefix the namespace prefix */
 	public void setNsPrefix(final String nsPrefix) {
 		this.nsPrefix = nsPrefix;
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java
@ -1,49 +0,0 @@
 package eu.dnetlib.dhp.schema.oaf;
 import java.util.Comparator;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 /**
  * Orders results by the class id of their result type: publication first, then dataset, software and other
  * research product; any other class id falls back to lexicographical order. Null results sort last.
  */
 public class ResultTypeComparator implements Comparator<Result> {
 	@Override
 	public int compare(Result left, Result right) {
 		if (left == null || right == null) {
 			if (left == right) {
 				// both null
 				return 0;
 			}
 			return left == null ? 1 : -1;
 		}
 		final String lClass = left.getResulttype().getClassid();
 		final String rClass = right.getResulttype().getClassid();
 		if (lClass.equals(rClass)) {
 			return 0;
 		}
 		// class ids from highest to lowest priority; the first side matching an entry decides the outcome
 		final String[] ranking = {
 			ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
 			ModelConstants.DATASET_RESULTTYPE_CLASSID,
 			ModelConstants.SOFTWARE_RESULTTYPE_CLASSID,
 			ModelConstants.ORP_RESULTTYPE_CLASSID
 		};
 		for (final String classId : ranking) {
 			if (lClass.equals(classId)) {
 				return -1;
 			}
 			if (rClass.equals(classId)) {
 				return 1;
 			}
 		}
 		// Else (but unlikely), lexicographical ordering will do.
 		return lClass.compareTo(rClass);
 	}
 }
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java
@ -1,33 +1,26 @@
-package eu.dnetlib.dhp.oa.graph.clean;
+package eu.dnetlib.dhp.schema.oaf.utils;
 import java.util.*;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import org.apache.commons.lang3.StringUtils;
-import com.clearspring.analytics.util.Lists;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;
-public class CleaningFunctions {
+public class GraphCleaningFunctions extends CleaningFunctions {
 	public static final String DOI_PREFIX_REGEX = "^10\\.";
 	public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
 	public static final int ORCID_LEN = 19;
 	public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
-
+	public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
-	public static final Set<String> PID_BLACKLIST = new HashSet<>();
+	public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";
-
+	public static final String TITLE_FILTER_REGEX = "[.*test.*\\W\\d]";
-	static {
+	public static final int TITLE_FILTER_RESIDUAL_LENGTH = 10;
 		PID_BLACKLIST.add("none");
 		PID_BLACKLIST.add("na");
 	}
 	public static <T extends Oaf> T fixVocabularyNames(T value) {
 		if (value instanceof Datasource) {
@ -59,23 +52,17 @@ public class CleaningFunctions {
 				}
 			}
 			if (Objects.nonNull(r.getAuthor())) {
-				r
+				r.getAuthor().stream().filter(Objects::nonNull).forEach(a -> {
 					.getAuthor()
 					.stream()
 					.filter(Objects::nonNull)
 					.forEach(a -> {
 					if (Objects.nonNull(a.getPid())) {
-							a
+						a.getPid().stream().filter(Objects::nonNull).forEach(p -> {
-								.getPid()
+							fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES);
-								.stream()
+						});
 								.filter(Objects::nonNull)
 								.forEach(p -> fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES));
 					}
 				});
 			}
 			if (value instanceof Publication) {
-			} else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) {
+			} else if (value instanceof Dataset) {
 			} else if (value instanceof OtherResearchProduct) {
@ -87,7 +74,37 @@ public class CleaningFunctions {
 		return value;
 	}
-	public static <T extends Oaf> T fixDefaults(T value) {
+	public static <T extends Oaf> boolean filter(T value) {
 		if (value instanceof Datasource) {
 			// nothing to evaluate here
 		} else if (value instanceof Project) {
 			// nothing to evaluate here
 		} else if (value instanceof Organization) {
 			// nothing to evaluate here
 		} else if (value instanceof Relation) {
 			// nothing to clean here
 		} else if (value instanceof Result) {
 			Result r = (Result) value;
 			if (Objects.nonNull(r.getTitle()) && r.getTitle().isEmpty()) {
 				return false;
 			}
 			if (value instanceof Publication) {
 			} else if (value instanceof Dataset) {
 			} else if (value instanceof OtherResearchProduct) {
 			} else if (value instanceof Software) {
 			}
 		}
 		return true;
 	}
 	public static <T extends Oaf> T cleanup(T value) {
 		if (value instanceof Datasource) {
 			// nothing to clean here
 		} else if (value instanceof Project) {
@ -110,16 +127,6 @@ public class CleaningFunctions {
 					.setLanguage(
 						qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
 			}
 			if (Objects.nonNull(r.getCountry())) {
 				r
 					.setCountry(
 						r
 							.getCountry()
 							.stream()
 							.filter(Objects::nonNull)
 							.filter(c -> StringUtils.isNotBlank(c.getClassid()))
 							.collect(Collectors.toList()));
 			}
 			if (Objects.nonNull(r.getSubject())) {
 				r
 					.setSubject(
@ -130,7 +137,7 @@ public class CleaningFunctions {
 							.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
 							.filter(sp -> Objects.nonNull(sp.getQualifier()))
 							.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
-							.map(CleaningFunctions::cleanValue)
+							.map(GraphCleaningFunctions::cleanValue)
 							.collect(Collectors.toList()));
 			}
 			if (Objects.nonNull(r.getTitle())) {
@ -141,7 +148,13 @@ public class CleaningFunctions {
 							.stream()
 							.filter(Objects::nonNull)
 							.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
-							.map(CleaningFunctions::cleanValue)
+							.filter(
 								sp -> sp
 									.getValue()
 									.toLowerCase()
 									.replaceAll(TITLE_FILTER_REGEX, "")
 									.length() > TITLE_FILTER_RESIDUAL_LENGTH)
 							.map(GraphCleaningFunctions::cleanValue)
 							.collect(Collectors.toList()));
 			}
 			if (Objects.nonNull(r.getDescription())) {
@ -152,22 +165,11 @@ public class CleaningFunctions {
 							.stream()
 							.filter(Objects::nonNull)
 							.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
-							.map(CleaningFunctions::cleanValue)
+							.map(GraphCleaningFunctions::cleanValue)
 							.collect(Collectors.toList()));
 			}
 			if (Objects.nonNull(r.getPid())) {
-				r
+				r.setPid(processPidCleaning(r.getPid()));
 					.setPid(
 						r
 							.getPid()
 							.stream()
 							.filter(Objects::nonNull)
 							.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
 							.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
 							.filter(sp -> Objects.nonNull(sp.getQualifier()))
 							.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
 							.map(CleaningFunctions::normalizePidValue)
 							.collect(Collectors.toList()));
 			}
 			if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
 				r
@ -175,11 +177,36 @@ public class CleaningFunctions {
 						qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
 			}
 			if (Objects.nonNull(r.getInstance())) {
 				for (Instance i : r.getInstance()) {
 					Optional
 						.ofNullable(i.getPid())
 						.ifPresent(pid -> {
 							final Set<StructuredProperty> pids = pid
 								.stream()
 								.filter(Objects::nonNull)
 								.filter(p -> StringUtils.isNotBlank(p.getValue()))
 								.collect(Collectors.toCollection(HashSet::new));
 							Optional
 								.ofNullable(i.getAlternateIdentifier())
 								.ifPresent(altId -> {
 									final Set<StructuredProperty> altIds = altId
 										.stream()
 										.filter(Objects::nonNull)
 										.filter(p -> StringUtils.isNotBlank(p.getValue()))
 										.collect(Collectors.toCollection(HashSet::new));
 									i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
 								});
 						});
 					if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
 						i
 							.setAccessright(
-								qualifier(ModelConstants.UNKNOWN, "not available", ModelConstants.DNET_ACCESS_MODES));
+								accessRight(
 									ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
 									ModelConstants.DNET_ACCESS_MODES));
 					}
 					if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
 						i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
@ -190,37 +217,19 @@ public class CleaningFunctions {
 				}
 			}
 			if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
-				Qualifier bestaccessrights = AbstractMdRecordToOafMapper.createBestAccessRights(r.getInstance());
+				Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
 				if (Objects.isNull(bestaccessrights)) {
 					r
 						.setBestaccessright(
-							qualifier(ModelConstants.UNKNOWN, "not available", ModelConstants.DNET_ACCESS_MODES));
+							qualifier(
 								ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
 								ModelConstants.DNET_ACCESS_MODES));
 				} else {
 					r.setBestaccessright(bestaccessrights);
 				}
 			}
 			if (Objects.nonNull(r.getAuthor())) {
-				r
+				final List<Author> authors = Lists.newArrayList();
 					.setAuthor(
 						r
 							.getAuthor()
 							.stream()
 							.filter(a -> Objects.nonNull(a))
 							.filter(a -> StringUtils.isNotBlank(a.getFullname()))
 							.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
 							.collect(Collectors.toList()));
 				boolean nullRank = r
 					.getAuthor()
 					.stream()
 					.anyMatch(a -> Objects.isNull(a.getRank()));
 				if (nullRank) {
 					int i = 1;
 					for (Author author : r.getAuthor()) {
 						author.setRank(i++);
 					}
 				}
 				for (Author a : r.getAuthor()) {
 					if (Objects.isNull(a.getPid())) {
 						a.setPid(Lists.newArrayList());
@ -234,57 +243,44 @@ public class CleaningFunctions {
 									.filter(p -> Objects.nonNull(p.getQualifier()))
 									.filter(p -> StringUtils.isNotBlank(p.getValue()))
 									.map(p -> {
-										// hack to distinguish orcid from orcid_pending
+										p.setValue(p.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, ""));
 										String pidProvenance = Optional
 											.ofNullable(p.getDataInfo())
 											.map(
 												d -> Optional
 													.ofNullable(d.getProvenanceaction())
 													.map(Qualifier::getClassid)
 													.orElse(""))
 											.orElse("");
 										if (p
 											.getQualifier()
 											.getClassid()
 											.toLowerCase()
 											.contains(ModelConstants.ORCID)) {
 											if (pidProvenance
 												.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
 												p.getQualifier().setClassid(ModelConstants.ORCID);
 											} else {
 												p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
 											}
 											final String orcid = p
 												.getValue()
 												.trim()
 												.toLowerCase()
 												.replaceAll(ORCID_CLEANING_REGEX, "$1-$2-$3-$4");
 											if (orcid.length() == ORCID_LEN) {
 												p.setValue(orcid);
 											} else {
 												p.setValue("");
 											}
 										}
 										return p;
 									})
 									.filter(p -> StringUtils.isNotBlank(p.getValue()))
 									.collect(
 										Collectors
 											.toMap(
-												p -> p.getQualifier().getClassid() + p.getValue(),
+												StructuredProperty::getValue, Function.identity(), (p1, p2) -> p1,
 												Function.identity(),
 												(p1, p2) -> p1,
 												LinkedHashMap::new))
 									.values()
 									.stream()
 									.collect(Collectors.toList()));
 					}
 					if (StringUtils.isBlank(a.getFullname())) {
 						if (StringUtils.isNotBlank(a.getName()) && StringUtils.isNotBlank(a.getSurname())) {
 							a.setFullname(a.getSurname() + ", " + a.getName());
 						}
 					}
 					if (StringUtils.isNotBlank(a.getFullname()) && isValidAuthorName(a)) {
 						authors.add(a);
 					}
 				}
 				boolean nullRank = authors
 					.stream()
 					.anyMatch(a -> Objects.isNull(a.getRank()));
 				if (nullRank) {
 					int i = 1;
 					for (Author author : authors) {
 						author.setRank(i++);
 					}
 				}
 				r.setAuthor(authors);
 			}
 			if (value instanceof Publication) {
-			} else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) {
+			} else if (value instanceof Dataset) {
 			} else if (value instanceof OtherResearchProduct) {
@ -296,6 +292,49 @@ public class CleaningFunctions {
 		return value;
 	}
 	// HELPERS
 	/**
 	 * Checks the author's name against INVALID_AUTHOR_REGEX.
 	 * The non-empty values among fullname, name and surname are concatenated (in this order, without separator),
 	 * lower-cased and matched as a whole against the regex.
 	 *
 	 * @param a the author to check
 	 * @return false when the concatenated name fully matches INVALID_AUTHOR_REGEX, true otherwise
 	 */
 	private static boolean isValidAuthorName(Author a) {
 		final StringBuilder joined = new StringBuilder();
 		for (String part : new String[] { a.getFullname(), a.getName(), a.getSurname() }) {
 			if (part != null && !part.isEmpty()) {
 				joined.append(part);
 			}
 		}
 		final boolean invalid = joined.toString().toLowerCase().matches(INVALID_AUTHOR_REGEX);
 		return !invalid;
 	}
 	/**
 	 * Cleans a list of PIDs: drops null entries, entries with a blank value, entries whose trimmed lower-cased
 	 * value is in PID_BLACKLIST and entries without a qualifier classid; the survivors are normalised with
 	 * normalizePidValue and finally screened by pidFilter.
 	 *
 	 * @param pids the PIDs to clean, must not be null
 	 * @return a new list holding the cleaned PIDs
 	 */
 	private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
 		return pids
 			.stream()
 			.filter(
 				sp -> sp != null
 					&& StringUtils.isNotBlank(StringUtils.trim(sp.getValue()))
 					&& !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase())
 					&& sp.getQualifier() != null
 					&& StringUtils.isNotBlank(sp.getQualifier().getClassid()))
 			.map(CleaningFunctions::normalizePidValue)
 			.filter(CleaningFunctions::pidFilter)
 			.collect(Collectors.toList());
 	}
 	/**
 	 * Fills in the scheme id and scheme name of a qualifier with the given vocabulary name,
 	 * but only when the qualifier is not null and its scheme id is blank.
 	 */
 	private static void fixVocabName(Qualifier q, String vocabularyName) {
 		if (Objects.isNull(q) || StringUtils.isNotBlank(q.getSchemeid())) {
 			// nothing to fix: no qualifier, or the scheme id is already set
 			return;
 		}
 		q.setSchemeid(vocabularyName);
 		q.setSchemename(vocabularyName);
 	}
 	/**
 	 * Builds an AccessRight through OafMapperUtils, using the given scheme both as scheme id and as scheme name.
 	 */
 	private static AccessRight accessRight(String classid, String classname, String scheme) {
 		final String schemeId = scheme;
 		final String schemeName = scheme;
 		return OafMapperUtils.accessRight(classid, classname, schemeId, schemeName);
 	}
 	/**
 	 * Builds a Qualifier through OafMapperUtils, using the given scheme both as scheme id and as scheme name.
 	 */
 	private static Qualifier qualifier(String classid, String classname, String scheme) {
 		final String schemeId = scheme;
 		final String schemeName = scheme;
 		return OafMapperUtils.qualifier(classid, classname, schemeId, schemeName);
 	}
 	protected static StructuredProperty cleanValue(StructuredProperty s) {
 		s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
 		return s;
@ -306,39 +345,4 @@ public class CleaningFunctions {
 		return s;
 	}
 	// HELPERS
 	/**
 	 * Sets scheme id and scheme name of the qualifier to the given vocabulary name,
 	 * provided that the qualifier exists and its scheme id is blank.
 	 */
 	private static void fixVocabName(Qualifier q, String vocabularyName) {
 		final boolean needsFix = Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid());
 		if (!needsFix) {
 			return;
 		}
 		q.setSchemeid(vocabularyName);
 		q.setSchemename(vocabularyName);
 	}
 	/**
 	 * Shortcut for OafMapperUtils.qualifier where scheme id and scheme name share the same value.
 	 */
 	private static Qualifier qualifier(String classid, String classname, String scheme) {
 		final String schemeId = scheme;
 		final String schemeName = scheme;
 		return OafMapperUtils.qualifier(classid, classname, schemeId, schemeName);
 	}
 	/**
 	 * Normalises the value of a PID depending on the classid of its qualifier.
 	 * The value is trimmed first; for the "doi" type it is then lower-cased and the part matching
 	 * DOI_PREFIX_REGEX is replaced with "10.". The value of any other PID type is left as it is.
 	 *
 	 * @param pid the PID whose value will be normalised; it is modified in place.
 	 * @return the same PID instance, carrying the normalised value.
 	 * @throws IllegalArgumentException when the PID value is null.
 	 */
 	public static StructuredProperty normalizePidValue(StructuredProperty pid) {
 		final String raw = pid.getValue();
 		if (raw == null) {
 			throw new IllegalArgumentException("PID value cannot be empty");
 		}
 		final String value = raw.trim();
 		switch (pid.getQualifier().getClassid()) {
 			// TODO add cleaning for more PID types as needed
 			case "doi":
 				pid.setValue(value.toLowerCase().replaceAll(DOI_PREFIX_REGEX, "10."));
 				break;
 			default:
 				// no normalisation defined for this PID type
 				break;
 		}
 		return pid;
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@ -1,11 +1,9 @@
-package eu.dnetlib.dhp.schema.oaf;
+package eu.dnetlib.dhp.schema.oaf.utils;
-import java.util.ArrayList;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
-import java.util.Arrays;
+
-import java.util.List;
+import java.util.*;
 import java.util.Map;
 import java.util.Objects;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.function.Function;
 import java.util.function.Predicate;
@ -13,42 +11,45 @@ import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;
-import com.google.common.base.Joiner;
+import eu.dnetlib.dhp.schema.common.AccessRightComparator;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.utils.DHPUtils;
+import eu.dnetlib.dhp.schema.oaf.*;
 public class OafMapperUtils {
-	public static Oaf merge(final Oaf o1, final Oaf o2) {
+	public static Oaf merge(final Oaf left, final Oaf right) {
-		if (ModelSupport.isSubClass(o1, OafEntity.class)) {
+		if (ModelSupport.isSubClass(left, OafEntity.class)) {
-			if (ModelSupport.isSubClass(o1, Result.class)) {
+			return mergeEntities((OafEntity) left, (OafEntity) right);
-
+		} else if (ModelSupport.isSubClass(left, Relation.class)) {
-				return mergeResults((Result) o1, (Result) o2);
+			((Relation) left).mergeFrom((Relation) right);
 			} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
 				((Datasource) o1).mergeFrom((Datasource) o2);
 			} else if (ModelSupport.isSubClass(o1, Organization.class)) {
 				((Organization) o1).mergeFrom((Organization) o2);
 			} else if (ModelSupport.isSubClass(o1, Project.class)) {
 				((Project) o1).mergeFrom((Project) o2);
 		} else {
-				throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
+			throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName());
 		}
-		} else if (ModelSupport.isSubClass(o1, Relation.class)) {
+		return left;
 			((Relation) o1).mergeFrom((Relation) o2);
 		} else {
 			throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
 		}
 		return o1;
 	}
-	public static Result mergeResults(Result r1, Result r2) {
+	public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
-		if (new ResultTypeComparator().compare(r1, r2) < 0) {
+		if (ModelSupport.isSubClass(left, Result.class)) {
-			r1.mergeFrom(r2);
+			return mergeResults((Result) left, (Result) right);
-			return r1;
+		} else if (ModelSupport.isSubClass(left, Datasource.class)) {
 			left.mergeFrom(right);
 		} else if (ModelSupport.isSubClass(left, Organization.class)) {
 			left.mergeFrom(right);
 		} else if (ModelSupport.isSubClass(left, Project.class)) {
 			left.mergeFrom(right);
 		} else {
-			r2.mergeFrom(r1);
+			throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
-			return r2;
+		}
 		return left;
 	}
 	/**
 	 * Merges two results into one. ResultTypeComparator decides the direction: when it ranks left before right
 	 * (negative comparison) right is merged into left, otherwise left is merged into right.
 	 *
 	 * @return the instance that received the merge (either left or right)
 	 */
 	public static Result mergeResults(Result left, Result right) {
 		final boolean leftWins = new ResultTypeComparator().compare(left, right) < 0;
 		final Result target = leftWins ? left : right;
 		final Result source = leftWins ? right : left;
 		target.mergeFrom(source);
 		return target;
 	}
@ -104,6 +105,29 @@ public class OafMapperUtils {
 		return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
 	}
 	/**
 	 * Builds an AccessRight without an open access route: delegates to the five-argument overload
 	 * passing null as route.
 	 */
 	public static AccessRight accessRight(
 		final String classid,
 		final String classname,
 		final String schemeid,
 		final String schemename) {
 		final OpenAccessRoute noRoute = null;
 		return accessRight(classid, classname, schemeid, schemename, noRoute);
 	}
 	/**
 	 * Builds a new AccessRight populated with the given classid, classname, scheme id, scheme name
 	 * and open access route.
 	 *
 	 * @param openAccessRoute the route to set; the four-argument overload passes null here
 	 * @return the newly created AccessRight
 	 */
 	public static AccessRight accessRight(
 		final String classid,
 		final String classname,
 		final String schemeid,
 		final String schemename,
 		final OpenAccessRoute openAccessRoute) {
 		final AccessRight ar = new AccessRight();
 		// qualifier part
 		ar.setClassid(classid);
 		ar.setClassname(classname);
 		ar.setSchemeid(schemeid);
 		ar.setSchemename(schemename);
 		// access-right specific part
 		ar.setOpenAccessRoute(openAccessRoute);
 		return ar;
 	}
 	public static Qualifier qualifier(
 		final String classid,
 		final String classname,
@ -117,6 +141,15 @@ public class OafMapperUtils {
 		return q;
 	}
 	/**
 	 * Creates a copy of the given qualifier: a new Qualifier instance carrying the same classid, classname,
 	 * scheme id and scheme name.
 	 *
 	 * @param qualifier the qualifier to copy, must not be null
 	 * @return the new Qualifier
 	 */
 	public static Qualifier qualifier(final Qualifier qualifier) {
 		final Qualifier copy = new Qualifier();
 		copy.setClassid(qualifier.getClassid());
 		copy.setClassname(qualifier.getClassname());
 		copy.setSchemeid(qualifier.getSchemeid());
 		copy.setSchemename(qualifier.getSchemename());
 		return copy;
 	}
 	public static StructuredProperty structuredProperty(
 		final String value,
 		final String classid,
@ -267,7 +300,7 @@ public class OafMapperUtils {
 		} else if (to_md5) {
 			final String nsPrefix = StringUtils.substringBefore(originalId, "::");
 			final String rest = StringUtils.substringAfter(originalId, "::");
-			return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
+			return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest));
 		} else {
 			return String.format("%s|%s", prefix, originalId);
 		}
@ -300,4 +333,36 @@ public class OafMapperUtils {
 		final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
 		return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
 	}
 	/**
 	 * Public entry point for the best access rights computation; simply delegates to getBestAccessRights.
 	 *
 	 * @param instanceList the instances to inspect
 	 * @return whatever getBestAccessRights returns for the given list
 	 */
 	public static Qualifier createBestAccessRights(final List<Instance> instanceList) {
 		final Qualifier best = getBestAccessRights(instanceList);
 		return best;
 	}
 	/**
 	 * Picks the minimum access right (according to AccessRightComparator) among the given instances and
 	 * returns it as a plain Qualifier, filling in defaults for blank fields: classid UNKNOWN,
 	 * classname NOT_AVAILABLE (also when the classname equals UNKNOWN ignoring case), scheme id and
 	 * scheme name DNET_ACCESS_MODES.
 	 * Null instances and instances without an access right are skipped, so they cannot cause a
 	 * NullPointerException while the minimum is computed.
 	 *
 	 * @param instanceList the instances to inspect, may be null
 	 * @return null when instanceList is null; otherwise a new Qualifier, made only of the defaults when
 	 * no access right is available
 	 */
 	protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
 		if (instanceList == null) {
 			return null;
 		}
 		final Optional<AccessRight> min = instanceList
 			.stream()
 			.filter(Objects::nonNull)
 			.map(Instance::getAccessright)
 			// Stream.min wraps its result with Optional.of, which rejects a null minimum
 			.filter(Objects::nonNull)
 			.min(new AccessRightComparator<>());

 		final Qualifier rights = min.map(ar -> qualifier(ar)).orElseGet(Qualifier::new);

 		if (StringUtils.isBlank(rights.getClassid())) {
 			rights.setClassid(UNKNOWN);
 		}
 		if (StringUtils.isBlank(rights.getClassname())
 			|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
 			rights.setClassname(NOT_AVAILABLE);
 		}
 		if (StringUtils.isBlank(rights.getSchemeid())) {
 			rights.setSchemeid(DNET_ACCESS_MODES);
 		}
 		if (StringUtils.isBlank(rights.getSchemename())) {
 			rights.setSchemename(DNET_ACCESS_MODES);
 		}
 		return rights;
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
@ -1,18 +1,29 @@
 package eu.dnetlib.dhp.utils;
-import java.io.ByteArrayInputStream;
+import java.io.*;
 import java.io.ByteArrayOutputStream;
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.codec.binary.Base64OutputStream;
 import org.apache.commons.codec.binary.Hex;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.SaveMode;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.Maps;
 import com.jayway.jsonpath.JsonPath;
 import net.minidev.json.JSONArray;
@ -21,6 +32,8 @@ import scala.collection.Seq;
 public class DHPUtils {
 	private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
 	/**
 	 * Converts a Java list of strings into a Scala Seq by wrapping the list iterator with JavaConverters.
 	 *
 	 * @param list the list to convert, must not be null
 	 * @return the Scala sequence built from the list elements
 	 */
 	public static Seq<String> toSeq(List<String> list) {
 		final java.util.Iterator<String> javaIterator = list.iterator();
 		return JavaConverters
 			.asScalaIteratorConverter(javaIterator)
 			.asScala()
 			.toSeq();
 	}
@ -79,4 +92,72 @@ public class DHPUtils {
 			return "";
 		}
 	}
 	// Shared Jackson mapper with default configuration; readHdfsFileAs uses it to deserialise file content.
 	public static final ObjectMapper MAPPER = new ObjectMapper();
 	/**
 	 * Writes the given text, encoded as UTF-8, to the file at the given path, using the FileSystem
 	 * obtained from the configuration. Both the FileSystem and the output stream are closed on exit.
 	 *
 	 * @param conf the Hadoop configuration used to obtain the FileSystem
 	 * @param content the text to write, must not be null
 	 * @param path the path of the file to create
 	 * @throws IOException when the file cannot be created or written
 	 */
 	public static void writeHdfsFile(final Configuration conf, final String content, final String path)
 		throws IOException {

 		log.info("writing file {}, size {}", path, content.length());
 		final byte[] payload = content.getBytes(StandardCharsets.UTF_8);

 		try (FileSystem fs = FileSystem.get(conf);
 			BufferedOutputStream os = new BufferedOutputStream(fs.create(new Path(path)))) {
 			os.write(payload);
 			os.flush();
 		}
 	}
 	/**
 	 * Reads the whole content of the file at the given path as a UTF-8 string, i.e. with the same
 	 * encoding writeHdfsFile uses for writing. The input stream is closed once the content is read.
 	 *
 	 * @param conf the Hadoop configuration used to obtain the FileSystem
 	 * @param path the path of the file to read
 	 * @return the file content
 	 * @throws FileNotFoundException when the path does not exist
 	 * @throws IOException when the file cannot be read
 	 */
 	public static String readHdfsFile(Configuration conf, String path) throws IOException {
 		log.info("reading file {}", path);

 		try (FileSystem fs = FileSystem.get(conf)) {
 			final Path p = new Path(path);
 			if (!fs.exists(p)) {
 				throw new FileNotFoundException(path);
 			}
 			// close the stream explicitly and decode with an explicit charset instead of the platform default
 			try (InputStream is = fs.open(p)) {
 				return IOUtils.toString(is, StandardCharsets.UTF_8);
 			}
 		}
 	}
 	/**
 	 * Reads the file at the given path with readHdfsFile and deserialises its content into an instance
 	 * of the given class using MAPPER.
 	 *
 	 * @param conf the Hadoop configuration used to obtain the FileSystem
 	 * @param path the path of the file to read
 	 * @param clazz the type to deserialise the content to
 	 * @return the deserialised object
 	 * @throws IOException when the file cannot be read or its content cannot be parsed
 	 */
 	public static <T> T readHdfsFileAs(Configuration conf, String path, Class<T> clazz) throws IOException {
 		final String json = readHdfsFile(conf, path);
 		return MAPPER.readValue(json, clazz);
 	}
 	/**
 	 * Stores the given Spark dataset in parquet format at the target path, using SaveMode.Overwrite.
 	 *
 	 * @param mdstore the dataset to store
 	 * @param targetPath the destination path
 	 */
 	public static <T> void saveDataset(final Dataset<T> mdstore, final String targetPath) {
 		log.info("saving dataset in: {}", targetPath);
 		mdstore.write().mode(SaveMode.Overwrite).format("parquet").save(targetPath);
 	}
 	/**
 	 * Creates a Hadoop Configuration whose default file system is the given name node, with explicit
 	 * implementations registered for the hdfs and file schemes. As a side effect the system property
 	 * "hadoop.home.dir" is set to "/".
 	 *
 	 * @param nameNode the value assigned to fs.defaultFS
 	 * @return the new configuration
 	 */
 	public static Configuration getHadoopConfiguration(String nameNode) {
 		final String hdfsImpl = org.apache.hadoop.hdfs.DistributedFileSystem.class.getName();
 		final String localImpl = org.apache.hadoop.fs.LocalFileSystem.class.getName();

 		final Configuration configuration = new Configuration();
 		// FileSystem URI
 		configuration.set("fs.defaultFS", nameNode);
 		// explicit file system implementations (the original comment says: because of Maven)
 		configuration.set("fs.hdfs.impl", hdfsImpl);
 		configuration.set("fs.file.impl", localImpl);

 		System.setProperty("hadoop.home.dir", "/");
 		return configuration;
 	}
 	/**
 	 * Stores the entries of the report as properties in the file named by the system property
 	 * "oozie.action.output.properties".
 	 *
 	 * @param report the key/value pairs to store
 	 * @throws IOException when the properties file cannot be written
 	 */
 	public static void populateOOZIEEnv(final Map<String, String> report) throws IOException {
 		final String outputLocation = System.getProperty("oozie.action.output.properties");
 		final File target = new File(outputLocation);

 		final Properties properties = new Properties();
 		report.forEach(properties::setProperty);

 		try (OutputStream out = new FileOutputStream(target)) {
 			properties.store(out, "");
 		}
 	}
 	/**
 	 * Single-entry variant: wraps the given name/value pair in a map and delegates to
 	 * populateOOZIEEnv(Map).
 	 *
 	 * @param paramName the property name
 	 * @param value the property value
 	 * @throws IOException when the properties file cannot be written
 	 */
 	public static void populateOOZIEEnv(final String paramName, String value) throws IOException {
 		final Map<String, String> singleEntry = Maps.newHashMap();
 		singleEntry.put(paramName, value);
 		populateOOZIEEnv(singleEntry);
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java
@ -15,8 +15,8 @@ public class ISLookupClientFactory {
 	private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
-	private static int requestTimeout = 60000 * 10;
+	private static final int requestTimeout = 60000 * 10;
-	private static int connectTimeout = 60000 * 10;
+	private static final int connectTimeout = 60000 * 10;
 	public static ISLookUpService getLookUpService(final String isLookupUrl) {
 		return getServiceStub(ISLookUpService.class, isLookupUrl);
--- a/dhp-common/src/main/java/eu/dnetlib/message/Message.java
+++ b/dhp-common/src/main/java/eu/dnetlib/message/Message.java
@ -1,76 +0,0 @@
 package eu.dnetlib.message;
 import java.io.IOException;
 import java.util.Map;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 /**
  * Message exchanged about a workflow job: it carries the workflow id, the job name, the message type
  * and a free-form body of key/value pairs. Instances are converted from/to JSON with Jackson.
  */
 public class Message {
 	// A single mapper is shared instead of creating a new one on every fromJson/toString call.
 	private static final ObjectMapper JSON_MAPPER = new ObjectMapper();
 	private String workflowId;
 	private String jobName;
 	private MessageType type;
 	private Map<String, String> body;
 	/**
 	 * Parses a message from its JSON representation.
 	 *
 	 * @param json the JSON text
 	 * @return the parsed message
 	 * @throws IOException when the text cannot be parsed as a Message
 	 */
 	public static Message fromJson(final String json) throws IOException {
 		return JSON_MAPPER.readValue(json, Message.class);
 	}
 	/** Creates an empty message; this no-argument constructor is the one used by Jackson in fromJson. */
 	public Message() {
 	}
 	public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) {
 		this.workflowId = workflowId;
 		this.jobName = jobName;
 		this.type = type;
 		this.body = body;
 	}
 	public String getWorkflowId() {
 		return workflowId;
 	}
 	public void setWorkflowId(String workflowId) {
 		this.workflowId = workflowId;
 	}
 	public String getJobName() {
 		return jobName;
 	}
 	public void setJobName(String jobName) {
 		this.jobName = jobName;
 	}
 	public MessageType getType() {
 		return type;
 	}
 	public void setType(MessageType type) {
 		this.type = type;
 	}
 	public Map<String, String> getBody() {
 		return body;
 	}
 	public void setBody(Map<String, String> body) {
 		this.body = body;
 	}
 	/**
 	 * Returns the JSON representation of this message.
 	 *
 	 * @return the JSON text, or null when the serialisation fails (kept for backward compatibility)
 	 */
 	@Override
 	public String toString() {
 		try {
 			return JSON_MAPPER.writeValueAsString(this);
 		} catch (JsonProcessingException e) {
 			return null;
 		}
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/message/MessageConsumer.java
+++ b/dhp-common/src/main/java/eu/dnetlib/message/MessageConsumer.java
@ -1,47 +0,0 @@
 package eu.dnetlib.message;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.concurrent.LinkedBlockingQueue;
 import com.rabbitmq.client.AMQP;
 import com.rabbitmq.client.Channel;
 import com.rabbitmq.client.DefaultConsumer;
 import com.rabbitmq.client.Envelope;
 /**
  * Consumer that parses every delivery as a JSON Message, appends it to the given queue and
  * acknowledges the delivery.
  */
 public class MessageConsumer extends DefaultConsumer {
 	final LinkedBlockingQueue<Message> queueMessages;
 	/**
 	 * Constructs a new instance and records its association to the passed-in channel.
 	 *
 	 * @param channel the channel to which this consumer is attached
 	 * @param queueMessages the queue receiving the parsed messages
 	 */
 	public MessageConsumer(Channel channel, LinkedBlockingQueue<Message> queueMessages) {
 		super(channel);
 		this.queueMessages = queueMessages;
 	}
 	/**
 	 * Decodes the body as UTF-8 JSON, parses it and puts the message on the queue; the delivery is
 	 * acknowledged whether or not the put succeeded.
 	 *
 	 * @throws IOException when the body cannot be parsed or the acknowledgement fails
 	 * @throws RuntimeException when the thread is interrupted while enqueuing a REPORT message
 	 */
 	@Override
 	public void handleDelivery(
 		String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body)
 		throws IOException {
 		final String json = new String(body, StandardCharsets.UTF_8);
 		Message message = Message.fromJson(json);
 		boolean interrupted = false;
 		try {
 			try {
 				this.queueMessages.put(message);
 				System.out.println("Receiving Message " + message);
 			} catch (InterruptedException e) {
 				interrupted = true;
 				if (message.getType() == MessageType.REPORT) {
 					// keep the cause so the interruption is visible in the stack trace
 					throw new RuntimeException("Error on sending message", e);
 				}
 				// TODO LOGGING EXCEPTION
 			} finally {
 				getChannel().basicAck(envelope.getDeliveryTag(), false);
 			}
 		} finally {
 			if (interrupted) {
 				// restore the interrupt status (after the ack has been sent) so callers can observe it
 				Thread.currentThread().interrupt();
 			}
 		}
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java
+++ b/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java
@ -1,136 +0,0 @@
 package eu.dnetlib.message;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeoutException;
 import com.rabbitmq.client.Channel;
 import com.rabbitmq.client.Connection;
 import com.rabbitmq.client.ConnectionFactory;
 /**
  * Sends and receives Message objects over queues. A connection is created lazily on the first send
  * and one channel per queue name is cached; consumed messages are handed to a MessageConsumer bound
  * to the queue given at construction time.
  */
 public class MessageManager {
 	private final String messageHost;
 	private final String username;
 	private final String password;
 	private Connection connection;
 	private final Map<String, Channel> channels = new HashMap<>();
 	private boolean durable;
 	private boolean autodelete;
 	private final LinkedBlockingQueue<Message> queueMessages;
 	/**
 	 * Creates a manager whose default queues are neither durable nor auto-deleted.
 	 */
 	public MessageManager(
 		String messageHost,
 		String username,
 		String password,
 		final LinkedBlockingQueue<Message> queueMessages) {
 		this(messageHost, username, password, false, false, queueMessages);
 	}
 	/**
 	 * @param durable default durable flag, used by sendMessage(Message, String)
 	 * @param autodelete default auto-delete flag, used by sendMessage(Message, String)
 	 * @param queueMessages the queue that receives the consumed messages
 	 */
 	public MessageManager(
 		String messageHost,
 		String username,
 		String password,
 		boolean durable,
 		boolean autodelete,
 		final LinkedBlockingQueue<Message> queueMessages) {
 		this.queueMessages = queueMessages;
 		this.messageHost = messageHost;
 		this.username = username;
 		this.password = password;
 		this.durable = durable;
 		this.autodelete = autodelete;
 	}
 	// Opens a new connection to the configured host with the configured credentials.
 	private Connection createConnection() throws IOException, TimeoutException {
 		ConnectionFactory factory = new ConnectionFactory();
 		factory.setHost(this.messageHost);
 		factory.setUsername(this.username);
 		factory.setPassword(this.password);
 		return factory.newConnection();
 	}
 	// Opens a channel on the connection and declares the (non exclusive) queue with a message TTL of 10000.
 	private Channel createChannel(
 		final Connection connection,
 		final String queueName,
 		final boolean durable,
 		final boolean autodelete)
 		throws Exception {
 		Map<String, Object> args = new HashMap<>();
 		args.put("x-message-ttl", 10000);
 		Channel channel = connection.createChannel();
 		// honour the autodelete flag requested by the caller rather than the instance default
 		channel.queueDeclare(queueName, durable, false, autodelete, args);
 		return channel;
 	}
 	// Returns the cached channel of the queue, creating the connection and the channel when missing.
 	private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete)
 		throws Exception {
 		final Channel cached = channels.get(queueName);
 		if (channels.containsKey(queueName)) {
 			return cached;
 		}
 		if (this.connection == null) {
 			this.connection = createConnection();
 		}
 		final Channel created = createChannel(this.connection, queueName, durable, autodelete);
 		channels.put(queueName, created);
 		return created;
 	}
 	/**
 	 * Closes every cached channel (failures are ignored) and then the connection, when one was opened.
 	 *
 	 * @throws IOException when closing the connection fails
 	 */
 	public void close() throws IOException {
 		channels
 			.values()
 			.forEach(
 				ch -> {
 					try {
 						ch.close();
 					} catch (Exception e) {
 						// TODO LOG
 					}
 				});
 		// the connection is created lazily: nothing to close when no message was ever sent
 		if (this.connection != null) {
 			this.connection.close();
 		}
 	}
 	/**
 	 * Publishes the message on the queue, declaring it with the default durable/auto-delete flags.
 	 *
 	 * @return true when the message has been published
 	 * @throws RuntimeException wrapping any failure
 	 */
 	public boolean sendMessage(final Message message, String queueName) throws Exception {
 		return sendMessage(message, queueName, this.durable, this.autodelete);
 	}
 	/**
 	 * Publishes the JSON form of the message, encoded as UTF-8 (the charset MessageConsumer decodes
 	 * with), on the queue. The flags only matter when the channel for the queue is not cached yet.
 	 *
 	 * @return true when the message has been published
 	 * @throws RuntimeException wrapping any failure
 	 */
 	public boolean sendMessage(
 		final Message message, String queueName, boolean durable_var, boolean autodelete_var)
 		throws Exception {
 		try {
 			Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var);
 			final byte[] payload = message.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
 			channel.basicPublish("", queueName, null, payload);
 			return true;
 		} catch (Throwable e) {
 			throw new RuntimeException(e);
 		}
 	}
 	/**
 	 * Starts consuming the queue with a MessageConsumer that feeds the queue given at construction time.
 	 * NOTE(review): every call opens a new connection that this class does not track, so close() does
 	 * not release it - confirm whether that is intended.
 	 */
 	public void startConsumingMessage(
 		final String queueName, final boolean durable, final boolean autodelete) throws Exception {
 		Channel channel = createChannel(createConnection(), queueName, durable, autodelete);
 		channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages));
 	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/message/MessageType.java
+++ b/dhp-common/src/main/java/eu/dnetlib/message/MessageType.java
@ -1,6 +0,0 @@
 package eu.dnetlib.message;
 /**
  * Kinds of message carried by Message.
  */
 public enum MessageType {
 	/** Declared first (ordinal 0). */
 	ONGOING,
 	/** Declared second (ordinal 1); MessageConsumer raises an error when enqueuing a REPORT is interrupted. */
 	REPORT
 }
--- a/dhp-common/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json
+++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/model/mdstore/MetadataRecordTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/model/mdstore/MetadataRecordTest.java
@ -1,16 +0,0 @@
 package eu.dnetlib.dhp.model.mdstore;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import org.junit.jupiter.api.Test;
 /**
  * Unit test for MetadataRecord.
  */
 public class MetadataRecordTest {
 	/** A record created with the default constructor must report a positive date of collection. */
 	@Test
 	public void getTimestamp() {
 		final MetadataRecord record = new MetadataRecord();
 		final boolean positiveDate = record.getDateOfCollection() > 0;
 		assertTrue(positiveDate);
 	}
 }
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
@ -0,0 +1,69 @@
 package eu.dnetlib.dhp.schema.oaf.utils;
 import static org.junit.jupiter.api.Assertions.*;
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.List;
 import java.util.stream.Collectors;
 import org.apache.commons.io.IOUtils;
 import org.junit.jupiter.api.Test;
 import com.fasterxml.jackson.databind.DeserializationFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Dataset;
 import eu.dnetlib.dhp.schema.oaf.KeyValue;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Result;
 /**
  * Tests for OafMapperUtils.mergeResults, based on JSON fixtures loaded from the test resources.
  */
 public class OafMapperUtilsTest {
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
 		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
 	/**
 	 * Merges a publication with a dataset in both Crossref/non-Crossref combinations and checks the
 	 * result type of the merged record.
 	 */
 	@Test
 	public void testMergePubs() throws IOException {
 		Publication p1 = read("publication_1.json", Publication.class);
 		Publication p2 = read("publication_2.json", Publication.class);
 		Dataset d1 = read("dataset_1.json", Dataset.class);
 		Dataset d2 = read("dataset_2.json", Dataset.class);
 		// p1 is collected from Crossref, d2 is not
 		assertEquals(1, p1.getCollectedfrom().size());
 		assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
 		assertEquals(1, d2.getCollectedfrom().size());
 		assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
 		assertEquals(
 			ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
 			OafMapperUtils.mergeResults(p1, d2).getResulttype().getClassid());
 		// p2 is not collected from Crossref, d1 is
 		assertEquals(1, p2.getCollectedfrom().size());
 		assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
 		assertEquals(1, d1.getCollectedfrom().size());
 		assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
 		assertEquals(
 			ModelConstants.DATASET_RESULTTYPE_CLASSID,
 			OafMapperUtils.mergeResults(p2, d1).getResulttype().getClassid());
 	}
 	/** Collects the keys of the given collectedfrom entries into a set. */
 	protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
 		return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
 	}
 	/**
 	 * Loads a classpath resource (resolved relative to this class) as UTF-8 text and deserialises it.
 	 *
 	 * @param filename the resource name
 	 * @param clazz the result type to deserialise to
 	 */
 	protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
 		try (java.io.InputStream is = getClass().getResourceAsStream(filename)) {
 			assertNotNull(is, "missing test resource: " + filename);
 			final String json = IOUtils.toString(is, java.nio.charset.StandardCharsets.UTF_8);
 			return OBJECT_MAPPER.readValue(json, clazz);
 		}
 	}
 }
--- a/dhp-common/src/test/java/eu/dnetlib/message/MessageTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/message/MessageTest.java
@ -1,51 +0,0 @@
 package eu.dnetlib.message;
 import static org.junit.jupiter.api.Assertions.*;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 import org.junit.jupiter.api.Test;
 /**
  * Tests the JSON round trip of Message.
  */
 public class MessageTest {
 	/** A message serialised with toString and parsed back with fromJson must keep all its fields. */
 	@Test
 	public void fromJsonTest() throws IOException {
 		Message m = sampleMessage();
 		System.out.println("m = " + m);
 		Message m1 = Message.fromJson(m.toString());
 		assertEquals(m.getWorkflowId(), m1.getWorkflowId());
 		assertEquals(m.getType(), m1.getType());
 		assertEquals(m.getJobName(), m1.getJobName());
 		assertNotNull(m1.getBody());
 		// compares the whole maps, so entries missing on either side are detected
 		assertEquals(m.getBody(), m1.getBody());
 	}
 	/** toString must produce the expected JSON document. */
 	@Test
 	public void toStringTest() {
 		final String expectedJson = "{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
 		assertEquals(expectedJson, sampleMessage().toString());
 	}
 	// Builds the fixture shared by the tests; the body keeps insertion order so the JSON is deterministic.
 	private static Message sampleMessage() {
 		Message m = new Message();
 		m.setWorkflowId("wId");
 		m.setType(MessageType.ONGOING);
 		m.setJobName("Collection");
 		Map<String, String> body = new java.util.LinkedHashMap<>();
 		body.put("ExecutionTime", "30s");
 		body.put("parsedItem", "300");
 		m.setBody(body);
 		return m;
 	}
 }
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json
@ -0,0 +1 @@
 {"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json
@ -0,0 +1 @@
 {"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json
@ -0,0 +1 @@
 {"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json
@ -0,0 +1 @@
 {"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]}
--- a/dhp-workflows/dhp-actionmanager/pom.xml
+++ b/dhp-workflows/dhp-actionmanager/pom.xml
@ -51,16 +51,6 @@
            <artifactId>hadoop-distcp</artifactId>
        </dependency>
        <dependency>
            <groupId>eu.dnetlib</groupId>
            <artifactId>dnet-openaire-data-protos</artifactId>
        </dependency>
        <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-schemas</artifactId>
        </dependency>
        <dependency>
            <groupId>eu.dnetlib</groupId>
            <artifactId>dnet-actionmanager-api</artifactId>
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/LicenseComparator.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/LicenseComparator.java
@ -1,69 +0,0 @@
 package eu.dnetlib.dhp.actionmanager.migration;
 import java.util.Comparator;
 import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
 /**
  * Orders qualifiers by class id according to a fixed ranking that runs from {@code OPEN SOURCE}
  * down to {@code UNKNOWN}. {@code null} qualifiers sort last; class ids that are not part of the
  * ranking sort after the ranked ones and lexicographically among themselves.
  */
 public class LicenseComparator implements Comparator<Qualifier> {
 	/** Ranked class ids, listed from the one that sorts first to the one that sorts last. */
 	private static final String[] RANKING = {
 		"OPEN SOURCE", "OPEN", "6MONTHS", "12MONTHS", "EMBARGO", "RESTRICTED", "CLOSED", "UNKNOWN"
 	};
 	/**
 	 * @param left first qualifier, may be {@code null}
 	 * @param right second qualifier, may be {@code null}
 	 * @return a negative value when {@code left} sorts first, a positive value when {@code right}
 	 *         sorts first, {@code 0} when both are {@code null} or share the same class id
 	 */
 	@Override
 	public int compare(Qualifier left, Qualifier right) {
 		if (left == null) {
 			return right == null ? 0 : 1;
 		}
 		if (right == null) {
 			return -1;
 		}
 		final String leftId = left.getClassid();
 		final String rightId = right.getClassid();
 		if (leftId.equals(rightId)) {
 			return 0;
 		}
 		// Walk the ranking top-down: the side that matches a rank first is the one that sorts first.
 		for (final String rank : RANKING) {
 			if (leftId.equals(rank)) {
 				return -1;
 			}
 			if (rightId.equals(rank)) {
 				return 1;
 			}
 		}
 		// Neither id is ranked (unlikely): fall back to lexicographical ordering.
 		return leftId.compareTo(rightId);
 	}
 }
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java
@ -1,196 +0,0 @@
 package eu.dnetlib.dhp.actionmanager.migration;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Properties;
 import java.util.stream.Collectors;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.tools.DistCp;
 import org.apache.hadoop.tools.DistCpOptions;
 import org.apache.hadoop.util.ToolRunner;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Lists;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 public class MigrateActionSet {
 	private static final Logger log = LoggerFactory.getLogger(MigrateActionSet.class);
 	private static final String SEPARATOR = "/";
 	private static final String TARGET_PATHS = "target_paths";
 	private static final String RAWSET_PREFIX = "rawset_";
 	public static void main(String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
 					MigrateActionSet.class
 						.getResourceAsStream(
 							"/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json")));
 		parser.parseArgument(args);
 		new MigrateActionSet().run(parser);
 	}
 	private void run(ArgumentApplicationParser parser) throws Exception {
 		final String isLookupUrl = parser.get("isLookupUrl");
 		final String sourceNN = parser.get("sourceNameNode");
 		final String targetNN = parser.get("targetNameNode");
 		final String workDir = parser.get("workingDirectory");
 		final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps"));
 		final String distcp_memory_mb = parser.get("distcp_memory_mb");
 		final String distcp_task_timeout = parser.get("distcp_task_timeout");
 		final String transform_only_s = parser.get("transform_only");
 		log.info("transform only param: {}", transform_only_s);
 		final Boolean transformOnly = Boolean.valueOf(parser.get("transform_only"));
 		log.info("transform only: {}", transformOnly);
 		ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
 		Configuration conf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
 		FileSystem targetFS = FileSystem.get(conf);
 		Configuration sourceConf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
 		sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN);
 		FileSystem sourceFS = FileSystem.get(sourceConf);
 		Properties props = new Properties();
 		List<Path> targetPaths = new ArrayList<>();
 		final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp);
 		log
 			.info(
 				"paths to process:\n{}", sourcePaths
 					.stream()
 					.map(p -> p.toString())
 					.collect(Collectors.joining("\n")));
 		for (Path source : sourcePaths) {
 			if (!sourceFS.exists(source)) {
 				log.warn("skipping unexisting path: {}", source);
 			} else {
 				LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
 				final String rawSet = pathQ.pollLast();
 				log.info("got RAWSET: {}", rawSet);
 				if (StringUtils.isNotBlank(rawSet) && rawSet.startsWith(RAWSET_PREFIX)) {
 					final String actionSetDirectory = pathQ.pollLast();
 					final Path targetPath = new Path(
 						targetNN + workDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawSet);
 					log.info("using TARGET PATH: {}", targetPath);
 					if (!transformOnly) {
 						if (targetFS.exists(targetPath)) {
 							targetFS.delete(targetPath, true);
 						}
 						runDistcp(
 							distcp_num_maps, distcp_memory_mb, distcp_task_timeout, conf, source, targetPath);
 					}
 					targetPaths.add(targetPath);
 				}
 			}
 		}
 		final String targetPathsCsv = targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(","));
 		props.setProperty(TARGET_PATHS, targetPathsCsv);
 		File file = new File(System.getProperty("oozie.action.output.properties"));
 		try (OutputStream os = new FileOutputStream(file)) {
 			props.store(os, "");
 		}
 		System.out.println(file.getAbsolutePath());
 	}
 	private void runDistcp(
 		Integer distcp_num_maps,
 		String distcp_memory_mb,
 		String distcp_task_timeout,
 		Configuration conf,
 		Path source,
 		Path targetPath)
 		throws Exception {
 		final DistCpOptions op = new DistCpOptions(source, targetPath);
 		op.setMaxMaps(distcp_num_maps);
 		op.preserve(DistCpOptions.FileAttribute.BLOCKSIZE);
 		op.preserve(DistCpOptions.FileAttribute.REPLICATION);
 		op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE);
 		int res = ToolRunner
 			.run(
 				new DistCp(conf, op),
 				new String[] {
 					"-Dmapred.task.timeout=" + distcp_task_timeout,
 					"-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
 					"-pb",
 					"-m " + distcp_num_maps,
 					source.toString(),
 					targetPath.toString()
 				});
 		if (res != 0) {
 			throw new RuntimeException(String.format("distcp exited with code %s", res));
 		}
 	}
 	private Configuration getConfiguration(
 		String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
 		final Configuration conf = new Configuration();
 		conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout);
 		conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout);
 		conf.set("dfs.http.client.retry.policy.enabled", "true");
 		conf.set("mapred.task.timeout", distcp_task_timeout);
 		conf.set("mapreduce.map.memory.mb", distcp_memory_mb);
 		conf.set("mapred.map.tasks", String.valueOf(distcp_num_maps));
 		return conf;
 	}
 	private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp)
 		throws ISLookUpException {
 		String XQUERY = "distinct-values(\n"
 			+ "let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n"
 			+ "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n"
 			+ "let $setDir := $x//SET/@directory/string()\n"
 			+ "let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n"
 			+ "return concat($basePath, '/', $setDir, '/', $rawSet))";
 		log.info(String.format("running xquery:\n%s", XQUERY));
 		return isLookUp
 			.quickSearchProfile(XQUERY)
 			.stream()
 			.map(p -> sourceNN + p)
 			.map(Path::new)
 			.collect(Collectors.toList());
 	}
 }
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
@ -1,710 +0,0 @@
 package eu.dnetlib.dhp.actionmanager.migration;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;
 import com.google.common.collect.Lists;
 import com.googlecode.protobuf.format.JsonFormat;
 import eu.dnetlib.data.proto.*;
 import eu.dnetlib.dhp.schema.oaf.*;
 public class ProtoConverter implements Serializable {
 	/**
 	 * Converts a protobuf message into the corresponding model object, dispatching on the message
 	 * kind (entity or relation).
 	 *
 	 * @param oaf the protobuf message to convert
 	 * @return the converted entity or relation
 	 * @throws RuntimeException wrapping any exception raised by the conversion, including the
 	 *         {@link IllegalArgumentException} thrown for an unsupported kind; the message contains
 	 *         the JSON rendering of the input
 	 */
 	public static Oaf convert(OafProtos.Oaf oaf) {
 		try {
 			switch (oaf.getKind()) {
 				case entity:
 					return convertEntity(oaf);
 				case relation:
 					return convertRelation(oaf);
 				default:
 					throw new IllegalArgumentException("invalid kind " + oaf.getKind());
 			}
 		} catch (Exception e) {
 			// Exception instead of Throwable: JVM errors such as OutOfMemoryError propagate untouched
 			throw new RuntimeException("error on getting " + JsonFormat.printToString(oaf), e);
 		}
 	}
 	/**
 	 * Builds a {@link Relation} from the relation payload of the message, together with the data
 	 * info and last update timestamp of the message itself.
 	 *
 	 * @param oaf protobuf message of kind relation
 	 * @return the converted relation; its collectedfrom is {@code null} when the source has none
 	 */
 	private static Relation convertRelation(OafProtos.Oaf oaf) {
 		final OafProtos.OafRel protoRel = oaf.getRel();
 		final List<KeyValue> collectedFrom;
 		if (protoRel.getCollectedfromCount() > 0) {
 			collectedFrom = protoRel
 				.getCollectedfromList()
 				.stream()
 				.map(ProtoConverter::mapKV)
 				.collect(Collectors.toList());
 		} else {
 			collectedFrom = null;
 		}
 		final Relation relation = new Relation();
 		relation.setDataInfo(mapDataInfo(oaf.getDataInfo()));
 		relation.setLastupdatetimestamp(oaf.getLastupdatetimestamp());
 		relation.setSource(protoRel.getSource());
 		relation.setTarget(protoRel.getTarget());
 		relation.setRelType(protoRel.getRelType().toString());
 		relation.setSubRelType(protoRel.getSubRelType().toString());
 		relation.setRelClass(protoRel.getRelClass());
 		relation.setCollectedfrom(collectedFrom);
 		return relation;
 	}
 	/**
 	 * Converts the entity carried by the message, dispatching on its type. Results additionally
 	 * receive their instances and external references.
 	 *
 	 * @param oaf protobuf message of kind entity
 	 * @return the converted entity
 	 * @throws RuntimeException if the entity type is not one of the handled ones
 	 */
 	private static OafEntity convertEntity(OafProtos.Oaf oaf) {
 		switch (oaf.getEntity().getType()) {
 			case project:
 				return convertProject(oaf);
 			case datasource:
 				return convertDataSource(oaf);
 			case organization:
 				return convertOrganization(oaf);
 			case result: {
 				final Result result = convertResult(oaf);
 				result.setInstance(convertInstances(oaf));
 				result.setExternalReference(convertExternalRefs(oaf));
 				return result;
 			}
 			default:
 				throw new RuntimeException("received unknown type");
 		}
 	}
 	/**
 	 * Converts every instance of the result carried by the message.
 	 *
 	 * @param oaf protobuf message holding a result entity
 	 * @return the converted instances, or a new empty list when the result has none
 	 */
 	private static List<Instance> convertInstances(OafProtos.Oaf oaf) {
 		final ResultProtos.Result protoResult = oaf.getEntity().getResult();
 		if (protoResult.getInstanceCount() <= 0) {
 			return Lists.newArrayList();
 		}
 		return protoResult
 			.getInstanceList()
 			.stream()
 			.map(ProtoConverter::convertInstance)
 			.collect(Collectors.toList());
 	}
 	/**
 	 * Converts a single protobuf result instance.
 	 *
 	 * @param ri the protobuf instance
 	 * @return the converted instance; its URLs are de-duplicated, keeping the stream encounter order
 	 */
 	private static Instance convertInstance(ResultProtos.Result.Instance ri) {
 		final Instance instance = new Instance();
 		instance.setAccessright(mapQualifier(ri.getAccessright()));
 		instance.setCollectedfrom(mapKV(ri.getCollectedfrom()));
 		instance.setDateofacceptance(mapStringField(ri.getDateofacceptance()));
 		instance.setDistributionlocation(ri.getDistributionlocation());
 		instance.setHostedby(mapKV(ri.getHostedby()));
 		instance.setInstancetype(mapQualifier(ri.getInstancetype()));
 		instance.setLicense(mapStringField(ri.getLicense()));
 		if (ri.getUrlList() != null) {
 			instance
 				.setUrl(ri.getUrlList().stream().distinct().collect(Collectors.toCollection(ArrayList::new)));
 		} else {
 			instance.setUrl(null);
 		}
 		instance.setRefereed(mapRefereed(ri.getRefereed()));
 		instance.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
 		instance.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
 		return instance;
 	}
 	/**
 	 * Turns the refereed string field into a qualifier of the {@code dnet:review_levels} scheme,
 	 * using the field value as both class id and class name.
 	 *
 	 * @param refereed the protobuf string field
 	 * @return the qualifier built from the field value
 	 */
 	private static Qualifier mapRefereed(FieldTypeProtos.StringField refereed) {
 		Qualifier q = new Qualifier();
 		q.setClassid(refereed.getValue());
 		// was setSchemename(value): the class name was never set and the value got overwritten below
 		q.setClassname(refereed.getValue());
 		q.setSchemeid("dnet:review_levels");
 		q.setSchemename("dnet:review_levels");
 		return q;
 	}
 	/**
 	 * Converts every external reference of the result carried by the message.
 	 *
 	 * @param oaf protobuf message holding a result entity
 	 * @return the converted references, or a new empty list when the result has none
 	 */
 	private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
 		final ResultProtos.Result protoResult = oaf.getEntity().getResult();
 		if (protoResult.getExternalReferenceCount() <= 0) {
 			return Lists.newArrayList();
 		}
 		return protoResult
 			.getExternalReferenceList()
 			.stream()
 			.map(ProtoConverter::convertExtRef)
 			.collect(Collectors.toList());
 	}
 	/**
 	 * Converts a single protobuf external reference.
 	 *
 	 * @param e the protobuf external reference
 	 * @return the converted reference
 	 */
 	private static ExternalReference convertExtRef(ResultProtos.Result.ExternalReference e) {
 		ExternalReference ex = new ExternalReference();
 		ex.setUrl(e.getUrl());
 		ex.setSitename(e.getSitename());
 		ex.setRefidentifier(e.getRefidentifier());
 		ex.setQuery(e.getQuery());
 		ex.setQualifier(mapQualifier(e.getQualifier()));
 		ex.setLabel(e.getLabel());
 		ex.setDescription(e.getDescription());
 		// was ex.setDataInfo(ex.getDataInfo()): a self-assignment that dropped the source data info
 		// NOTE(review): assumes the protobuf ExternalReference exposes getDataInfo() - confirm
 		ex.setDataInfo(mapDataInfo(e.getDataInfo()));
 		return ex;
 	}
 	/**
 	 * Converts the organization carried by the message: common Oaf and entity fields first, then
 	 * the organization metadata.
 	 *
 	 * @param oaf protobuf message holding an organization entity
 	 * @return the converted organization
 	 */
 	private static Organization convertOrganization(OafProtos.Oaf oaf) {
 		final OrganizationProtos.Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata();
 		final Organization organization = setEntity(setOaf(new Organization(), oaf), oaf);
 		// names and urls
 		organization.setLegalshortname(mapStringField(metadata.getLegalshortname()));
 		organization.setLegalname(mapStringField(metadata.getLegalname()));
 		organization
 			.setAlternativeNames(
 				metadata
 					.getAlternativeNamesList()
 					.stream()
 					.map(ProtoConverter::mapStringField)
 					.collect(Collectors.toList()));
 		organization.setWebsiteurl(mapStringField(metadata.getWebsiteurl()));
 		organization.setLogourl(mapStringField(metadata.getLogourl()));
 		// ec* fields
 		organization.setEclegalbody(mapStringField(metadata.getEclegalbody()));
 		organization.setEclegalperson(mapStringField(metadata.getEclegalperson()));
 		organization.setEcnonprofit(mapStringField(metadata.getEcnonprofit()));
 		organization.setEcresearchorganization(mapStringField(metadata.getEcresearchorganization()));
 		organization.setEchighereducation(mapStringField(metadata.getEchighereducation()));
 		organization
 			.setEcinternationalorganizationeurinterests(
 				mapStringField(metadata.getEcinternationalorganizationeurinterests()));
 		organization.setEcinternationalorganization(mapStringField(metadata.getEcinternationalorganization()));
 		organization.setEcenterprise(mapStringField(metadata.getEcenterprise()));
 		organization.setEcsmevalidated(mapStringField(metadata.getEcsmevalidated()));
 		organization.setEcnutscode(mapStringField(metadata.getEcnutscode()));
 		organization.setCountry(mapQualifier(metadata.getCountry()));
 		return organization;
 	}
 	/**
 	 * Converts the datasource carried by the message: common Oaf and entity fields first, then the
 	 * datasource metadata.
 	 *
 	 * @param oaf protobuf message holding a datasource entity
 	 * @return the converted datasource
 	 */
 	private static Datasource convertDataSource(OafProtos.Oaf oaf) {
 		final DatasourceProtos.Datasource.Metadata m = oaf.getEntity().getDatasource().getMetadata();
 		final Datasource datasource = setOaf(new Datasource(), oaf);
 		setEntity(datasource, oaf);
 		datasource
 			.setAccessinfopackage(
 				m
 					.getAccessinfopackageList()
 					.stream()
 					.map(ProtoConverter::mapStringField)
 					.collect(Collectors.toList()));
 		datasource.setCertificates(mapStringField(m.getCertificates()));
 		// set once: the original repeated this very same call a few lines further down
 		datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl()));
 		datasource.setContactemail(mapStringField(m.getContactemail()));
 		datasource.setDatabaseaccessrestriction(mapStringField(m.getDatabaseaccessrestriction()));
 		datasource.setDatabaseaccesstype(mapStringField(m.getDatabaseaccesstype()));
 		datasource.setDataprovider(mapBoolField(m.getDataprovider()));
 		datasource.setDatasourcetype(mapQualifier(m.getDatasourcetype()));
 		datasource.setDatauploadrestriction(mapStringField(m.getDatauploadrestriction()));
 		datasource.setDatauploadtype(mapStringField(m.getDatauploadtype()));
 		datasource.setDateofvalidation(mapStringField(m.getDateofvalidation()));
 		datasource.setDescription(mapStringField(m.getDescription()));
 		datasource.setEnglishname(mapStringField(m.getEnglishname()));
 		datasource.setLatitude(mapStringField(m.getLatitude()));
 		datasource.setLongitude(mapStringField(m.getLongitude()));
 		datasource.setLogourl(mapStringField(m.getLogourl()));
 		datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl()));
 		datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix()));
 		datasource
 			.setOdcontenttypes(
 				m
 					.getOdcontenttypesList()
 					.stream()
 					.map(ProtoConverter::mapStringField)
 					.collect(Collectors.toList()));
 		datasource
 			.setOdlanguages(
 				m
 					.getOdlanguagesList()
 					.stream()
 					.map(ProtoConverter::mapStringField)
 					.collect(Collectors.toList()));
 		datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems()));
 		datasource.setOdnumberofitemsdate(mapStringField(m.getOdnumberofitemsdate()));
 		datasource.setOdpolicies(mapStringField(m.getOdpolicies()));
 		datasource.setOfficialname(mapStringField(m.getOfficialname()));
 		datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility()));
 		datasource.setPidsystems(mapStringField(m.getPidsystems()));
 		datasource
 			.setPolicies(
 				m.getPoliciesList().stream().map(ProtoConverter::mapKV).collect(Collectors.toList()));
 		datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind()));
 		datasource.setReleaseenddate(mapStringField(m.getReleaseenddate()));
 		datasource.setServiceprovider(mapBoolField(m.getServiceprovider()));
 		datasource.setReleasestartdate(mapStringField(m.getReleasestartdate()));
 		datasource
 			.setSubjects(
 				m
 					.getSubjectsList()
 					.stream()
 					.map(ProtoConverter::mapStructuredProperty)
 					.collect(Collectors.toList()));
 		datasource.setVersioning(mapBoolField(m.getVersioning()));
 		datasource.setWebsiteurl(mapStringField(m.getWebsiteurl()));
 		datasource.setJournal(mapJournal(m.getJournal()));
 		return datasource;
 	}
 	/**
 	 * Converts the project carried by the message: common Oaf and entity fields first, then the
 	 * project metadata.
 	 *
 	 * @param oaf protobuf message holding a project entity
 	 * @return the converted project
 	 */
 	private static Project convertProject(OafProtos.Oaf oaf) {
 		final ProjectProtos.Project.Metadata metadata = oaf.getEntity().getProject().getMetadata();
 		final Project converted = setEntity(setOaf(new Project(), oaf), oaf);
 		converted.setAcronym(mapStringField(metadata.getAcronym()));
 		converted.setCallidentifier(mapStringField(metadata.getCallidentifier()));
 		converted.setCode(mapStringField(metadata.getCode()));
 		// contact details
 		converted.setContactemail(mapStringField(metadata.getContactemail()));
 		converted.setContactfax(mapStringField(metadata.getContactfax()));
 		converted.setContactfullname(mapStringField(metadata.getContactfullname()));
 		converted.setContactphone(mapStringField(metadata.getContactphone()));
 		converted.setContracttype(mapQualifier(metadata.getContracttype()));
 		converted.setCurrency(mapStringField(metadata.getCurrency()));
 		converted.setDuration(mapStringField(metadata.getDuration()));
 		converted.setEcarticle29_3(mapStringField(metadata.getEcarticle293()));
 		converted.setEcsc39(mapStringField(metadata.getEcsc39()));
 		converted.setOamandatepublications(mapStringField(metadata.getOamandatepublications()));
 		converted.setStartdate(mapStringField(metadata.getStartdate()));
 		converted.setEnddate(mapStringField(metadata.getEnddate()));
 		// amounts are copied as they are, without field mapping
 		converted.setFundedamount(metadata.getFundedamount());
 		converted.setTotalcost(metadata.getTotalcost());
 		converted.setKeywords(mapStringField(metadata.getKeywords()));
 		converted
 			.setSubjects(
 				metadata
 					.getSubjectsList()
 					.stream()
 					.map(ProtoConverter::mapStructuredProperty)
 					.collect(Collectors.toList()));
 		converted.setTitle(mapStringField(metadata.getTitle()));
 		converted.setWebsiteurl(mapStringField(metadata.getWebsiteurl()));
 		converted
 			.setFundingtree(
 				metadata
 					.getFundingtreeList()
 					.stream()
 					.map(ProtoConverter::mapStringField)
 					.collect(Collectors.toList()));
 		converted.setJsonextrainfo(mapStringField(metadata.getJsonextrainfo()));
 		converted.setSummary(mapStringField(metadata.getSummary()));
 		converted.setOptional1(mapStringField(metadata.getOptional1()));
 		converted.setOptional2(mapStringField(metadata.getOptional2()));
 		return converted;
 	}
 	/**
 	 * Creates the result subtype matching the class id of the result type found in the metadata;
 	 * any other class id yields a plain {@link Result}.
 	 *
 	 * @param oaf protobuf message holding a result entity
 	 * @return the converted result
 	 */
 	private static Result convertResult(OafProtos.Oaf oaf) {
 		final String resultTypeId = oaf.getEntity().getResult().getMetadata().getResulttype().getClassid();
 		switch (resultTypeId) {
 			case "publication":
 				return createPublication(oaf);
 			case "dataset":
 				return createDataset(oaf);
 			case "software":
 				return createSoftware(oaf);
 			case "other":
 				return createORP(oaf);
 			default: {
 				final Result generic = setOaf(new Result(), oaf);
 				setEntity(generic, oaf);
 				return setResult(generic, oaf);
 			}
 		}
 	}
 	/**
 	 * Builds a {@link Software} from the message: common Oaf, entity and result fields first, then
 	 * the software specific metadata.
 	 *
 	 * @param oaf protobuf message holding a software result
 	 * @return the converted software
 	 */
 	private static Software createSoftware(OafProtos.Oaf oaf) {
 		final ResultProtos.Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
 		final Software converted = setResult(setEntity(setOaf(new Software(), oaf), oaf), oaf);
 		converted
 			.setDocumentationUrl(
 				metadata
 					.getDocumentationUrlList()
 					.stream()
 					.map(ProtoConverter::mapStringField)
 					.collect(Collectors.toList()));
 		converted
 			.setLicense(
 				metadata
 					.getLicenseList()
 					.stream()
 					.map(ProtoConverter::mapStructuredProperty)
 					.collect(Collectors.toList()));
 		converted.setCodeRepositoryUrl(mapStringField(metadata.getCodeRepositoryUrl()));
 		converted.setProgrammingLanguage(mapQualifier(metadata.getProgrammingLanguage()));
 		return converted;
 	}
 	/**
 	 * Builds an {@link OtherResearchProduct} from the message: common Oaf, entity and result fields
 	 * first, then contact persons, contact groups and tools.
 	 *
 	 * @param oaf protobuf message holding a result of type other
 	 * @return the converted product
 	 */
 	private static OtherResearchProduct createORP(OafProtos.Oaf oaf) {
 		final ResultProtos.Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
 		final OtherResearchProduct orp = setResult(setEntity(setOaf(new OtherResearchProduct(), oaf), oaf), oaf);
 		orp
 			.setContactperson(
 				metadata
 					.getContactpersonList()
 					.stream()
 					.map(ProtoConverter::mapStringField)
 					.collect(Collectors.toList()));
 		orp
 			.setContactgroup(
 				metadata
 					.getContactgroupList()
 					.stream()
 					.map(ProtoConverter::mapStringField)
 					.collect(Collectors.toList()));
 		orp
 			.setTool(
 				metadata
 					.getToolList()
 					.stream()
 					.map(ProtoConverter::mapStringField)
 					.collect(Collectors.toList()));
 		return orp;
 	}
 	/**
 	 * Builds a {@link Publication} from the message: common Oaf, entity and result fields first,
 	 * then the journal.
 	 *
 	 * @param oaf protobuf message holding a publication result
 	 * @return the converted publication
 	 */
 	private static Publication createPublication(OafProtos.Oaf oaf) {
 		final Publication converted = setResult(setEntity(setOaf(new Publication(), oaf), oaf), oaf);
 		converted.setJournal(mapJournal(oaf.getEntity().getResult().getMetadata().getJournal()));
 		return converted;
 	}
 	/**
 	 * Builds a {@link Dataset} from the message: common Oaf, entity and result fields first, then
 	 * the dataset specific metadata.
 	 *
 	 * @param oaf protobuf message holding a dataset result
 	 * @return the converted dataset
 	 */
 	private static Dataset createDataset(OafProtos.Oaf oaf) {
 		final ResultProtos.Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
 		final Dataset converted = setResult(setEntity(setOaf(new Dataset(), oaf), oaf), oaf);
 		converted.setStoragedate(mapStringField(metadata.getStoragedate()));
 		converted.setDevice(mapStringField(metadata.getDevice()));
 		converted.setSize(mapStringField(metadata.getSize()));
 		converted.setVersion(mapStringField(metadata.getVersion()));
 		converted.setLastmetadataupdate(mapStringField(metadata.getLastmetadataupdate()));
 		converted.setMetadataversionnumber(mapStringField(metadata.getMetadataversionnumber()));
 		converted
 			.setGeolocation(
 				metadata
 					.getGeolocationList()
 					.stream()
 					.map(ProtoConverter::mapGeolocation)
 					.collect(Collectors.toList()));
 		return converted;
 	}
 	/**
 	 * Copies the message level fields (last update timestamp and data info) onto the target.
 	 *
 	 * @param oaf the object to fill
 	 * @param o the protobuf message to read from
 	 * @return the same {@code oaf} instance, to allow chaining
 	 */
 	public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) {
 		final DataInfo info = mapDataInfo(o.getDataInfo());
 		oaf.setDataInfo(info);
 		oaf.setLastupdatetimestamp(o.getLastupdatetimestamp());
 		return oaf;
 	}
 	/**
 	 * Copies the fields shared by all entities (id, original ids, collectedfrom, pids, collection
 	 * and transformation dates, extra info) from the message onto the target.
 	 *
 	 * @param entity the entity to fill
 	 * @param oaf the protobuf message to read from
 	 * @return the same {@code entity} instance, to allow chaining
 	 */
 	public static <T extends OafEntity> T setEntity(T entity, OafProtos.Oaf oaf) {
 		final OafProtos.OafEntity source = oaf.getEntity();
 		// identifiers
 		entity.setId(source.getId());
 		entity.setOriginalId(source.getOriginalIdList());
 		// provenance and persistent identifiers
 		entity
 			.setCollectedfrom(
 				source
 					.getCollectedfromList()
 					.stream()
 					.map(ProtoConverter::mapKV)
 					.collect(Collectors.toList()));
 		entity
 			.setPid(
 				source.getPidList().stream().map(ProtoConverter::mapStructuredProperty).collect(Collectors.toList()));
 		// dates
 		entity.setDateofcollection(source.getDateofcollection());
 		entity.setDateoftransformation(source.getDateoftransformation());
 		entity
 			.setExtraInfo(
 				source.getExtraInfoList().stream().map(ProtoConverter::mapExtraInfo).collect(Collectors.toList()));
 		return entity;
 	}
 	/**
 	 * Copies the metadata shared by all result types from the message onto the target and derives
 	 * the best access right from the instances of the result.
 	 *
 	 * @param entity the result to fill
 	 * @param oaf the protobuf message to read from
 	 * @return the same {@code entity} instance, to allow chaining
 	 */
 	public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
 		final ResultProtos.Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
 		entity
 			.setAuthor(
 				metadata.getAuthorList().stream().map(ProtoConverter::mapAuthor).collect(Collectors.toList()));
 		entity.setResulttype(mapQualifier(metadata.getResulttype()));
 		entity.setLanguage(mapQualifier(metadata.getLanguage()));
 		entity
 			.setCountry(
 				metadata
 					.getCountryList()
 					.stream()
 					.map(ProtoConverter::mapQualifierAsCountry)
 					.collect(Collectors.toList()));
 		// structured properties
 		entity
 			.setSubject(
 				metadata.getSubjectList().stream().map(ProtoConverter::mapStructuredProperty).collect(Collectors.toList()));
 		entity
 			.setTitle(
 				metadata.getTitleList().stream().map(ProtoConverter::mapStructuredProperty).collect(Collectors.toList()));
 		entity
 			.setRelevantdate(
 				metadata
 					.getRelevantdateList()
 					.stream()
 					.map(ProtoConverter::mapStructuredProperty)
 					.collect(Collectors.toList()));
 		entity
 			.setDescription(
 				metadata.getDescriptionList().stream().map(ProtoConverter::mapStringField).collect(Collectors.toList()));
 		// single valued string fields
 		entity.setDateofacceptance(mapStringField(metadata.getDateofacceptance()));
 		entity.setPublisher(mapStringField(metadata.getPublisher()));
 		entity.setEmbargoenddate(mapStringField(metadata.getEmbargoenddate()));
 		// repeated string fields
 		entity
 			.setSource(
 				metadata.getSourceList().stream().map(ProtoConverter::mapStringField).collect(Collectors.toList()));
 		entity
 			.setFulltext(
 				metadata.getFulltextList().stream().map(ProtoConverter::mapStringField).collect(Collectors.toList()));
 		entity
 			.setFormat(
 				metadata.getFormatList().stream().map(ProtoConverter::mapStringField).collect(Collectors.toList()));
 		entity
 			.setContributor(
 				metadata.getContributorList().stream().map(ProtoConverter::mapStringField).collect(Collectors.toList()));
 		entity.setResourcetype(mapQualifier(metadata.getResourcetype()));
 		entity
 			.setCoverage(
 				metadata.getCoverageList().stream().map(ProtoConverter::mapStringField).collect(Collectors.toList()));
 		entity
 			.setContext(
 				metadata.getContextList().stream().map(ProtoConverter::mapContext).collect(Collectors.toList()));
 		// derived from the instances rather than from the metadata
 		entity.setBestaccessright(getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
 		return entity;
 	}
 	/**
 	 * Picks the access right that sorts first according to {@link LicenseComparator} among the
 	 * given instances and fills in defaults for the blank parts of the resulting qualifier.
 	 *
 	 * @param instanceList instances of a result, may be {@code null}
 	 * @return the best access right with defaults applied, or {@code null} when the list is {@code null}
 	 */
 	private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) {
 		if (instanceList == null) {
 			return null;
 		}
 		// an empty list yields an empty qualifier, which is then completed by the defaults below
 		final Qualifier best = instanceList
 			.stream()
 			.map(ResultProtos.Result.Instance::getAccessright)
 			.min(new LicenseComparator())
 			.map(ProtoConverter::mapQualifier)
 			.orElseGet(Qualifier::new);
 		if (StringUtils.isBlank(best.getClassid())) {
 			best.setClassid(UNKNOWN);
 		}
 		final String className = best.getClassname();
 		if (StringUtils.isBlank(className) || UNKNOWN.equalsIgnoreCase(className)) {
 			best.setClassname(NOT_AVAILABLE);
 		}
 		if (StringUtils.isBlank(best.getSchemeid())) {
 			best.setSchemeid(DNET_ACCESS_MODES);
 		}
 		if (StringUtils.isBlank(best.getSchemename())) {
 			best.setSchemename(DNET_ACCESS_MODES);
 		}
 		return best;
 	}
 	/**
 	 * Converts a protobuf context, keeping its id and data info entries.
 	 *
 	 * @param context the protobuf context, may be {@code null}
 	 * @return the converted context, or {@code null} when the input is {@code null} or has a blank id
 	 */
 	private static Context mapContext(ResultProtos.Result.Context context) {
 		final boolean hasId = context != null && StringUtils.isNotBlank(context.getId());
 		if (!hasId) {
 			return null;
 		}
 		final Context converted = new Context();
 		converted.setId(context.getId());
 		converted
 			.setDataInfo(
 				context.getDataInfoList().stream().map(ProtoConverter::mapDataInfo).collect(Collectors.toList()));
 		return converted;
 	}
 	/**
 	 * Converts a protobuf key/value pair.
 	 *
 	 * @param kv the protobuf key/value, may be {@code null}
 	 * @return the converted pair, or {@code null} when the input is {@code null} or when both its key
 	 *         and its value are blank
 	 */
 	public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
 		// conditional && instead of the bitwise &: same outcome, but short-circuits as intended
 		if (kv == null || (StringUtils.isBlank(kv.getKey()) && StringUtils.isBlank(kv.getValue()))) {
 			return null;
 		}
 		final KeyValue keyValue = new KeyValue();
 		keyValue.setKey(kv.getKey());
 		keyValue.setValue(kv.getValue());
 		keyValue.setDataInfo(mapDataInfo(kv.getDataInfo()));
 		return keyValue;
 	}
 	/**
 	 * Converts a protobuf data info by copying its flags, inference provenance, trust and
 	 * provenance action.
 	 *
 	 * @param d the protobuf data info
 	 * @return the converted data info
 	 */
 	public static DataInfo mapDataInfo(FieldTypeProtos.DataInfo d) {
 		final DataInfo converted = new DataInfo();
 		// flags
 		converted.setDeletedbyinference(d.getDeletedbyinference());
 		converted.setInferred(d.getInferred());
 		converted.setInvisible(d.getInvisible());
 		// provenance and trust
 		converted.setInferenceprovenance(d.getInferenceprovenance());
 		converted.setProvenanceaction(mapQualifier(d.getProvenanceaction()));
 		converted.setTrust(d.getTrust());
 		return converted;
 	}
 	/**
 	 * Copies class and scheme identifiers/names of a protobuf qualifier into a new OAF
 	 * {@link Qualifier}.
 	 *
 	 * @param q the protobuf qualifier; it is dereferenced without a null check
 	 * @return a newly created OAF qualifier
 	 */
 	public static Qualifier mapQualifier(FieldTypeProtos.Qualifier q) {
 		final Qualifier mapped = new Qualifier();
 		mapped.setSchemeid(q.getSchemeid());
 		mapped.setSchemename(q.getSchemename());
 		mapped.setClassid(q.getClassid());
 		mapped.setClassname(q.getClassname());
 		return mapped;
 	}
 	/**
 	 * Builds an OAF {@link Country} from a protobuf qualifier, carrying over its data info as well.
 	 *
 	 * @param q the protobuf qualifier; it is dereferenced without a null check
 	 * @return a newly created country
 	 */
 	public static Country mapQualifierAsCountry(FieldTypeProtos.Qualifier q) {
 		final Country country = new Country();
 		country.setDataInfo(mapDataInfo(q.getDataInfo()));
 		country.setSchemeid(q.getSchemeid());
 		country.setSchemename(q.getSchemename());
 		country.setClassid(q.getClassid());
 		country.setClassname(q.getClassname());
 		return country;
 	}
 	/**
 	 * Converts a protobuf structured property into an OAF {@link StructuredProperty}.
 	 *
 	 * @param sp the protobuf structured property, may be {@code null}
 	 * @return the converted property, or {@code null} when the input is {@code null} or its value is
 	 *         blank
 	 */
 	public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
 		// short-circuit '||' is required here: the non-short-circuit '|' evaluated sp.getValue()
 		// even when sp was null, raising a NullPointerException instead of returning null
 		if (sp == null || StringUtils.isBlank(sp.getValue())) {
 			return null;
 		}
 		final StructuredProperty structuredProperty = new StructuredProperty();
 		structuredProperty.setValue(sp.getValue());
 		structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
 		structuredProperty.setDataInfo(mapDataInfo(sp.getDataInfo()));
 		return structuredProperty;
 	}
 	/**
 	 * Copies the fields of a protobuf {@code ExtraInfo} into a new OAF {@link ExtraInfo}.
 	 *
 	 * @param extraInfo the protobuf extra info; it is dereferenced without a null check
 	 * @return a newly created OAF extra info
 	 */
 	public static ExtraInfo mapExtraInfo(FieldTypeProtos.ExtraInfo extraInfo) {
 		final ExtraInfo mapped = new ExtraInfo();
 		mapped.setValue(extraInfo.getValue());
 		mapped.setTrust(extraInfo.getTrust());
 		mapped.setProvenance(extraInfo.getProvenance());
 		mapped.setTypology(extraInfo.getTypology());
 		mapped.setName(extraInfo.getName());
 		return mapped;
 	}
 	/**
 	 * Converts a protobuf OAI provenance record, delegating its origin description to
 	 * {@link #mapOriginalDescription}.
 	 *
 	 * @param oaiProvenance the protobuf provenance; it is dereferenced without a null check
 	 * @return a newly created OAF provenance
 	 */
 	public static OAIProvenance mapOAIProvenance(FieldTypeProtos.OAIProvenance oaiProvenance) {
 		final OriginDescription origin = mapOriginalDescription(oaiProvenance.getOriginDescription());
 		final OAIProvenance mapped = new OAIProvenance();
 		mapped.setOriginDescription(origin);
 		return mapped;
 	}
 	/**
 	 * Copies the fields of a protobuf origin description into a new OAF {@link OriginDescription}.
 	 *
 	 * @param originDescription the protobuf origin description; it is dereferenced without a null
 	 *        check
 	 * @return a newly created OAF origin description
 	 */
 	public static OriginDescription mapOriginalDescription(
 		FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
 		final OriginDescription mapped = new OriginDescription();
 		mapped.setBaseURL(originDescription.getBaseURL());
 		mapped.setIdentifier(originDescription.getIdentifier());
 		mapped.setMetadataNamespace(originDescription.getMetadataNamespace());
 		mapped.setDatestamp(originDescription.getDatestamp());
 		mapped.setHarvestDate(originDescription.getHarvestDate());
 		mapped.setAltered(originDescription.getAltered());
 		return mapped;
 	}
 	/**
 	 * Converts a protobuf string field into an OAF {@code Field<String>}.
 	 *
 	 * @param s the protobuf string field, may be {@code null}
 	 * @return the converted field, or {@code null} when the input is {@code null} or its value is
 	 *         blank
 	 */
 	public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
 		if (s != null && StringUtils.isNotBlank(s.getValue())) {
 			final Field<String> mapped = new Field<>();
 			mapped.setValue(s.getValue());
 			mapped.setDataInfo(mapDataInfo(s.getDataInfo()));
 			return mapped;
 		}
 		return null;
 	}
 	/**
 	 * Converts a protobuf boolean field into an OAF {@code Field<Boolean>}.
 	 *
 	 * @param b the protobuf boolean field, may be {@code null}
 	 * @return the converted field, or {@code null} when the input is {@code null}
 	 */
 	public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
 		if (b != null) {
 			final Field<Boolean> mapped = new Field<>();
 			mapped.setValue(b.getValue());
 			mapped.setDataInfo(mapDataInfo(b.getDataInfo()));
 			return mapped;
 		}
 		return null;
 	}
 	/**
 	 * Copies the fields of a protobuf journal into a new OAF {@link Journal}, including its data
 	 * info.
 	 *
 	 * @param j the protobuf journal; it is dereferenced without a null check
 	 * @return a newly created OAF journal
 	 */
 	public static Journal mapJournal(FieldTypeProtos.Journal j) {
 		final Journal mapped = new Journal();
 		mapped.setDataInfo(mapDataInfo(j.getDataInfo()));
 		// naming and identifiers
 		mapped.setName(j.getName());
 		mapped.setIssnPrinted(j.getIssnPrinted());
 		mapped.setIssnOnline(j.getIssnOnline());
 		mapped.setIssnLinking(j.getIssnLinking());
 		// issue coordinates
 		mapped.setVol(j.getVol());
 		mapped.setIss(j.getIss());
 		mapped.setSp(j.getSp());
 		mapped.setEp(j.getEp());
 		mapped.setEdition(j.getEdition());
 		// conference details
 		mapped.setConferenceplace(j.getConferenceplace());
 		mapped.setConferencedate(j.getConferencedate());
 		return mapped;
 	}
 	/**
 	 * Converts a protobuf author into an OAF {@link Author}.
 	 * <p>
 	 * Each pid key/value pair becomes a {@link StructuredProperty} whose value is the pair's value
 	 * and whose qualifier uses the pair's key as both class id and class name. Affiliations are
 	 * converted with {@link #mapStringField}, so entries it rejects appear as {@code null} elements.
 	 *
 	 * @param author the protobuf author; it is dereferenced without a null check
 	 * @return a newly created OAF author
 	 */
 	public static Author mapAuthor(FieldTypeProtos.Author author) {
 		final List<StructuredProperty> pids = author
 			.getPidList()
 			.stream()
 			.map(
 				pair -> {
 					final Qualifier pidType = new Qualifier();
 					pidType.setClassid(pair.getKey());
 					pidType.setClassname(pair.getKey());
 					final StructuredProperty pid = new StructuredProperty();
 					pid.setValue(pair.getValue());
 					pid.setQualifier(pidType);
 					return pid;
 				})
 			.collect(Collectors.toList());
 		final List<Field<String>> affiliations = author
 			.getAffiliationList()
 			.stream()
 			.map(ProtoConverter::mapStringField)
 			.collect(Collectors.toList());

 		final Author mapped = new Author();
 		mapped.setFullname(author.getFullname());
 		mapped.setName(author.getName());
 		mapped.setSurname(author.getSurname());
 		mapped.setRank(author.getRank());
 		mapped.setPid(pids);
 		mapped.setAffiliation(affiliations);
 		return mapped;
 	}
 	/**
 	 * Copies point, box and place of a protobuf geolocation into a new OAF {@link GeoLocation}.
 	 *
 	 * @param geoLocation the protobuf geolocation; it is dereferenced without a null check
 	 * @return a newly created OAF geolocation
 	 */
 	public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) {
 		final GeoLocation mapped = new GeoLocation();
 		mapped.setPlace(geoLocation.getPlace());
 		mapped.setBox(geoLocation.getBox());
 		mapped.setPoint(geoLocation.getPoint());
 		return mapped;
 	}
 }
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/TransformActions.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/TransformActions.java
@ -1,172 +0,0 @@
 package eu.dnetlib.dhp.actionmanager.migration;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.LinkedList;
 import java.util.Objects;
 import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Lists;
 import com.google.protobuf.InvalidProtocolBufferException;
 import eu.dnetlib.data.proto.OafProtos;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import scala.Tuple2;
 /**
  * Spark job that reads legacy action sets (sequence files whose values are JSON serialized
  * {@code eu.dnetlib.actionmanager.actions.AtomicAction}s carrying a protobuf payload), converts
  * every payload with {@link ProtoConverter} and writes the resulting {@link AtomicAction}s as JSON
  * into sequence files under the action manager base path resolved through the IS lookup service.
  */
 public class TransformActions implements Serializable {
 	private static final Logger log = LoggerFactory.getLogger(TransformActions.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	private static final String SEPARATOR = "/";

 	/**
 	 * Job entry point: parses the arguments, resolves the target base directory and runs the
 	 * transformation within a Spark session.
 	 *
 	 * @param args command line arguments described by transform_actionsets_parameters.json
 	 * @throws Exception when argument parsing, the lookup call or the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
 					MigrateActionSet.class
 						.getResourceAsStream(
 							"/eu/dnetlib/dhp/actionmanager/migration/transform_actionsets_parameters.json")));
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		final String isLookupUrl = parser.get("isLookupUrl");
 		log.info("isLookupUrl: {}", isLookupUrl);
 		final String inputPaths = parser.get("inputPaths");
 		if (StringUtils.isBlank(inputPaths)) {
 			throw new IllegalArgumentException("empty inputPaths");
 		}
 		log.info("inputPaths: {}", inputPaths);
 		final String targetBaseDir = getTargetBaseDir(isLookupUrl);
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf, isSparkSessionManaged, spark -> transformActions(inputPaths, targetBaseDir, spark));
 	}

 	/**
 	 * Transforms every action set listed in {@code inputPaths}; an already existing target
 	 * directory is deleted before being rewritten.
 	 *
 	 * @param inputPaths comma separated list of source paths; the last two path segments of each
 	 *        source path are reused as sub-directories of the target
 	 * @param targetBaseDir base directory under which the transformed action sets are stored
 	 * @param spark the active Spark session
 	 * @throws IOException when the file system cannot be accessed
 	 */
 	private static void transformActions(String inputPaths, String targetBaseDir, SparkSession spark)
 		throws IOException {
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 		final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
 		for (String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
 			LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
 			final String rawset = pathQ.pollLast();
 			final String actionSetDirectory = pathQ.pollLast();
 			final Path targetDirectory = new Path(targetBaseDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawset);
 			if (fs.exists(targetDirectory)) {
 				log.info("found target directory '{}'", targetDirectory);
 				fs.delete(targetDirectory, true);
 				log.info("deleted target directory '{}'", targetDirectory);
 			}
 			log.info("transforming actions from '{}' to '{}'", sourcePath, targetDirectory);
 			sc
 				.sequenceFile(sourcePath, Text.class, Text.class)
 				.map(a -> eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(a._2().toString()))
 				.map(TransformActions::doTransform)
 				.filter(Objects::nonNull)
 				// key: payload class name, value: JSON serialization of the transformed action
 				.mapToPair(
 					a -> new Tuple2<>(
 						new Text(a.getClazz().toString()),
 						new Text(OBJECT_MAPPER.writeValueAsString(a))))
 				.saveAsNewAPIHadoopFile(
 					targetDirectory.toString(),
 					Text.class,
 					Text.class,
 					SequenceFileOutputFormat.class,
 					sc.hadoopConfiguration());
 		}
 	}

 	/**
 	 * Converts a legacy atomic action into the new model.
 	 *
 	 * @param aa the legacy atomic action
 	 * @return the transformed action typed after its payload, or {@code null} when the legacy action
 	 *         has an empty target value
 	 * @throws InvalidProtocolBufferException when the target value is not a valid Oaf protobuf
 	 * @throws IllegalArgumentException when the payload kind or entity type is not supported
 	 */
 	private static AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa)
 		throws InvalidProtocolBufferException {
 		// dedup similarity relations had empty target value, don't migrate them
 		if (aa.getTargetValue().length == 0) {
 			return null;
 		}
 		final OafProtos.Oaf proto_oaf = OafProtos.Oaf.parseFrom(aa.getTargetValue());
 		final Oaf oaf = ProtoConverter.convert(proto_oaf);
 		switch (proto_oaf.getKind()) {
 			case entity:
 				switch (proto_oaf.getEntity().getType()) {
 					case datasource:
 						return new AtomicAction<>(Datasource.class, (Datasource) oaf);
 					case organization:
 						return new AtomicAction<>(Organization.class, (Organization) oaf);
 					case project:
 						return new AtomicAction<>(Project.class, (Project) oaf);
 					case result:
 						final String resulttypeid = proto_oaf
 							.getEntity()
 							.getResult()
 							.getMetadata()
 							.getResulttype()
 							.getClassid();
 						switch (resulttypeid) {
 							case "publication":
 								return new AtomicAction<>(Publication.class, (Publication) oaf);
 							case "software":
 								return new AtomicAction<>(Software.class, (Software) oaf);
 							case "other":
 								return new AtomicAction<>(OtherResearchProduct.class, (OtherResearchProduct) oaf);
 							case "dataset":
 								return new AtomicAction<>(Dataset.class, (Dataset) oaf);
 							default:
 								// can be an update, where the resulttype is not specified
 								return new AtomicAction<>(Result.class, (Result) oaf);
 						}
 					default:
 						throw new IllegalArgumentException(
 							"invalid entity type: " + proto_oaf.getEntity().getType());
 				}
 			case relation:
 				return new AtomicAction<>(Relation.class, (Relation) oaf);
 			default:
 				throw new IllegalArgumentException("invalid kind: " + proto_oaf.getKind());
 		}
 	}

 	/**
 	 * Reads the {@code basePath} property of the action manager service profile through the IS
 	 * lookup service.
 	 *
 	 * @param isLookupUrl endpoint of the IS lookup service
 	 * @return the result of the lookup query
 	 * @throws ISLookUpException when the lookup query fails
 	 */
 	private static String getTargetBaseDir(String isLookupUrl) throws ISLookUpException {
 		final ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
 		final String xquery = "collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
 		return isLookUp.getResourceProfileByQuery(xquery);
 	}
 }
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java
@ -5,12 +5,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
 import java.io.IOException;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.function.BiFunction;
 import java.util.function.Function;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
@ -68,6 +68,12 @@ public class PromoteActionPayloadForGraphTableJob {
 		MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
 		logger.info("strategy: {}", strategy);
 		Boolean shouldGroupById = Optional
 			.ofNullable(parser.get("shouldGroupById"))
 			.map(Boolean::valueOf)
 			.orElse(true);
 		logger.info("shouldGroupById: {}", shouldGroupById);
 		Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
 		Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
@ -89,7 +95,8 @@ public class PromoteActionPayloadForGraphTableJob {
 					outputGraphTablePath,
 					strategy,
 					rowClazz,
-					actionPayloadClazz);
+					actionPayloadClazz,
 					shouldGroupById);
 			});
 	}
@ -115,12 +122,12 @@ public class PromoteActionPayloadForGraphTableJob {
 		String outputGraphTablePath,
 		MergeAndGet.Strategy strategy,
 		Class<G> rowClazz,
-		Class<A> actionPayloadClazz) {
+		Class<A> actionPayloadClazz, Boolean shouldGroupById) {
 		Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
 		Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
 		Dataset<G> result = promoteActionPayloadForGraphTable(
-			rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
+			rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById)
 				.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
 		saveGraphTable(result, outputGraphTablePath);
@ -153,9 +160,9 @@ public class PromoteActionPayloadForGraphTableJob {
 	private static String extractPayload(Row value) {
 		try {
-			return value.<String> getAs("payload");
+			return value.getAs("payload");
 		} catch (IllegalArgumentException | ClassCastException e) {
-			logger.error("cannot extract payload from action: {}", value.toString());
+			logger.error("cannot extract payload from action: {}", value);
 			throw e;
 		}
 	}
@ -174,7 +181,8 @@ public class PromoteActionPayloadForGraphTableJob {
 		Dataset<A> actionPayloadDS,
 		MergeAndGet.Strategy strategy,
 		Class<G> rowClazz,
-		Class<A> actionPayloadClazz) {
+		Class<A> actionPayloadClazz,
 		Boolean shouldGroupById) {
 		logger
 			.info(
 				"Promoting action payload for graph table: payload={}, table={}",
@ -186,7 +194,7 @@ public class PromoteActionPayloadForGraphTableJob {
 		SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
 		SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
 		SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
-		SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
+		SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget;
 		Dataset<G> joinedAndMerged = PromoteActionPayloadFunctions
 			.joinGraphTableWithActionPayloadAndMerge(
@ -198,9 +206,13 @@ public class PromoteActionPayloadForGraphTableJob {
 				rowClazz,
 				actionPayloadClazz);
 		if (shouldGroupById) {
 			return PromoteActionPayloadFunctions
 				.groupGraphTableByIdAndMerge(
 					joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
 		} else {
 			return joinedAndMerged;
 		}
 	}
 	private static <T extends Oaf> SerializableSupplier<T> zeroFn(Class<T> clazz) {
@ -226,12 +238,13 @@ public class PromoteActionPayloadForGraphTableJob {
 		}
 	}
-	private static <T extends Oaf> Function<T, Boolean> isNotZeroFnUsingIdOrSource() {
+	private static <T extends Oaf> Function<T, Boolean> isNotZeroFnUsingIdOrSourceAndTarget() {
 		return t -> {
 			if (isSubClass(t, Relation.class)) {
-				return Objects.nonNull(((Relation) t).getSource());
+				final Relation rel = (Relation) t;
 				return StringUtils.isNotBlank(rel.getSource()) && StringUtils.isNotBlank(rel.getTarget());
 			}
-			return Objects.nonNull(((OafEntity) t).getId());
+			return StringUtils.isNotBlank(((OafEntity) t).getId());
 		};
 	}
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java
@ -112,6 +112,7 @@ public class PromoteActionPayloadFunctions {
 		Class<G> rowClazz) {
 		TypedColumn<G, G> aggregator = new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
 		return rowDS
 			.filter((FilterFunction<G>) o -> isNotZeroFn.get().apply(o))
 			.groupByKey((MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
 			.agg(aggregator)
 			.map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz));
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json
@ -1,56 +0,0 @@
 [
  {
    "paramName": "issm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "when true will stop SparkSession after job execution",
    "paramRequired": false
  },
  {
    "paramName": "is",
    "paramLongName": "isLookupUrl",
    "paramDescription": "URL of the isLookUp Service",
    "paramRequired": true
  },
  {
    "paramName": "sn",
    "paramLongName": "sourceNameNode",
    "paramDescription": "nameNode of the source cluster",
    "paramRequired": true
  },
  {
    "paramName": "tn",
    "paramLongName": "targetNameNode",
    "paramDescription": "nameNode of the target cluster",
    "paramRequired": true
  },
  {
    "paramName": "w",
    "paramLongName": "workingDirectory",
    "paramDescription": "working directory",
    "paramRequired": true
  },
  {
    "paramName": "nm",
    "paramLongName": "distcp_num_maps",
    "paramDescription": "maximum number of map tasks used in the distcp process",
    "paramRequired": true
  },
  {
    "paramName": "mm",
    "paramLongName": "distcp_memory_mb",
    "paramDescription": "memory for distcp action copying actionsets from remote cluster",
    "paramRequired": true
  },
  {
    "paramName": "tt",
    "paramLongName": "distcp_task_timeout",
    "paramDescription": "timeout for distcp copying actions from remote cluster",
    "paramRequired": true
  },
  {
    "paramName": "tr",
    "paramLongName": "transform_only",
    "paramDescription": "activate transform-only mode. Only apply transformation step",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/migration/transform_actionsets_parameters.json
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/migration/transform_actionsets_parameters.json
@ -1,20 +0,0 @@
 [
  {
    "paramName": "issm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "when true will stop SparkSession after job execution",
    "paramRequired": false
  },
  {
    "paramName": "is",
    "paramLongName": "isLookupUrl",
    "paramDescription": "URL of the isLookUp Service",
    "paramRequired": true
  },
  {
    "paramName": "i",
    "paramLongName": "inputPaths",
    "paramDescription": "comma separated list of paths of the action sets to transform",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json
@ -40,5 +40,11 @@
    "paramLongName": "mergeAndGetStrategy",
    "paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET",
    "paramRequired": true
  },
  {
    "paramName": "sgid",
    "paramLongName": "shouldGroupById",
    "paramDescription": "indicates whether the promotion operation should group objects in the graph by id or not",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml
@ -24,6 +24,10 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
        <property>
            <name>shouldGroupById</name>
            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -111,6 +115,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForDatasetTable"/>
        <error to="Kill"/>
@ -162,6 +167,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/dataset</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml
@ -56,6 +56,11 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
        <property>
            <name>shouldGroupById</name>
            <value>false</value>
            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/migration/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/migration/oozie_app/workflow.xml
@ -1,138 +0,0 @@
 <workflow-app xmlns='uri:oozie:workflow:0.5' name='migrate_actions'>
    <parameters>
        <property>
            <name>sourceNN</name>
            <description>the source name node</description>
        </property>
        <property>
            <name>isLookupUrl</name>
            <description>the isLookup service endpoint</description>
        </property>
        <property>
            <name>workingDirectory</name>
            <description>working directory</description>
        </property>
        <property>
            <name>distcp_memory_mb</name>
            <value>6144</value>
            <description>memory for distcp copying actionsets from remote cluster</description>
        </property>
        <property>
            <name>distcp_task_timeout</name>
            <value>60000000</value>
            <description>timeout for distcp copying actions from remote cluster</description>
        </property>
        <property>
            <name>distcp_num_maps</name>
            <value>1</value>
            <description>maximum number of map tasks used in the distcp process</description>
        </property>
        <property>
            <name>transform_only</name>
            <description>activate transform-only mode. Only apply transformation step</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>oozieActionShareLibForSpark2</name>
            <description>oozie action sharelib for spark 2.*</description>
        </property>
        <property>
            <name>spark2ExtraListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
            <description>spark 2.* extra listeners classname</description>
        </property>
        <property>
            <name>spark2SqlQueryExecutionListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
            <description>spark 2.* sql query execution listeners classname</description>
        </property>
        <property>
            <name>spark2YarnHistoryServerAddress</name>
            <description>spark 2.* yarn history server address</description>
        </property>
        <property>
            <name>spark2EventLogDir</name>
            <description>spark 2.* event log dir location</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapreduce.job.queuename</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.queue.name</name>
                <value>${oozieLauncherQueueName}</value>
            </property>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="migrate_actionsets"/>
    <action name="migrate_actionsets">
        <java>
            <main-class>eu.dnetlib.dhp.actionmanager.migration.MigrateActionSet</main-class>
            <java-opt>-Dmapred.task.timeout=${distcp_task_timeout}</java-opt>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
            <arg>--sourceNameNode</arg><arg>${sourceNN}</arg>
            <arg>--targetNameNode</arg><arg>${nameNode}</arg>
            <arg>--workingDirectory</arg><arg>${workingDirectory}</arg>
            <arg>--distcp_num_maps</arg><arg>${distcp_num_maps}</arg>
            <arg>--distcp_memory_mb</arg><arg>${distcp_memory_mb}</arg>
            <arg>--distcp_task_timeout</arg><arg>${distcp_task_timeout}</arg>
            <arg>--transform_only</arg><arg>${transform_only}</arg>
            <capture-output/>
        </java>
        <ok to="transform_actions" />
        <error to="fail" />
    </action>
    <action name="transform_actions">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>transform_actions</name>
            <class>eu.dnetlib.dhp.actionmanager.migration.TransformActions</class>
            <jar>dhp-actionmanager-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
            <arg>--inputPaths</arg><arg>${wf:actionData('migrate_actionsets')['target_paths']}</arg>
        </spark>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>migrate_actions failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end" />
 </workflow-app>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml
@ -24,6 +24,10 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
        <property>
            <name>shouldGroupById</name>
            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -110,6 +114,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/otherresearchproduct</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForOtherResearchProductTable"/>
        <error to="Kill"/>
@ -161,6 +166,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/otherresearchproduct</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml
@ -24,6 +24,10 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
        <property>
            <name>shouldGroupById</name>
            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -111,6 +115,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/publication</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForPublicationTable"/>
        <error to="Kill"/>
@ -162,6 +167,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/publication</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml
@ -24,6 +24,10 @@
            <name>mergeAndGetStrategy</name>
            <description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
        </property>
        <property>
            <name>shouldGroupById</name>
            <description>indicates whether the promotion operation should group objects in the graph by id or not</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -110,6 +114,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/software</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForSoftwareTable"/>
        <error to="Kill"/>
@ -161,6 +166,7 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/software</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java
+++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java
@ -101,7 +101,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
 							"-outputGraphTablePath",
 							"",
 							"-mergeAndGetStrategy",
-							MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name()
+							MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name(),
 							"--shouldGroupById",
 							"true"
 						}));
 			// then
@ -141,7 +143,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
 						"-outputGraphTablePath",
 						outputGraphTableDir.toString(),
 						"-mergeAndGetStrategy",
-						strategy.name()
+						strategy.name(),
 						"--shouldGroupById",
 						"true"
 					});
 			// then
--- a/dhp-workflows/dhp-aggregation/README.md
+++ b/dhp-workflows/dhp-aggregation/README.md
@ -1,29 +1,27 @@
 Description of the Module
 --------------------------
-This module defines a **collector worker application** that runs on Hadoop.
+This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records.
 Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure
 the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping 
 of each MDStore.
-It is responsible for harvesting metadata using different plugins.
+## Metadata collection
-The collector worker uses a message queue to inform the progress 
+The **metadata collection workflow** is responsible for harvesting metadata records from different protocols and responding to 
-of the harvesting action (using a message queue for sending **ONGOING** messages) furthermore, 
+different formats and for storing them on HDFS so that they can be further processed. 
 It gives, at the end of the job, some information about the status 
 of the collection, i.e. the number of records collected (using a message queue for sending **REPORT** messages).
-To work the collection worker need some parameter like:
+### Collector Plugins
-* **hdfsPath**: the path where storing the sequential file
+Different protocols are managed by dedicated Collector plugins, i.e. java programs implementing a defined interface:
 * **apidescriptor**: the JSON encoding of the API Descriptor
 * **namenode**: the Name Node URI
 * **userHDFS**: the user who creates the HDFS sequence file
 * **rabbitUser**: the user to connect with RabbitMq for messaging
 * **rabbitPassWord**: the password to connect with RabbitMq for messaging
 * **rabbitHost**: the host of the RabbitMq server
 * **rabbitOngoingQueue**: the name of the ongoing queue
 * **rabbitReportQueue**: the name of the report queue
 * **workflowId**: the identifier of the dnet Workflow
-##Plugins
+```eu.dnetlib.dhp.collection.plugin.CollectorPlugin```
 * OAI Plugin 
-## Usage
+The list of the supported plugins:
 * OAI Plugin: collects from OAI-PMH compatible endpoints
 * MDStore plugin: collects from a given D-Net MetadataStore (identified by MongoDB URI, dbName, MDStoreID)
 * MDStore dump plugin: collects from an MDStore dump stored on the HDFS location indicated by the `path` parameter 
 # Transformation Plugins
 TODO
--- a/dhp-workflows/dhp-aggregation/pom.xml
+++ b/dhp-workflows/dhp-aggregation/pom.xml
@ -7,10 +7,44 @@
        <version>1.2.4-SNAPSHOT</version>
    </parent>
    <artifactId>dhp-aggregation</artifactId>
    <build>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>${net.alchim31.maven.version}</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>initialize</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <scalaVersion>${scala.version}</scalaVersion>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
@ -24,20 +58,8 @@
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-common</artifactId>
            <version>${project.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>com.sun.xml.bind</groupId>
                    <artifactId>jaxb-core</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
         <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-schemas</artifactId>
        </dependency>
        <dependency>
            <groupId>net.sf.saxon</groupId>
            <artifactId>Saxon-HE</artifactId>
@ -57,6 +79,11 @@
            <artifactId>jaxen</artifactId>
        </dependency>
        <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
        <dependency>
            <groupId>org.apache.commons</groupId>
@ -77,7 +104,10 @@
            <artifactId>commons-compress</artifactId>
        </dependency>
-
+        <dependency>
            <groupId>org.mongodb</groupId>
            <artifactId>mongo-java-driver</artifactId>
        </dependency>
    </dependencies>
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java
@ -75,7 +75,6 @@ public class CollectAndSave implements Serializable {
 			.union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class))
 			.union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class))
 			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
 		;
 	}
 	private static void removeOutputDir(SparkSession spark, String path) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
@ -36,7 +36,7 @@ import scala.Tuple2;
 */
 public class SparkAtomicActionScoreJob implements Serializable {
-	private static String DOI = "doi";
+	private static final String DOI = "doi";
 	private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala
@ -0,0 +1,86 @@
 package eu.dnetlib.dhp.actionmanager.datacite
 import org.apache.commons.io.IOUtils
 import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest}
 import org.apache.http.entity.StringEntity
 import org.apache.http.impl.client.HttpClients
 import java.io.IOException
 /**
  * Base class for REST clients that expose a paginated HTTP resource as an `Iterator[String]`.
  *
  * Subclasses implement `getBufferData()` to request a page and `extractInfo()` to turn the
  * response into `buffer`, resetting `current_index` and updating `scroll_value` / `complete`.
  * The first page is requested while the instance is being constructed (see the last statement
  * of the class body).
  */
 abstract class AbstractRestClient extends Iterator[String]{
  /** Items of the page currently being iterated. */
  var buffer: List[String] = List()
  /** Position inside `buffer` of the item returned by the next call to `next()`. */
  var current_index:Int = 0
  /** Whatever the subclass needs to request the following page, if any. */
  var scroll_value: Option[String] = None
  /** Set by the subclass once no further page has to be requested. */
  var complete:Boolean = false

  /** Parses a raw response and refreshes the iteration state (`buffer`, `current_index`, ...). */
  def extractInfo(input: String): Unit

  /** Requests the next page, typically delegating the parsing to `extractInfo`. */
  protected def getBufferData(): Unit

  /**
   * Executes an HTTP GET.
   *
   * @param url the address to request
   * @return the response body, or an empty string when every attempt returned an error status
   */
  def doHTTPGETRequest(url:String): String = {
    val httpGet = new HttpGet(url)
    doHTTPRequest(httpGet)
  }

  /**
   * Executes an HTTP POST; when `json` is not null it is sent as an `application/json` body.
   *
   * @param url  the address to request
   * @param json the JSON payload, or null for a POST without body
   * @return the response body, or an empty string when every attempt returned an error status
   */
  def doHTTPPOSTRequest(url:String, json:String): String = {
    val httpPost = new HttpPost(url)
    if (json != null) {
      // StringEntity(String) encodes with ISO-8859-1, which corrupts non-Latin characters in JSON.
      val entity = new StringEntity(json, "UTF-8")
      httpPost.setEntity(entity)
      httpPost.setHeader("Accept", "application/json")
      httpPost.setHeader("Content-type", "application/json")
    }
    doHTTPRequest(httpPost)
  }

  def hasNext: Boolean = {
    buffer.nonEmpty && current_index < buffer.size
  }

  /**
   * Returns the current item; once the last item of the page has been handed out the next
   * page is requested eagerly so that `hasNext` reflects the remote state.
   */
  override def next(): String = {
    val next_item:String = buffer(current_index)
    current_index = current_index + 1
    if (current_index == buffer.size)
      getBufferData()
    next_item
  }

  /**
   * Runs the request, retrying up to four times while the server answers with an error status.
   *
   * @return the response body decoded as UTF-8, or "" when all the attempts failed
   * @throws RuntimeException wrapping any failure raised while executing the request
   */
  private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={
    val client = HttpClients.createDefault
    try {
      var tries = 4
      var body: Option[String] = None
      while (body.isEmpty && tries > 0) {
        println(s"requesting ${r.getURI}")
        val response = client.execute(r)
        try {
          val status = response.getStatusLine.getStatusCode
          println(s"get response with status$status")
          // Every 4xx/5xx status (400 included) is a failure and must not be returned as data.
          if (status >= 400)
            tries -= 1
          else
            body = Some(IOUtils.toString(response.getEntity.getContent, "UTF-8"))
        } finally {
          // Always release the connection: unclosed error responses would exhaust the
          // connection pool and block the following retries.
          response.close()
        }
      }
      body.getOrElse("")
    } catch {
      case e: Throwable =>
        throw new RuntimeException("Error on executing request ", e)
    } finally try client.close()
    catch {
      case e: IOException =>
        throw new RuntimeException("Unable to close client ", e)
    }
  }

  // Load the first page as part of the construction.
  getBufferData()
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala
@ -0,0 +1,31 @@
 package eu.dnetlib.dhp.actionmanager.datacite
 import org.json4s.{DefaultFormats, JValue}
 import org.json4s.jackson.JsonMethods.{compact, parse, render}
 /**
  * Iterates over the records returned by the Datacite `dois` REST endpoint, one compact JSON
  * string per element of the `data` array, following the `links.next` URL of each page.
  *
  * @param timestamp lower bound of the `updated` range placed in the query
  * @param blocks    value of the `page[size]` request parameter
  * @param until     upper bound of the `updated` range; values not greater than 0 mean open ended (`*`)
  */
 class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until:Long = -1) extends AbstractRestClient {

  /** Replaces the buffer with the items of the given page and records where the next page is. */
  override def extractInfo(input: String): Unit = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    val page: org.json4s.JValue = parse(input)
    buffer = (page \ "data").extract[List[JValue]].map(record => compact(render(record)))
    val nextPage = (page \ "links" \ "next").extractOrElse[String](null)
    scroll_value = Option(nextPage).filter(link => link.nonEmpty)
    // without a link to follow the iteration ends with this page
    if (scroll_value.isEmpty)
      complete = true
    current_index = 0
  }

  /** Builds the URL of the first page. */
  def get_url():String ={
    val upperBound = if (until> 0) until.toString else "*"
    s"https://api.datacite.org/dois?page[cursor]=1&page[size]=$blocks&query=updated:[$timestamp%20TO%20$upperBound]"
  }

  /** Fetches the next page (or the first one when no page has been read yet), unless complete. */
  override def getBufferData(): Unit = {
    if (complete)
      return
    val target = scroll_value.getOrElse(get_url())
    extractInfo(doHTTPGETRequest(target))
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala
@ -0,0 +1,500 @@
 package eu.dnetlib.dhp.actionmanager.datacite
 import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.action.AtomicAction
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
 import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
 import eu.dnetlib.dhp.utils.DHPUtils
 import org.apache.commons.lang3.StringUtils
 import org.json4s.DefaultFormats
 import org.json4s.JsonAST.{JField, JObject, JString}
 import org.json4s.jackson.JsonMethods.parse
 import java.nio.charset.CodingErrorAction
 import java.text.SimpleDateFormat
 import java.time.LocalDate
 import java.time.format.DateTimeFormatter
 import java.util.{Date, Locale}
 import java.util.regex.Pattern
 import scala.collection.JavaConverters._
 import scala.io.{Codec, Source}
 // Record holding a DOI, a timestamp, an activity flag and a JSON payload.
 // NOTE(review): not used in this file; presumably the stored form of a harvested Datacite record — confirm.
 case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {}
 // One entry of CreatorType.nameIdentifiers: the identifier value and the scheme it belongs to.
 case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {}
 // One element extracted from the "creators" section of a Datacite record.
 case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {}
 // One element extracted from the "titles" section of a Datacite record.
 case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {}
 // One element extracted from the "subjects" section of a Datacite record.
 case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {}
 // One element extracted from the "descriptions" section of a Datacite record.
 case class DescriptionType(descriptionType: Option[String], description: Option[String]) {}
 // Shape of a funding reference (funder and award details).
 // NOTE(review): generateOAF reads "fundingReferences" through pattern matching, not through this class — confirm it is still needed.
 case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {}
 // One element extracted from the "dates" section of a Datacite record.
 case class DateType(date: Option[String], dateType: Option[String]) {}
 // Value type of the hostedBy_map.json resource, looked up by Datacite client id to find the hosting datasource.
 case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {}
 object DataciteToOAFTransformation {
  implicit val codec: Codec = Codec("UTF-8")
  codec.onMalformedInput(CodingErrorAction.REPLACE)
  codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
  val DOI_CLASS = "doi"
  val SUBJ_CLASS = "keywords"
  val j_filter: List[String] = {
    val s = Source.fromInputStream(getClass.getResourceAsStream("datacite_filter")).mkString
    s.lines.toList
  }
  val mapper = new ObjectMapper()
  val unknown_repository: HostedByMapType = HostedByMapType(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID, ModelConstants.UNKNOWN_REPOSITORY.getValue, ModelConstants.UNKNOWN_REPOSITORY.getValue, Some(1.0F))
  val dataInfo: DataInfo = generateDataInfo("0.9")
  val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, "Datacite")
  val hostedByMap: Map[String, HostedByMapType] = {
    val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: org.json4s.JValue = parse(s)
    json.extract[Map[String, HostedByMapType]]
  }
  val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH)
  val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN)
  val funder_regex: List[(Pattern, String)] = List(
    (Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"),
    (Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::")
  )
  val Date_regex: List[Pattern] = List(
    //Y-M-D
    Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE),
    //M-D-Y
    Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE),
    //D-M-Y
    Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE),
    //Y
    Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE)
  )
  /** Tells whether the raw JSON contains at least one of the strings listed in `j_filter`. */
  def filter_json(json: String): Boolean =
    j_filter.exists(token => json.contains(token))
  /**
   * Wraps an Oaf object into an AtomicAction and serializes it as JSON.
   *
   * Supported payloads are datasets, publications, software, other research products and relations.
   *
   * @param item the object to wrap
   * @return the pair (canonical name of the runtime class of the payload, JSON of the atomic action),
   *         or null when the item is of none of the supported types
   */
  def toActionSet(item: Oaf): (String, String) = {
    // One wrapping routine for every payload type. It serializes with the object-level
    // `mapper` instead of building a new ObjectMapper for each record.
    def wrap[T <: Oaf](clazz: Class[T], payload: T): (String, String) = {
      val action: AtomicAction[T] = new AtomicAction[T]
      action.setClazz(clazz)
      action.setPayload(payload)
      (payload.getClass.getCanonicalName, mapper.writeValueAsString(action))
    }

    item match {
      case dataset: OafDataset => wrap(classOf[OafDataset], dataset)
      case publication: Publication => wrap(classOf[Publication], publication)
      case software: Software => wrap(classOf[Software], software)
      case orp: OtherResearchProduct => wrap(classOf[OtherResearchProduct], orp)
      case relation: Relation => wrap(classOf[Relation], relation)
      case _ => null
    }
  }
  /**
   * Tells whether an embargo is over.
   *
   * @param embargo_end_date the end of the embargo, formatted as yyyy-MM-dd
   * @return true when the current date is strictly after the given date
   */
  def embargo_end(embargo_end_date: String): Boolean = {
    val embargoFormat = DateTimeFormatter.ofPattern("[yyyy-MM-dd]")
    val lastEmbargoDay = LocalDate.parse(embargo_end_date, embargoFormat)
    LocalDate.now().isAfter(lastEmbargoDay)
  }
  /**
   * Looks for a date inside a free text value and normalizes it.
   *
   * The patterns of `Date_regex` are tried in order and the first match wins; a bare year
   * is expanded to the first of January. The match is then parsed with `df_en` and, when that
   * fails, with `df_it`.
   *
   * @param input the text to scan
   * @return the date rendered by `LocalDate.toString`, or None when nothing matches or parses
   */
  def extract_date(input: String): Option[String] = {
    val firstMatch: Option[String] = Date_regex
      .map { pattern =>
        val matcher = pattern.matcher(input)
        if (matcher.find()) matcher.group(0) else null
      }
      .find(found => found != null)

    firstMatch match {
      case None => None
      case Some(found) =>
        // a four characters match can only be the year-only pattern
        val candidate = if (found.length == 4) s"01-01-$found" else found
        try {
          Some(LocalDate.parse(candidate, df_en).toString)
        } catch {
          case _: Throwable =>
            try {
              Some(LocalDate.parse(candidate, df_it).toString)
            } catch {
              case _: Throwable => None
            }
        }
    }
  }
  /**
   * Resolves the instance type of a record and the result typology it belongs to.
   *
   * The candidate values are tried in this order: resourceType, schemaOrg, resourceTypeGeneral.
   * Null or empty candidates are skipped; the first one having a synonym in the
   * publication resource vocabulary wins.
   *
   * @return the pair (instance type, synonym of its class id among the result typologies),
   *         or null when no candidate can be resolved
   */
  def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): (Qualifier, Qualifier) = {
    // An Iterator keeps the evaluation lazy: the vocabulary is queried only until the first hit.
    Iterator(resourceType, schemaOrg, resourceTypeGeneral)
      .filter(candidate => candidate != null && candidate.nonEmpty)
      .map(candidate => vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, candidate))
      .find(instanceType => instanceType != null)
      .map(instanceType => (instanceType, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType.getClassid)))
      .orNull
  }
  /**
   * Creates the empty result whose concrete class matches the typology of the record.
   *
   * The result carries a single Instance whose instance type is the qualifier resolved by
   * `getTypeQualifier`.
   *
   * @return a dataset, publication, software or other research product, or null when the type
   *         cannot be resolved, has no typology, or the typology is none of the four known ones
   */
  def getResult(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): Result = {
    val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
    // The typology lookup may find no synonym: treat it like an unresolved type instead of
    // failing with a NullPointerException.
    if (typeQualifiers == null || typeQualifiers._2 == null)
      return null
    val result: Result = typeQualifiers._2.getClassname match {
      case "dataset" => new OafDataset
      case "publication" => new Publication
      case "software" => new Software
      case "other" => new OtherResearchProduct
      // Unknown typology: report "no result" (callers already handle null) rather than
      // throwing a MatchError.
      case _ => null
    }
    if (result != null) {
      val i = new Instance
      i.setInstancetype(typeQualifiers._1)
      result.setInstance(List(i).asJava)
    }
    result
  }
  /**
   * Tells whether the record declares a date of type "available" (case insensitive).
   *
   * @param input the raw JSON of the record
   */
  def available_date(input: String): Boolean = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    val record: org.json4s.JValue = parse(input)
    val declaredTypes: List[String] = for {
      JObject(dateFields) <- record \\ "dates"
      JField("dateType", JString(declaredType)) <- dateFields
    } yield declaredType
    declaredTypes.exists(_.equalsIgnoreCase("available"))
  }
  /**
   * As described in ticket #6377:
   * when a result comes from figshare its subjects must be removed
   * and the access rights of all its instances set to OPEN.
   *
   * A result is considered as coming from figshare when at least one of its instances is
   * hosted by a datasource whose value is "figshare" (case insensitive).
   *
   * @param r the result to fix in place; results without instances are left untouched
   */
  def fix_figshare(r: Result): Unit = {
    if (r.getInstance() == null)
      return
    val instances = r.getInstance().asScala
    val fromFigshare = instances.exists(inst => inst.getHostedby != null && "figshare".equalsIgnoreCase(inst.getHostedby.getValue))
    if (!fromFigshare)
      return
    instances.foreach(inst => inst.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT()))
    r.setSubject(List.empty[StructuredProperty].asJava)
  }
  /**
   * Wraps a date value into a StructuredProperty.
   *
   * @param dt the date value
   * @param q  the qualifier describing the kind of date
   * @return the structured property, created without data info (null is passed for it)
   */
  def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = {
    OafMapperUtils.structuredProperty(dt, q, null)
  }
  /**
   * Builds a result-project relation of sub type "outcome".
   *
   * @param sourceId identifier of the source node
   * @param targetId identifier of the target node
   * @param relClass the relation class
   * @param cf       the datasource the relation is collected from
   * @param di       the data info attached to the relation
   */
  def generateRelation(sourceId: String, targetId: String, relClass: String, cf: KeyValue, di: DataInfo): Relation = {
    val relation = new Relation
    // semantics
    relation.setRelType(ModelConstants.RESULT_PROJECT)
    relation.setSubRelType(ModelConstants.OUTCOME)
    relation.setRelClass(relClass)
    // endpoints
    relation.setSource(sourceId)
    relation.setTarget(targetId)
    // provenance
    relation.setCollectedfrom(List(cf).asJava)
    relation.setDataInfo(di)
    relation
  }
  /**
   * Creates the relations between a result and the project identified by an award URI.
   *
   * The URI is checked against the patterns of `funder_regex`; the first matching pattern
   * provides the prefix of the project identifier, while the grant code is the second
   * capturing group of the match.
   *
   * @param awardUri the award URI found in the funding references of the record
   * @param sourceId the identifier of the result
   * @return the pair of relations ("isProducedBy" and its inverse "produces"),
   *         or an empty list when the URI matches none of the known funders
   */
  def get_projectRelation(awardUri: String, sourceId: String): List[Relation] = {
    // The iterator stops at the first pattern found in the URI, so the regex runs only once
    // and the matcher still holds the groups of that match.
    val matched = funder_regex.iterator
      .map(entry => (entry._1.matcher(awardUri), entry._2))
      .find(candidate => candidate._1.find())

    matched match {
      case Some((matcher, prefix)) =>
        // Read the grant code from the capturing group: replaceAll("$2") would keep any text
        // surrounding the matched region inside the identifier.
        val grantId = matcher.group(2)
        val targetId = s"$prefix${DHPUtils.md5(grantId)}"
        List(
          generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo),
          generateRelation(targetId, sourceId, "produces", DATACITE_COLLECTED_FROM, dataInfo)
        )
      case None =>
        List()
    }
  }
  /**
   * Maps one Datacite JSON record onto the OAF model.
   *
   * An empty list is returned when the record is discarded by `filter_json`, its DOI is empty,
   * its type cannot be resolved, it has no authors, or no identifier can be created for it.
   *
   * @param input            the raw JSON of the record
   * @param ts               value formatted as the date of transformation
   *                         (NOTE(review): passed as-is to the formatter, presumably epoch milliseconds — confirm)
   * @param dateOfCollection value formatted as the date of collection; it is multiplied by 1000
   *                         before building the Date (presumably epoch seconds — confirm)
   * @param vocabularies     vocabularies used to resolve types, pid schemes, dates, languages and access rights
   * @return the result followed by the relations to the projects found in its funding references
   */
  def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup): List[Oaf] = {
    if (filter_json(input))
      return List()
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json = parse(input)
    val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
    val resourceTypeGeneral = (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)
    val schemaOrg = (json \ "attributes" \ "types" \ "schemaOrg").extractOrElse[String](null)
    val doi = (json \ "attributes" \ "doi").extract[String]
    if (doi.isEmpty)
      return List()
    //Mapping type based on vocabularies dnet:publication_resource and dnet:result_typologies
    val result = getResult(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
    if (result == null)
      return List()
    // The DOI is the only pid of the result and also its original identifier.
    val doi_q = OafMapperUtils.qualifier("doi", "doi", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES)
    val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
    result.setPid(List(pid).asJava)
    // First identifier, derived from the DOI; it is replaced near the end of the method.
    result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
    result.setOriginalId(List(doi).asJava)
    val d = new Date(dateOfCollection * 1000)
    val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
    result.setDateofcollection(ISO8601FORMAT.format(d))
    result.setDateoftransformation(ISO8601FORMAT.format(ts))
    result.setDataInfo(dataInfo)
    // Authors: one per creator, ranked by position (starting from 1).
    val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List())
    val authors = creators.zipWithIndex.map { case (c, idx) =>
      val a = new Author
      a.setFullname(c.name.orNull)
      a.setName(c.givenName.orNull)
      a.setSurname(c.familyName.orNull)
      if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
        // NOTE(review): name identifiers without a value are mapped to null and kept in the pid list.
        a.setPid(c.nameIdentifiers.get.map(ni => {
          val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(ModelConstants.DNET_PID_TYPES, ni.nameIdentifierScheme.get.toLowerCase()) else null
          if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) {
            OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo)
          }
          else
            null
        }
        )
          .asJava)
      }
      if (c.affiliation.isDefined)
        a.setAffiliation(c.affiliation.get.filter(af => af.nonEmpty).map(af => OafMapperUtils.field(af, dataInfo)).asJava)
      a.setRank(idx + 1)
      a
    }
    // Titles: a title without type is a main title, otherwise the declared type is used as qualifier.
    val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
    result.setTitle(titles.filter(t => t.title.nonEmpty).map(t => {
      if (t.titleType.isEmpty) {
        OafMapperUtils.structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null)
      } else {
        OafMapperUtils.structuredProperty(t.title.get, t.titleType.get, t.titleType.get, ModelConstants.DNET_DATACITE_TITLE, ModelConstants.DNET_DATACITE_TITLE, null)
      }
    }).asJava)
    // Records without authors are discarded.
    if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
      return List()
    result.setAuthor(authors.asJava)
    val dates = (json \\ "dates").extract[List[DateType]]
    val publication_year = (json \\ "publicationYear").extractOrElse[String](null)
    // First date of type "issued" (possibly not parsable, hence the nested Option).
    val i_date = dates
      .filter(d => d.date.isDefined && d.dateType.isDefined)
      .find(d => d.dateType.get.equalsIgnoreCase("issued"))
      .map(d => extract_date(d.date.get))
    // First parsable date of type "available": used as embargo end date.
    val a_date: Option[String] = dates
      .filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available"))
      .map(d => extract_date(d.date.get))
      .find(d => d != null && d.isDefined)
      .map(d => d.get)
    if (a_date.isDefined) {
      result.setEmbargoenddate(OafMapperUtils.field(a_date.get, null))
    }
    // Date of acceptance: the issued date when parsable, otherwise the first of January of the publication year.
    if (i_date.isDefined && i_date.get.isDefined) {
      result.setDateofacceptance(OafMapperUtils.field(i_date.get.get, null))
      result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(i_date.get.get, null))
    }
    else if (publication_year != null) {
      // NOTE(review): this value is written as 01-01-<year>, while extract_date returns LocalDate.toString output — confirm the format is intended.
      result.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null))
      result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null))
    }
    // Relevant dates: every parsable date whose type is a term of the datacite date vocabulary.
    result.setRelevantdate(dates.filter(d => d.date.isDefined && d.dateType.isDefined)
      .map(d => (extract_date(d.date.get), d.dateType.get))
      .filter(d => d._1.isDefined)
      .map(d => (d._1.get, vocabularies.getTermAsQualifier(ModelConstants.DNET_DATACITE_DATE, d._2.toLowerCase())))
      .filter(d => d._2 != null)
      .map(d => generateOAFDate(d._1, d._2)).asJava)
    // Subjects are all mapped as keywords.
    val subjects = (json \\ "subjects").extract[List[SubjectType]]
    result.setSubject(subjects.filter(s => s.subject.nonEmpty)
      .map(s =>
        OafMapperUtils.structuredProperty(s.subject.get, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, null)
      ).asJava)
    result.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
    val descriptions = (json \\ "descriptions").extract[List[DescriptionType]]
    result.setDescription(
      descriptions
        .filter(d => d.description.isDefined).
        map(d =>
          OafMapperUtils.field(d.description.get, null)
        ).filter(s => s != null).asJava)
    val publisher = (json \\ "publisher").extractOrElse[String](null)
    if (publisher != null)
      result.setPublisher(OafMapperUtils.field(publisher, null))
    val language: String = (json \\ "language").extractOrElse[String](null)
    if (language != null)
      result.setLanguage(vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, language))
    // The single instance created by getResult.
    val instance = result.getInstance().get(0)
    val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String]
    val accessRights: List[String] = for {
      JObject(rightsList) <- json \\ "rightsList"
      JField("rightsUri", JString(rightsUri)) <- rightsList
    } yield rightsUri
    // Access right: the first rights URI having a synonym among the access modes.
    val aRights: Option[AccessRight] = accessRights.map(r => {
      vocabularies.getSynonymAsQualifier(ModelConstants.DNET_ACCESS_MODES, r)
    }).find(q => q != null).map(q => {
      val a = new AccessRight
      a.setClassid(q.getClassid)
      a.setClassname(q.getClassname)
      a.setSchemeid(q.getSchemeid)
      a.setSchemename(q.getSchemename)
      a
    })
    val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
    // The instance is completed only when the record declares its client, which is looked up
    // (upper cased) in the hostedBy map to find the hosting datasource.
    if (client.isDefined) {
      val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)
      instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name))
      instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
      instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
      instance.setAccessright(access_rights_qualifier)
      instance.setPid(result.getPid)
      // License: the first http rights URI that looks like a license address.
      val license = accessRights
        .find(r => r.startsWith("http") && r.matches(".*(/licenses|/publicdomain|unlicense\\.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*"))
      if (license.isDefined)
        instance.setLicense(OafMapperUtils.field(license.get, null))
    }
    val awardUris: List[String] = for {
      JObject(fundingReferences) <- json \\ "fundingReferences"
      JField("awardUri", JString(awardUri)) <- fundingReferences
    } yield awardUri
    // NOTE(review): the relations are built with the identifier assigned at the beginning of the
    // method, which is replaced a few lines below — confirm the two identifiers are expected to match.
    val relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null)
    fix_figshare(result)
    result.setId(IdentifierFactory.createIdentifier(result))
    if (result.getId == null)
      return List()
    if (relations != null && relations.nonEmpty) {
      List(result) ::: relations
    }
    else
      List(result)
  }
  /**
   * Creates the DataInfo shared by the objects generated from Datacite records:
   * not inferred, not deleted by inference, visible, with the action-set provenance.
   *
   * @param trust the trust value to set
   */
  def generateDataInfo(trust: String): DataInfo = {
    val info = new DataInfo
    info.setTrust(trust)
    info.setProvenanceaction(ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER)
    // all the flags are switched off
    info.setInferred(false)
    info.setDeletedbyinference(false)
    info.setInvisible(false)
    info
  }
  /**
   * Builds a datasource identifier from a value shaped like `prefix::suffix`:
   * the part before the first "::" is kept, the part after it is replaced by its md5.
   *
   * @param input the original datasource identifier
   * @return the identifier in the form `10|prefix::md5(suffix)`
   */
  def generateDSId(input: String): String = {
    val separator = "::"
    val nsPrefix = StringUtils.substringBefore(input, separator)
    val localId = StringUtils.substringAfter(input, separator)
    "10|" + nsPrefix + separator + DHPUtils.md5(localId)
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala
@ -0,0 +1,41 @@
 package eu.dnetlib.dhp.actionmanager.datacite
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.schema.oaf.Oaf
 import org.apache.hadoop.io.Text
 import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.hadoop.mapred.SequenceFileOutputFormat
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
 import scala.io.Source
 /** Spark job that loads a dataset of Oaf objects from `sourcePath`, converts each of them with
   * DataciteToOAFTransformation.toActionSet and stores the resulting (key, value) pairs as a
   * gzip-compressed Hadoop sequence file of Text/Text under `targetPath`.
   */
 object ExportActionSetJobNode {
  val log: Logger = LoggerFactory.getLogger(ExportActionSetJobNode.getClass)
  /** Job entry point; expects the `master`, `sourcePath` and `targetPath` arguments. */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf
    val paramsStream = getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json")
    val argParser = new ArgumentApplicationParser(Source.fromInputStream(paramsStream).mkString)
    argParser.parseArgument(args)
    val master = argParser.get("master")
    val sourcePath = argParser.get("sourcePath")
    val targetPath = argParser.get("targetPath")
    val session: SparkSession = SparkSession.builder()
      .config(sparkConf)
      .appName(ExportActionSetJobNode.getClass.getSimpleName)
      .master(master)
      .getOrCreate()
    // encoders required by the typed read and by the map to string pairs
    implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
    implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING)
    // records for which toActionSet yields null are discarded
    val actions = session.read.load(sourcePath).as[Oaf]
      .map(oaf => DataciteToOAFTransformation.toActionSet(oaf))
      .filter(pair => pair != null)
    actions.rdd
      .map(pair => (new Text(pair._1), new Text(pair._2)))
      .saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec])
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala
@ -0,0 +1,47 @@
 package eu.dnetlib.dhp.actionmanager.datacite
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.mdstore.MetadataRecord
 import eu.dnetlib.dhp.schema.oaf.Oaf
 import eu.dnetlib.dhp.utils.ISLookupClientFactory
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
 import scala.io.Source
 /** Spark job that reads the stored DataciteType records from `sourcePath`, keeps the active
   * ones, converts them with DataciteToOAFTransformation.generateOAF and overwrites `targetPath`
   * with the non-null results.
   */
 object GenerateDataciteDatasetSpark {
  val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass)
  /** Job entry point; expects the `master`, `sourcePath`, `targetPath` and `isLookupUrl` arguments. */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf
    val paramsJson = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString
    val argParser = new ArgumentApplicationParser(paramsJson)
    argParser.parseArgument(args)
    val master = argParser.get("master")
    val sourcePath = argParser.get("sourcePath")
    val targetPath = argParser.get("targetPath")
    val isLookupUrl: String = argParser.get("isLookupUrl")
    log.info("isLookupUrl: {}", isLookupUrl)
    // vocabularies are loaded from the lookup service before the Spark session is created
    val lookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
    val vocabularies = VocabularyGroup.loadVocsFromIS(lookupService)
    val spark: SparkSession = SparkSession.builder()
      .config(sparkConf)
      .appName(GenerateDataciteDatasetSpark.getClass.getSimpleName)
      .master(master)
      .getOrCreate()
    implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord]
    implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
    import spark.implicits._
    val activeRecords = spark.read.load(sourcePath).as[DataciteType]
      .filter(record => record.isActive)
    // the record timestamp is passed for both timestamp arguments of generateOAF
    val oafItems = activeRecords
      .flatMap(record => DataciteToOAFTransformation.generateOAF(record.json, record.timestamp, record.timestamp, vocabularies))
      .filter(item => item != null)
    oafItems.write.mode(SaveMode.Overwrite).save(targetPath)
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala
@ -0,0 +1,186 @@
 package eu.dnetlib.dhp.actionmanager.datacite
 import eu.dnetlib.dhp.actionmanager.datacite.DataciteToOAFTransformation.df_it
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
 import org.apache.hadoop.hdfs.DistributedFileSystem
 import org.apache.hadoop.io.{IntWritable, SequenceFile, Text}
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.expressions.Aggregator
 import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession}
 import org.json4s.DefaultFormats
 import org.json4s.jackson.JsonMethods.parse
 import org.apache.spark.sql.functions.max
 import org.slf4j.{Logger, LoggerFactory}
 import java.time.format.DateTimeFormatter._
 import java.time.{LocalDate, LocalDateTime, ZoneOffset}
 import scala.io.Source
 object ImportDatacite {
  val log: Logger = LoggerFactory.getLogger(ImportDatacite.getClass)
  /** Converts one JSON record into a DataciteType.
    *
    * Reads `attributes.doi` (lower-cased), `attributes.isActive` and `attributes.updated` from
    * the parsed document. The `updated` value is parsed as a local date-time with ISO_DATE_TIME
    * and interpreted as UTC; the resulting epoch milliseconds are divided by 1000 to obtain the
    * timestamp in seconds. The original input string is stored unchanged in the `json` field.
    *
    * @param input the JSON record
    * @return the corresponding DataciteType
    */
  def convertAPIStringToDataciteItem(input: String): DataciteType = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    val document: org.json4s.JValue = parse(input)
    val attributes = document \ "attributes"
    val doi = (attributes \ "doi").extract[String].toLowerCase
    val isActive = (attributes \ "isActive").extract[Boolean]
    val updated = (attributes \ "updated").extract[String]
    val epochSeconds = LocalDateTime.parse(updated, ISO_DATE_TIME).toInstant(ZoneOffset.UTC).toEpochMilli / 1000
    DataciteType(doi = doi, timestamp = epochSeconds, isActive = isActive, json = input)
  }
  /** Job entry point: updates the stored Datacite dump with the records collected from the API.
    *
    * Steps:
    *  1. read the current dump from `dataciteDumpPath` and take the highest `timestamp` in it;
    *  2. unless `skipImport` is "true" (case-insensitive), collect the records from that
    *     timestamp on and write them to a sequence file at `targetPath`;
    *  3. if at least one record is available, convert the sequence file to a dataset, union it
    *     with the dump, keep for every DOI the item with the highest timestamp and replace the
    *     dump with the merged result.
    *
    * The replacement deletes the old dump and then renames `<dump>_updated` over it; a failed
    * rename raises an IllegalStateException instead of leaving the dump silently missing.
    *
    * @param args command line arguments: `master`, `namenode`, `targetPath`,
    *             `dataciteDumpPath`, optional `blocksize` (default 100) and `skipImport`
    */
  def main(args: Array[String]): Unit = {
    val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json")).mkString)
    parser.parseArgument(args)
    val master = parser.get("master")
    val hdfsuri = parser.get("namenode")
    log.info(s"namenode is $hdfsuri")
    val targetPath = parser.get("targetPath")
    log.info(s"targetPath is $targetPath")
    val dataciteDump = parser.get("dataciteDumpPath")
    log.info(s"dataciteDump is $dataciteDump")
    val hdfsTargetPath = new Path(targetPath)
    log.info(s"hdfsTargetPath is $hdfsTargetPath")
    val bs = if (parser.get("blocksize") == null) 100 else parser.get("blocksize").toInt
    val skipImport = parser.get("skipImport")
    log.info(s"skipImport is $skipImport")
    val spark: SparkSession = SparkSession.builder()
      .appName(ImportDatacite.getClass.getSimpleName)
      .master(master)
      .getOrCreate()
    // ====== Init HDFS File System Object
    val conf = new Configuration
    // Set FileSystem URI
    conf.set("fs.defaultFS", hdfsuri)
    // Because of Maven
    conf.set("fs.hdfs.impl", classOf[DistributedFileSystem].getName)
    conf.set("fs.file.impl", classOf[LocalFileSystem].getName)
    val sc: SparkContext = spark.sparkContext
    sc.setLogLevel("ERROR")
    import spark.implicits._
    // Keeps, between two items, the one with the highest timestamp (null acts as the neutral element)
    val dataciteAggregator: Aggregator[DataciteType, DataciteType, DataciteType] = new Aggregator[DataciteType, DataciteType, DataciteType] with Serializable {
      override def zero: DataciteType = null
      override def reduce(a: DataciteType, b: DataciteType): DataciteType = {
        if (b == null)
          return a
        if (a == null)
          return b
        if (a.timestamp > b.timestamp) {
          return a
        }
        b
      }
      override def merge(a: DataciteType, b: DataciteType): DataciteType = {
        reduce(a, b)
      }
      override def bufferEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]]
      override def outputEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]]
      override def finish(reduction: DataciteType): DataciteType = reduction
    }
    val dump: Dataset[DataciteType] = spark.read.load(dataciteDump).as[DataciteType]
    val ts = dump.select(max("timestamp")).first().getLong(0)
    println(s"last Timestamp is $ts")
    // when the import is skipped, cnt is forced to 1 so that an existing sequence file is still merged
    val cnt = if ("true".equalsIgnoreCase(skipImport)) 1 else writeSequenceFile(hdfsTargetPath, ts, conf, bs)
    println(s"Imported from Datacite API $cnt documents")
    if (cnt > 0) {
      // the sequence file is written with IntWritable keys: read it back with the same key class
      val inputRdd: RDD[DataciteType] = sc.sequenceFile(targetPath, classOf[IntWritable], classOf[Text])
        .map(s => s._2.toString)
        .map(s => convertAPIStringToDataciteItem(s))
      spark.createDataset(inputRdd).write.mode(SaveMode.Overwrite).save(s"${targetPath}_dataset")
      val ds: Dataset[DataciteType] = spark.read.load(s"${targetPath}_dataset").as[DataciteType]
      dump
        .union(ds)
        .groupByKey(_.doi)
        .agg(dataciteAggregator.toColumn)
        .map(s => s._2)
        .repartition(4000)
        .write.mode(SaveMode.Overwrite).save(s"${dataciteDump}_updated")
      val fs = FileSystem.get(sc.hadoopConfiguration)
      fs.delete(new Path(s"$dataciteDump"), true)
      // rename reports failure through its return value: do not let a failed swap go unnoticed
      if (!fs.rename(new Path(s"${dataciteDump}_updated"), new Path(s"$dataciteDump")))
        throw new IllegalStateException(s"Unable to rename ${dataciteDump}_updated to $dataciteDump")
    }
  }
  /** Collects records through DataciteAPIImporter and appends them to a sequence file.
    *
    * The interval between `timestamp` (seconds, converted to milliseconds) and the current time
    * is walked in windows of `delta` milliseconds; for each window a new DataciteAPIImporter is
    * created and every item it returns is appended with a progressive integer key.
    *
    * Failures are logged and do not propagate: the method returns the number of records written
    * up to that point.
    *
    * @param hdfsTargetPath the sequence file to create
    * @param timestamp      starting point of the import, in seconds
    * @param conf           Hadoop configuration used to create the writer
    * @param bs             block size handed to DataciteAPIImporter
    * @return the number of records appended to the sequence file
    */
  private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs:Int): Long = {
    var from: Long = timestamp * 1000
    val delta: Long = 50000000L
    val now: Long = System.currentTimeMillis()
    var i = 0
    try {
      val writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(hdfsTargetPath), SequenceFile.Writer.keyClass(classOf[IntWritable]), SequenceFile.Writer.valueClass(classOf[Text]))
      try {
        var start: Long = System.currentTimeMillis
        // key and value holders are reused for every appended record
        val key: IntWritable = new IntWritable(i)
        val value: Text = new Text
        while (from < now) {
          val client = new DataciteAPIImporter(from, bs, from + delta)
          while (client.hasNext) {
            key.set(i)
            i += 1
            value.set(client.next())
            writer.append(key, value)
            writer.hflush()
            if (i % 1000 == 0) {
              val end = System.currentTimeMillis
              val time = (end - start) / 1000.0F
              println(s"Imported $i in $time seconds")
              start = System.currentTimeMillis
            }
          }
          println(s"updating from value: $from  -> ${from+delta}")
          from = from + delta
        }
      } catch {
        case e: Throwable =>
          // println("Error", e) printed the tuple (Error,<exception>) and lost the stack trace
          log.error("Error", e)
      } finally writer.close()
    }
    catch {
      case e: Throwable =>
        log.error("Error", e)
    }
    i
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java
@ -248,7 +248,7 @@ public class PrepareProgramme {
 							parent = parent.substring(parent.lastIndexOf("|") + 1).trim();
 						}
 						if (current.trim().length() > parent.length()
-							&& current.toLowerCase().trim().substring(0, parent.length()).equals(parent)) {
+							&& current.toLowerCase().trim().startsWith(parent)) {
 							current = current.substring(parent.length() + 1);
 							if (current.trim().charAt(0) == '-' || current.trim().charAt(0) == '–') {
 								current = current.trim().substring(1).trim();
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java
@ -18,7 +18,6 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
@ -33,7 +32,6 @@ public class PrepareProjects {
 	private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
@ -93,7 +91,7 @@ public class PrepareProjects {
 	}
 	private static FlatMapFunction<Tuple2<ProjectSubset, CSVProject>, CSVProject> getTuple2CSVProjectFlatMapFunction() {
-		return (FlatMapFunction<Tuple2<ProjectSubset, CSVProject>, CSVProject>) value -> {
+		return value -> {
 			Optional<CSVProject> csvProject = Optional.ofNullable(value._2());
 			List<CSVProject> csvProjectList = new ArrayList<>();
 			if (csvProject.isPresent()) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java
@ -1,20 +0,0 @@
 package eu.dnetlib.dhp.actionmanager.project.httpconnector;
 import java.util.LinkedList;
 /**
 * List of error messages whose string form enumerates every entry as a numbered retry.
 */
 public class CollectorPluginErrorLogList extends LinkedList<String> {
 	private static final long serialVersionUID = -6925786561303289704L;
 	/**
 	 * Renders the list as {@code "Retry #0: first / Retry #1: second / "}.
 	 *
 	 * @return the concatenation of all the entries, each prefixed by its zero-based position and followed by
 	 *         {@code " / "}; the empty string when the list is empty
 	 */
 	@Override
 	public String toString() {
 		// a single builder avoids re-copying the whole text at every iteration
 		final StringBuilder log = new StringBuilder();
 		int index = 0;
 		for (final String errorMessage : this) {
 			log.append("Retry #").append(index++).append(": ").append(errorMessage).append(" / ");
 		}
 		return log.toString();
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java
@ -1,20 +0,0 @@
 package eu.dnetlib.dhp.actionmanager.project.httpconnector;
 /**
 * Checked exception of the httpconnector package; it can carry a message, a cause, or both.
 */
 public class CollectorServiceException extends Exception {
 	private static final long serialVersionUID = 7523999812098059764L;
 	/**
 	 * @param message description of the failure
 	 */
 	public CollectorServiceException(final String message) {
 		super(message);
 	}
 	/**
 	 * @param message description of the failure
 	 * @param cause the underlying error
 	 */
 	public CollectorServiceException(final String message, final Throwable cause) {
 		super(message, cause);
 	}
 	/**
 	 * @param cause the underlying error
 	 */
 	public CollectorServiceException(final Throwable cause) {
 		super(cause);
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java
@ -1,240 +0,0 @@
 package eu.dnetlib.dhp.actionmanager.project.httpconnector;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.*;
 import java.security.GeneralSecurityException;
 import java.security.cert.X509Certificate;
 import java.util.List;
 import java.util.Map;
 import javax.net.ssl.HttpsURLConnection;
 import javax.net.ssl.SSLContext;
 import javax.net.ssl.TrustManager;
 import javax.net.ssl.X509TrustManager;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 /**
 * HTTP GET helper built on {@link HttpURLConnection} that retries failed downloads.
 * <p>
 * A request is attempted at most {@code maxNumberOfRetry} times. Redirects (301/302) are followed manually and
 * count as attempts; a 503 response carrying a usable {@code Retry-After} header is retried after the requested
 * delay (plus 10 seconds); any other non-200 response or I/O error is retried after {@code defaultDelay} seconds.
 * <p>
 * Creating an instance installs a JVM-wide default {@link CookieManager} accepting all cookies.
 *
 * @author jochen, michele, andrea
 */
 public class HttpConnector {
 	private static final Log log = LogFactory.getLog(HttpConnector.class);
 	private int maxNumberOfRetry = 6;
 	private int defaultDelay = 120; // seconds
 	private int readTimeOut = 120; // seconds
 	private String responseType = null;
 	private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
 	public HttpConnector() {
 		CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
 	}
 	/**
 	 * Given the URL returns the content via HTTP GET
 	 *
 	 * @param requestUrl the URL
 	 * @return the content of the downloaded resource
 	 * @throws CollectorServiceException when retrying more than maxNumberOfRetry times
 	 */
 	public String getInputSource(final String requestUrl) throws CollectorServiceException {
 		return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
 	}
 	/**
 	 * Given the URL returns the content as a stream via HTTP GET
 	 *
 	 * @param requestUrl the URL
 	 * @return the content of the downloaded resource as InputStream
 	 * @throws CollectorServiceException when retrying more than maxNumberOfRetry times
 	 */
 	public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorServiceException {
 		return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
 	}
 	/**
 	 * Downloads the resource and reads it fully into a string, retrying when the read fails.
 	 *
 	 * @param requestUrl the URL
 	 * @param retryNumber the number of the current attempt (1-based)
 	 * @param errorList the errors collected by the previous attempts
 	 * @return the content of the downloaded resource
 	 * @throws CollectorServiceException when the attempts are exhausted or the thread is interrupted
 	 */
 	private String attemptDownlaodAsString(final String requestUrl, final int retryNumber,
 		final CollectorPluginErrorLogList errorList)
 		throws CollectorServiceException {
 		try {
 			// the retry state is handed over to the download, so that read failures are counted against
 			// maxNumberOfRetry and the collected errors appear in the final exception
 			final InputStream s = attemptDownload(requestUrl, retryNumber, errorList);
 			try {
 				// NOTE: this IOUtils overload decodes the bytes with the platform default charset
 				return IOUtils.toString(s);
 			} catch (IOException e) {
 				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
 				Thread.sleep(defaultDelay * 1000L);
 				errorList.add(e.getMessage());
 				return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList);
 			} finally {
 				IOUtils.closeQuietly(s);
 			}
 		} catch (InterruptedException e) {
 			// keep the interruption visible to the callers
 			Thread.currentThread().interrupt();
 			throw new CollectorServiceException(e);
 		}
 	}
 	/**
 	 * Opens the connection and returns the response stream, retrying on redirects and failures.
 	 *
 	 * @param requestUrl the URL
 	 * @param retryNumber the number of the current attempt (1-based)
 	 * @param errorList the errors collected by the previous attempts; this attempt adds its own
 	 * @return the response body of the first attempt answered with 200
 	 * @throws CollectorServiceException when retryNumber exceeds maxNumberOfRetry, a redirect has no location, or
 	 *         the thread is interrupted while waiting
 	 */
 	private InputStream attemptDownload(final String requestUrl, final int retryNumber,
 		final CollectorPluginErrorLogList errorList)
 		throws CollectorServiceException {
 		if (retryNumber > maxNumberOfRetry) {
 			throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList);
 		}
 		log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
 		try {
 			try {
 				final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
 				urlConn.setInstanceFollowRedirects(false);
 				urlConn.setReadTimeout(readTimeOut * 1000);
 				urlConn.addRequestProperty("User-Agent", userAgent);
 				if (log.isDebugEnabled()) {
 					logHeaderFields(urlConn);
 				}
 				final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
 				final int status = urlConn.getResponseCode();
 				if (retryAfter > 0 && status == HttpURLConnection.HTTP_UNAVAILABLE) {
 					log.warn("waiting and repeating request after " + retryAfter + " sec.");
 					Thread.sleep(retryAfter * 1000L);
 					errorList.add("503 Service Unavailable");
 					urlConn.disconnect();
 					return attemptDownload(requestUrl, retryNumber + 1, errorList);
 				} else if (status == HttpURLConnection.HTTP_MOVED_PERM
 					|| status == HttpURLConnection.HTTP_MOVED_TEMP) {
 					final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
 					log.debug("The requested url has been moved to " + newUrl);
 					errorList
 						.add(
 							String
 								.format(
 									"%s %s. Moved to: %s", status, urlConn.getResponseMessage(),
 									newUrl));
 					urlConn.disconnect();
 					return attemptDownload(newUrl, retryNumber + 1, errorList);
 				} else if (status != HttpURLConnection.HTTP_OK) {
 					log
 						.error(
 							String
 								.format("HTTP error: %s %s", status, urlConn.getResponseMessage()));
 					Thread.sleep(defaultDelay * 1000L);
 					errorList.add(String.format("%s %s", status, urlConn.getResponseMessage()));
 					urlConn.disconnect();
 					return attemptDownload(requestUrl, retryNumber + 1, errorList);
 				} else {
 					final InputStream input = urlConn.getInputStream();
 					responseType = urlConn.getContentType();
 					return input;
 				}
 			} catch (IOException e) {
 				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
 				Thread.sleep(defaultDelay * 1000L);
 				errorList.add(e.getMessage());
 				return attemptDownload(requestUrl, retryNumber + 1, errorList);
 			}
 		} catch (InterruptedException e) {
 			// keep the interruption visible to the callers
 			Thread.currentThread().interrupt();
 			throw new CollectorServiceException(e);
 		}
 	}
 	/** Writes the response message and every named response header to the debug log. */
 	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
 		log.debug("StatusCode: " + urlConn.getResponseMessage());
 		for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
 			if (e.getKey() != null) {
 				for (String v : e.getValue()) {
 					log.debug("  key: " + e.getKey() + " - value: " + v);
 				}
 			}
 		}
 	}
 	/**
 	 * Extracts the delay requested by the {@code Retry-After} header.
 	 *
 	 * @param headerMap the response headers
 	 * @return the header value in seconds plus 10, or -1 when the header is missing or is not a non-negative
 	 *         integer (e.g. an HTTP date)
 	 */
 	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
 		for (Map.Entry<String, List<String>> header : headerMap.entrySet()) {
 			final String key = header.getKey();
 			final List<String> values = header.getValue();
 			if (key != null && key.equalsIgnoreCase("retry-after") && values != null && !values.isEmpty()) {
 				// toInt falls back to -1 instead of throwing on values such as "1.5" or "0x10"
 				final int seconds = NumberUtils.toInt(values.get(0), -1);
 				if (seconds >= 0) {
 					return seconds + 10;
 				}
 			}
 		}
 		return -1;
 	}
 	/**
 	 * Extracts the redirect target from the {@code Location} header.
 	 *
 	 * @param headerMap the response headers
 	 * @return the first value of the location header
 	 * @throws CollectorServiceException when the header is missing
 	 */
 	private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorServiceException {
 		for (Map.Entry<String, List<String>> header : headerMap.entrySet()) {
 			final String key = header.getKey();
 			final List<String> values = header.getValue();
 			// equalsIgnoreCase does not depend on the default locale, unlike toLowerCase()
 			if (key != null && key.equalsIgnoreCase("location") && values != null && !values.isEmpty()) {
 				return values.get(0);
 			}
 		}
 		throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING");
 	}
 	/**
 	 * register for https scheme; this is a workaround and not intended for the use in trusted environments
 	 * <p>
 	 * WARNING: the installed trust manager accepts every certificate and is set as the JVM-wide default socket
 	 * factory of {@link HttpsURLConnection}.
 	 */
 	public void initTrustManager() {
 		final X509TrustManager tm = new X509TrustManager() {
 			@Override
 			public void checkClientTrusted(final X509Certificate[] xcs, final String string) {
 			}
 			@Override
 			public void checkServerTrusted(final X509Certificate[] xcs, final String string) {
 			}
 			@Override
 			public X509Certificate[] getAcceptedIssuers() {
 				// the X509TrustManager contract requires a non-null (possibly empty) array
 				return new X509Certificate[0];
 			}
 		};
 		try {
 			final SSLContext ctx = SSLContext.getInstance("TLS");
 			ctx.init(null, new TrustManager[] {
 				tm
 			}, null);
 			HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
 		} catch (GeneralSecurityException e) {
 			log.fatal(e);
 			throw new IllegalStateException(e);
 		}
 	}
 	public int getMaxNumberOfRetry() {
 		return maxNumberOfRetry;
 	}
 	public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
 		this.maxNumberOfRetry = maxNumberOfRetry;
 	}
 	/** @return the delay, in seconds, applied before retrying a failed request */
 	public int getDefaultDelay() {
 		return defaultDelay;
 	}
 	public void setDefaultDelay(final int defaultDelay) {
 		this.defaultDelay = defaultDelay;
 	}
 	/** @return the read timeout, in seconds, applied to every connection */
 	public int getReadTimeOut() {
 		return readTimeOut;
 	}
 	public void setReadTimeOut(final int readTimeOut) {
 		this.readTimeOut = readTimeOut;
 	}
 	/** @return the content type of the last successful response, or null when none has been received yet */
 	public String getResponseType() {
 		return responseType;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java
@ -17,8 +17,8 @@ import org.apache.hadoop.fs.Path;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.collection.HttpConnector2;
 /**
 * Applies the parsing of a csv file and writes the Serialization of it in hdfs
@ -28,7 +28,7 @@ public class ReadCSV implements Closeable {
 	private final Configuration conf;
 	private final BufferedWriter writer;
 	private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-	private String csvFile;
+	private final String csvFile;
 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -74,7 +74,7 @@ public class ReadCSV implements Closeable {
 		throws Exception {
 		this.conf = new Configuration();
 		this.conf.set("fs.defaultFS", hdfsNameNode);
-		HttpConnector httpConnector = new HttpConnector();
+		HttpConnector2 httpConnector = new HttpConnector2();
 		FileSystem fileSystem = FileSystem.get(this.conf);
 		Path hdfsWritePath = new Path(hdfsPath);
 		FSDataOutputStream fsDataOutputStream = null;
@ -85,7 +85,6 @@ public class ReadCSV implements Closeable {
 		this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
 		this.csvFile = httpConnector.getInputSource(fileURL);
 		;
 	}
 	protected void write(final Object p) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java
@ -14,19 +14,18 @@ import org.apache.hadoop.fs.Path;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.collection.HttpConnector2;
 /**
 * Applies the parsing of an excel file and writes the Serialization of it in hdfs
 */
 public class ReadExcel implements Closeable {
 	private static final Log log = LogFactory.getLog(ReadCSV.class);
 	private final Configuration conf;
 	private final BufferedWriter writer;
 	private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-	private InputStream excelFile;
+	private final InputStream excelFile;
 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -73,7 +72,7 @@ public class ReadExcel implements Closeable {
 		throws Exception {
 		this.conf = new Configuration();
 		this.conf.set("fs.defaultFS", hdfsNameNode);
-		HttpConnector httpConnector = new HttpConnector();
+		HttpConnector2 httpConnector = new HttpConnector2();
 		FileSystem fileSystem = FileSystem.get(this.conf);
 		Path hdfsWritePath = new Path(hdfsPath);
 		FSDataOutputStream fsDataOutputStream = null;
@ -84,7 +83,6 @@ public class ReadExcel implements Closeable {
 		this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
 		this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
 		;
 	}
 	protected void write(final Object p) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
@ -3,11 +3,11 @@ package eu.dnetlib.dhp.actionmanager.ror;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
 import java.io.InputStream;
 import java.util.ArrayList;
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationCounter.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationCounter.java
@ -0,0 +1,45 @@
 package eu.dnetlib.dhp.aggregation.common;
 import java.io.Serializable;
 import org.apache.spark.util.LongAccumulator;
 /**
 * Serializable holder grouping three Spark {@link LongAccumulator}s: total, error and processed items.
 */
 public class AggregationCounter implements Serializable {
 	private LongAccumulator totalItems;
 	private LongAccumulator errorItems;
 	private LongAccumulator processedItems;
 	/** Creates a counter whose accumulators are all unset (null). */
 	public AggregationCounter() {
 	}
 	/**
 	 * @param total accumulator of the total items
 	 * @param errors accumulator of the items in error
 	 * @param processed accumulator of the processed items
 	 */
 	public AggregationCounter(LongAccumulator total, LongAccumulator errors, LongAccumulator processed) {
 		totalItems = total;
 		errorItems = errors;
 		processedItems = processed;
 	}
 	public LongAccumulator getTotalItems() {
 		return totalItems;
 	}
 	public void setTotalItems(LongAccumulator total) {
 		totalItems = total;
 	}
 	public LongAccumulator getErrorItems() {
 		return errorItems;
 	}
 	public void setErrorItems(LongAccumulator errors) {
 		errorItems = errors;
 	}
 	public LongAccumulator getProcessedItems() {
 		return processedItems;
 	}
 	public void setProcessedItems(LongAccumulator processed) {
 		processedItems = processed;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java
@ -0,0 +1,47 @@
 package eu.dnetlib.dhp.aggregation.common;
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.Objects;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.google.gson.Gson;
 import eu.dnetlib.dhp.message.MessageSender;
 import eu.dnetlib.dhp.utils.DHPUtils;
 /**
 * Ordered key/value report that can forward progress updates and, on close, its content through a
 * {@link MessageSender}.
 * <p>
 * When the report is created without a MessageSender both {@link #ongoing(Long, Long)} and {@link #close()} do
 * nothing.
 */
 public class AggregatorReport extends LinkedHashMap<String, String> implements Closeable {
 	private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
 	private MessageSender messageSender;
 	/** Creates a report that is not bound to any MessageSender. */
 	public AggregatorReport() {
 	}
 	/**
 	 * @param messageSender the sender used for ongoing messages and for the final report
 	 */
 	public AggregatorReport(MessageSender messageSender) throws IOException {
 		this.messageSender = messageSender;
 	}
 	/**
 	 * Forwards a progress update to the MessageSender, if one is set.
 	 *
 	 * @param current the current progress value
 	 * @param total the total value
 	 */
 	public void ongoing(Long current, Long total) {
 		// same guard as close(): a report built with the no-arg constructor has no sender
 		if (Objects.nonNull(messageSender)) {
 			messageSender.sendMessage(current, total);
 		}
 	}
 	/**
 	 * Logs every entry and sends the report through the MessageSender, if one is set. The report is sent as a
 	 * single-entry map: the key is the lower-cased simple class name, the value is the serialization of the
 	 * report values produced by DHPUtils.MAPPER.
 	 *
 	 * @throws IOException when the values cannot be serialized
 	 */
 	@Override
 	public void close() throws IOException {
 		if (Objects.nonNull(messageSender)) {
 			log.info("closing report: ");
 			this.forEach((k, v) -> log.info("{} - {}", k, v));
 			Map<String, String> m = new HashMap<>();
 			m.put(getClass().getSimpleName().toLowerCase(), DHPUtils.MAPPER.writeValueAsString(values()));
 			messageSender.sendReport(m);
 		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReporterCallback.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReporterCallback.java
@ -0,0 +1,10 @@
 package eu.dnetlib.dhp.aggregation.common;
 /**
 * Callback exposing a pair of progress values: a current value and a total.
 */
 public interface ReporterCallback {
 	/**
 	 * @return the current progress value
 	 */
 	Long getCurrent();
 	/**
 	 * @return the total value the current one is measured against
 	 */
 	Long getTotal();
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java
@ -0,0 +1,41 @@
 package eu.dnetlib.dhp.aggregation.common;
 import java.util.TimerTask;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 /**
 * Base class for jobs that periodically publish their advancement through an {@link AggregatorReport}.
 * <p>
 * The periodic task runs on a dedicated single-thread scheduler owned by the instance; subclasses start it with
 * {@link #schedule(ReporterCallback)} and stop it with {@link #shutdown()}.
 */
 public abstract class ReportingJob {
 	/**
 	 * Period, in seconds, between two consecutive ongoing messages.
 	 */
 	public static final int ONGOING_REPORT_FREQUENCY = 5;
 	/**
 	 * Delay, in seconds, before the first ongoing message is sent.
 	 */
 	public static final int INITIAL_DELAY = 2;
 	private final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
 	protected final AggregatorReport report;
 	/**
 	 * @param report the report receiving the ongoing messages
 	 */
 	public ReportingJob(AggregatorReport report) {
 		this.report = report;
 	}
 	/**
 	 * Starts the periodic reporting: at every tick the values supplied by the callback are passed to the report.
 	 *
 	 * @param callback the source of the current and total values
 	 */
 	protected void schedule(final ReporterCallback callback) {
 		final Runnable sendOngoing = () -> report.ongoing(callback.getCurrent(), callback.getTotal());
 		executor.scheduleAtFixedRate(sendOngoing, INITIAL_DELAY, ONGOING_REPORT_FREQUENCY, TimeUnit.SECONDS);
 	}
 	/**
 	 * Stops the scheduler.
 	 */
 	protected void shutdown() {
 		executor.shutdown();
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java
@ -0,0 +1,136 @@
 package eu.dnetlib.dhp.aggregation.mdstore;
 import static eu.dnetlib.dhp.common.Constants.*;
 import static eu.dnetlib.dhp.utils.DHPUtils.*;
 import java.net.URI;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.rest.DNetRestClient;
 import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
 /**
  * Command line entry point performing a single action (see {@link MDAction}) against the MDStore manager REST
  * endpoints. The action and its arguments are read from the command line; results that later steps need are
  * published through {@code populateOOZIEEnv}.
  */
 public class MDStoreActionNode {
 	private static final Logger log = LoggerFactory.getLogger(MDStoreActionNode.class);
 	/** Actions supported by this node. */
 	enum MDAction {
 		NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK
 	}
 	// NOTE(review): unlike the sibling URL templates this one is not final; left untouched to keep the public
 	// field assignable for any existing caller - confirm whether it can be made final.
 	public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion";
 	public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s";
 	public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort";
 	public static final String READ_LOCK_URL = "%s/mdstore/%s/startReading";
 	public static final String READ_UNLOCK_URL = "%s/version/%s/endReading";
 	/** Name of the property carrying the version created by NEW_VERSION. */
 	private static final String MDSTOREVERSIONPARAM = "mdStoreVersion";
 	/** Name of the property carrying the version returned by READ_LOCK. */
 	private static final String MDSTOREREADLOCKPARAM = "mdStoreReadLockVersion";
 	/**
 	 * Parses the arguments and executes the requested action.
 	 *
 	 * @param args command line arguments, described by {@code mdstore_action_parameters.json}
 	 * @throws IllegalArgumentException when a required argument is blank or the action is not supported
 	 * @throws Exception on argument parsing, JSON, HDFS or REST failures
 	 */
 	public static void main(String[] args) throws Exception {
 		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
 					MDStoreActionNode.class
 						.getResourceAsStream(
 							"/eu/dnetlib/dhp/collection/mdstore_action_parameters.json")));
 		argumentParser.parseArgument(args);
 		log.info("Java Xmx: {}m", Runtime.getRuntime().maxMemory() / (1024 * 1024));
 		final MDAction action = MDAction.valueOf(argumentParser.get("action"));
 		log.info("Current action is {}", action);
 		final String mdStoreManagerURI = argumentParser.get("mdStoreManagerURI");
 		log.info("mdStoreManagerURI is {}", mdStoreManagerURI);
 		switch (action) {
 			case NEW_VERSION: {
 				final String mdStoreID = argumentParser.get("mdStoreID");
 				if (StringUtils.isBlank(mdStoreID)) {
 					throw new IllegalArgumentException("missing or empty argument mdStoreId");
 				}
 				final MDStoreVersion currentVersion = DNetRestClient
 					.doGET(String.format(NEW_VERSION_URI, mdStoreManagerURI, mdStoreID), MDStoreVersion.class);
 				populateOOZIEEnv(MDSTOREVERSIONPARAM, MAPPER.writeValueAsString(currentVersion));
 				break;
 			}
 			case COMMIT: {
 				final String hdfsuri = argumentParser.get("namenode");
 				if (StringUtils.isBlank(hdfsuri)) {
 					throw new IllegalArgumentException("missing or empty argument namenode");
 				}
 				final MDStoreVersion mdStoreVersion = parseMDStoreVersion(argumentParser.get("mdStoreVersion"));
 				final Path hdfstoreSizepath = new Path(mdStoreVersion.getHdfsPath() + MDSTORE_SIZE_PATH);
 				try (
 					FileSystem fs = FileSystem.get(URI.create(hdfsuri), getHadoopConfiguration(hdfsuri));
 					FSDataInputStream inputStream = fs.open(hdfstoreSizepath)) {
 					final Long mdStoreSize = Long.parseLong(IOUtils.toString(inputStream));
 					// The size file is re-created as in the original code (NOTE(review): this leaves it empty -
 					// confirm that is intended); the returned output stream is now closed instead of leaked.
 					fs.create(hdfstoreSizepath).close();
 					DNetRestClient
 						.doGET(
 							String.format(COMMIT_VERSION_URL, mdStoreManagerURI, mdStoreVersion.getId(), mdStoreSize));
 				}
 				break;
 			}
 			case ROLLBACK: {
 				final MDStoreVersion mdStoreVersion = parseMDStoreVersion(argumentParser.get("mdStoreVersion"));
 				DNetRestClient.doGET(String.format(ROLLBACK_VERSION_URL, mdStoreManagerURI, mdStoreVersion.getId()));
 				break;
 			}
 			case READ_LOCK: {
 				final String mdStoreID = argumentParser.get("mdStoreID");
 				if (StringUtils.isBlank(mdStoreID)) {
 					throw new IllegalArgumentException("missing or empty argument mdStoreId");
 				}
 				final MDStoreVersion currentVersion = DNetRestClient
 					.doGET(String.format(READ_LOCK_URL, mdStoreManagerURI, mdStoreID), MDStoreVersion.class);
 				populateOOZIEEnv(MDSTOREREADLOCKPARAM, MAPPER.writeValueAsString(currentVersion));
 				break;
 			}
 			case READ_UNLOCK: {
 				final MDStoreVersion mdStoreVersion = parseMDStoreVersion(argumentParser.get("readMDStoreId"));
 				DNetRestClient.doGET(String.format(READ_UNLOCK_URL, mdStoreManagerURI, mdStoreVersion.getId()));
 				break;
 			}
 			default:
 				throw new IllegalArgumentException("invalid action");
 		}
 	}
 	/**
 	 * Deserializes an MDStoreVersion from its JSON form and checks that it carries a non-blank id.
 	 *
 	 * @param mdStoreVersionJson the JSON received as command line argument
 	 * @return the parsed version
 	 * @throws IllegalArgumentException when the parsed version has a blank id
 	 * @throws java.io.IOException when the JSON cannot be read
 	 */
 	private static MDStoreVersion parseMDStoreVersion(final String mdStoreVersionJson) throws java.io.IOException {
 		final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersionJson, MDStoreVersion.class);
 		if (StringUtils.isBlank(mdStoreVersion.getId())) {
 			throw new IllegalArgumentException(
 				"invalid MDStoreVersion value current is " + mdStoreVersionJson);
 		}
 		return mdStoreVersion;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorException.java
@ -1,16 +1,16 @@
-package eu.dnetlib.dhp.collection.worker;
+package eu.dnetlib.dhp.collection;
-public class DnetCollectorException extends Exception {
+public class CollectorException extends Exception {
 	/** */
 	private static final long serialVersionUID = -290723075076039757L;
-	public DnetCollectorException() {
+	public CollectorException() {
 		super();
 	}
-	public DnetCollectorException(
+	public CollectorException(
 		final String message,
 		final Throwable cause,
 		final boolean enableSuppression,
@ -18,15 +18,15 @@ public class DnetCollectorException extends Exception {
 		super(message, cause, enableSuppression, writableStackTrace);
 	}
-	public DnetCollectorException(final String message, final Throwable cause) {
+	public CollectorException(final String message, final Throwable cause) {
 		super(message, cause);
 	}
-	public DnetCollectorException(final String message) {
+	public CollectorException(final String message) {
 		super(message);
 	}
-	public DnetCollectorException(final Throwable cause) {
+	public CollectorException(final Throwable cause) {
 		super(cause);
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
@ -0,0 +1,134 @@
 package eu.dnetlib.dhp.collection;
 import static eu.dnetlib.dhp.common.Constants.SEQUENCE_FILE_NAME;
 import java.io.IOException;
 import java.util.Optional;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.DeflateCodec;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
 import eu.dnetlib.dhp.aggregation.common.ReportingJob;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin;
 import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
 /**
  * Runs a collector plugin for the given API and appends every collected record to a block-compressed sequence file
  * under the HDFS path of the given MDStore version, reporting the number of records written while it runs.
  */
 public class CollectorWorker extends ReportingJob {
 	private static final Logger log = LoggerFactory.getLogger(CollectorWorker.class);
 	private final ApiDescriptor api;
 	private final FileSystem fileSystem;
 	private final MDStoreVersion mdStoreVersion;
 	private final HttpClientParams clientParams;
 	/**
 	 * @param api descriptor of the API to collect from (protocol and parameters)
 	 * @param fileSystem file system providing the configuration used to write the sequence file
 	 * @param mdStoreVersion MDStore version whose HDFS path receives the sequence file
 	 * @param clientParams HTTP parameters handed to the HTTP based plugins
 	 * @param report report receiving ongoing updates and error entries
 	 */
 	public CollectorWorker(
 		final ApiDescriptor api,
 		final FileSystem fileSystem,
 		final MDStoreVersion mdStoreVersion,
 		final HttpClientParams clientParams,
 		final AggregatorReport report) {
 		super(report);
 		this.api = api;
 		this.fileSystem = fileSystem;
 		this.mdStoreVersion = mdStoreVersion;
 		this.clientParams = clientParams;
 	}
 	/**
 	 * Collects all the records produced by the plugin and writes them, keyed by a progressive counter, to the
 	 * sequence file.
 	 *
 	 * @throws UnknownCollectorPluginException when no plugin matches the API protocol / plugin type
 	 * @throws CollectorException wrapping any failure raised while collecting or writing; the failure is also
 	 *         recorded in the report
 	 * @throws IOException declared for callers; failures inside the collection are wrapped as CollectorException
 	 */
 	public void collect() throws UnknownCollectorPluginException, CollectorException, IOException {
 		final String outputPath = mdStoreVersion.getHdfsPath() + SEQUENCE_FILE_NAME;
 		log.info("outputPath path is {}", outputPath);
 		final CollectorPlugin plugin = getCollectorPlugin();
 		final AtomicInteger counter = new AtomicInteger(0);
 		scheduleReport(counter);
 		try (SequenceFile.Writer writer = SequenceFile
 			.createWriter(
 				fileSystem.getConf(),
 				SequenceFile.Writer.file(new Path(outputPath)),
 				SequenceFile.Writer.keyClass(IntWritable.class),
 				SequenceFile.Writer.valueClass(Text.class),
 				SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
 			// key and value instances are reused for every record
 			final IntWritable key = new IntWritable(counter.get());
 			final Text value = new Text();
 			plugin
 				.collect(api, report)
 				.forEach(
 					content -> {
 						key.set(counter.getAndIncrement());
 						value.set(content);
 						try {
 							writer.append(key, value);
 						} catch (Throwable e) {
 							throw new RuntimeException(e);
 						}
 					});
 		} catch (Throwable e) {
 			report.put(e.getClass().getName(), e.getMessage());
 			throw new CollectorException(e);
 		} finally {
 			// stop the periodic reports, then send a last update carrying the final count
 			shutdown();
 			report.ongoing(counter.longValue(), counter.longValue());
 		}
 	}
 	/**
 	 * Schedules the periodic ongoing report: the current value is the counter, the total is left null.
 	 */
 	private void scheduleReport(AtomicInteger counter) {
 		schedule(new ReporterCallback() {
 			@Override
 			public Long getCurrent() {
 				return counter.longValue();
 			}
 			@Override
 			public Long getTotal() {
 				return null;
 			}
 		});
 	}
 	/**
 	 * Instantiates the plugin matching the API protocol and, for the {@code other} protocol, the
 	 * {@code other_plugin_type} parameter.
 	 *
 	 * @throws UnknownCollectorPluginException when the protocol or the plugin type is missing or not managed
 	 */
 	private CollectorPlugin getCollectorPlugin() throws UnknownCollectorPluginException {
 		final CollectorPlugin.NAME protocol = findByName(CollectorPlugin.NAME.class, api.getProtocol());
 		if (protocol == null) {
 			// previously surfaced as IllegalArgumentException / NullPointerException from Enum.valueOf
 			throw new UnknownCollectorPluginException("protocol is not managed: " + api.getProtocol());
 		}
 		switch (protocol) {
 			case oai:
 				return new OaiCollectorPlugin(clientParams);
 			case rest_json2xml:
 				return new RestCollectorPlugin(clientParams);
 			case other:
 				final String pluginType = api.getParams() == null ? null
 					: api.getParams().get("other_plugin_type");
 				final CollectorPlugin.NAME.OTHER_NAME plugin = findByName(
 					CollectorPlugin.NAME.OTHER_NAME.class, pluginType);
 				if (plugin == null) {
 					// previously a message-less NoSuchElementException from Optional.get()
 					throw new UnknownCollectorPluginException("plugin is not managed: " + pluginType);
 				}
 				switch (plugin) {
 					case mdstore_mongodb_dump:
 						return new MongoDbDumpCollectorPlugin(fileSystem);
 					case mdstore_mongodb:
 						return new MDStoreCollectorPlugin();
 					default:
 						throw new UnknownCollectorPluginException("plugin is not managed: " + plugin);
 				}
 			default:
 				throw new UnknownCollectorPluginException("protocol is not managed: " + api.getProtocol());
 		}
 	}
 	/**
 	 * Looks up an enum constant by its exact name without throwing.
 	 *
 	 * @return the matching constant, or {@code null} when the name is null or matches no constant
 	 */
 	private static <E extends Enum<E>> E findByName(final Class<E> enumType, final String name) {
 		for (final E constant : enumType.getEnumConstants()) {
 			if (constant.name().equals(name)) {
 				return constant;
 			}
 		}
 		return null;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java
@ -0,0 +1,135 @@
 package eu.dnetlib.dhp.collection;
 import static eu.dnetlib.dhp.common.Constants.*;
 import static eu.dnetlib.dhp.utils.DHPUtils.*;
 import java.io.IOException;
 import java.util.Optional;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.message.MessageSender;
 import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
 /**
 * CollectorWorkerApplication is the main class responsible for starting the metadata collection process and storing
 * its outcomes into HDFS. The application is executed on the Hadoop cluster: when invoked in the context of the
 * metadata collection Oozie workflow, it receives all the input parameters necessary to instantiate the specific
 * collection plugin and its related configuration.
 *
 * @author Sandro La Bruzzo, Claudio Atzori
 */
 public class CollectorWorkerApplication {
 	private static final Logger log = LoggerFactory.getLogger(CollectorWorkerApplication.class);
 	/** File system handed over to the collector worker. */
 	private final FileSystem fileSystem;
 	/**
 	 * @param fileSystem the file system the collected records are written to
 	 */
 	public CollectorWorkerApplication(FileSystem fileSystem) {
 		this.fileSystem = fileSystem;
 	}
 	/**
 	 * Parses the command line, logs the received parameters and runs the collection.
 	 *
 	 * @param args command line arguments, described by {@code collector_worker_input_parameter.json}
 	 */
 	public static void main(final String[] args)
 		throws ParseException, IOException, UnknownCollectorPluginException, CollectorException {
 		final String parametersSpec = IOUtils
 			.toString(
 				CollectorWorkerApplication.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/collection/collector_worker_input_parameter.json"));
 		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(parametersSpec);
 		argumentParser.parseArgument(args);
 		final long maxHeapMegabytes = Runtime.getRuntime().maxMemory() / (1024 * 1024);
 		log.info("Java Xmx: {}m", maxHeapMegabytes);
 		final String hdfsuri = argumentParser.get("namenode");
 		log.info("hdfsURI is {}", hdfsuri);
 		final String apiDescriptor = argumentParser.get("apidescriptor");
 		log.info("apiDescriptor is {}", apiDescriptor);
 		final String mdStoreVersion = argumentParser.get("mdStoreVersion");
 		log.info("mdStoreVersion is {}", mdStoreVersion);
 		final String dnetMessageManagerURL = argumentParser.get(DNET_MESSAGE_MGR_URL);
 		log.info("dnetMessageManagerURL is {}", dnetMessageManagerURL);
 		final String workflowId = argumentParser.get("workflowId");
 		log.info("workflowId is {}", workflowId);
 		final HttpClientParams clientParams = getClientParams(argumentParser);
 		final ApiDescriptor api = MAPPER.readValue(apiDescriptor, ApiDescriptor.class);
 		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
 		final CollectorWorkerApplication application = new CollectorWorkerApplication(fileSystem);
 		application.run(mdStoreVersion, clientParams, api, dnetMessageManagerURL, workflowId);
 	}
 	/**
 	 * Deserializes the MDStore version and runs a collector worker whose report is sent through a message sender
 	 * bound to the given manager URL and workflow id; the report is closed when the collection ends.
 	 *
 	 * @param mdStoreVersion JSON form of the MDStore version to write into
 	 * @param clientParams HTTP client parameters for the plugins
 	 * @param api descriptor of the API to collect from
 	 * @param dnetMessageManagerURL URL handed to the message sender
 	 * @param workflowId workflow identifier handed to the message sender
 	 */
 	protected void run(String mdStoreVersion, HttpClientParams clientParams, ApiDescriptor api,
 		String dnetMessageManagerURL, String workflowId)
 		throws IOException, CollectorException, UnknownCollectorPluginException {
 		final MDStoreVersion currentVersion = MAPPER.readValue(mdStoreVersion, MDStoreVersion.class);
 		final MessageSender ms = new MessageSender(dnetMessageManagerURL, workflowId);
 		try (AggregatorReport report = new AggregatorReport(ms)) {
 			final CollectorWorker worker = new CollectorWorker(api, fileSystem, currentVersion, clientParams, report);
 			worker.collect();
 		}
 	}
 	/**
 	 * Builds the HTTP client parameters from the optional command line arguments, falling back to the defaults
 	 * declared by {@link HttpClientParams}; each resolved value is logged right after being set.
 	 */
 	private static HttpClientParams getClientParams(ArgumentApplicationParser argumentParser) {
 		final HttpClientParams clientParams = new HttpClientParams();
 		clientParams
 			.setMaxNumberOfRetry(intArgument(argumentParser, MAX_NUMBER_OF_RETRY, HttpClientParams._maxNumberOfRetry));
 		log.info("maxNumberOfRetry is {}", clientParams.getMaxNumberOfRetry());
 		clientParams.setRequestDelay(intArgument(argumentParser, REQUEST_DELAY, HttpClientParams._requestDelay));
 		log.info("requestDelay is {}", clientParams.getRequestDelay());
 		clientParams.setRetryDelay(intArgument(argumentParser, RETRY_DELAY, HttpClientParams._retryDelay));
 		log.info("retryDelay is {}", clientParams.getRetryDelay());
 		clientParams.setConnectTimeOut(intArgument(argumentParser, CONNECT_TIMEOUT, HttpClientParams._connectTimeOut));
 		log.info("connectTimeOut is {}", clientParams.getConnectTimeOut());
 		clientParams.setReadTimeOut(intArgument(argumentParser, READ_TIMEOUT, HttpClientParams._readTimeOut));
 		log.info("readTimeOut is {}", clientParams.getReadTimeOut());
 		return clientParams;
 	}
 	/**
 	 * Reads an optional integer argument.
 	 *
 	 * @return the parsed value, or {@code defaultValue} when the argument is absent
 	 * @throws NumberFormatException when the argument is present but is not an integer
 	 */
 	private static int intArgument(ArgumentApplicationParser argumentParser, String name, int defaultValue) {
 		final String raw = argumentParser.get(name);
 		if (raw == null) {
 			return defaultValue;
 		}
 		return Integer.parseInt(raw);
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
@ -1,28 +1,26 @@
 package eu.dnetlib.dhp.collection;
 import static eu.dnetlib.dhp.common.Constants.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import static eu.dnetlib.dhp.utils.DHPUtils.*;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
 import org.apache.commons.cli.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Dataset;
+import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Encoder;
+import org.apache.spark.sql.*;
-import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.expressions.Aggregator;
 import org.apache.spark.sql.SparkSession;
 import org.apache.spark.util.LongAccumulator;
 import org.dom4j.Document;
 import org.dom4j.Node;
@ -30,19 +28,172 @@ import org.dom4j.io.SAXReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
-import eu.dnetlib.dhp.model.mdstore.Provenance;
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
-import eu.dnetlib.message.Message;
+import eu.dnetlib.dhp.schema.mdstore.Provenance;
-import eu.dnetlib.message.MessageManager;
+import scala.Tuple2;
 import eu.dnetlib.message.MessageType;
 public class GenerateNativeStoreSparkJob {
 	private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class);
 	/**
 	 * Parses the job arguments, logs them and runs {@code createNativeMDStore} inside a Spark session.
 	 *
 	 * @param args command line arguments, described by {@code generate_native_input_parameters.json}
 	 * @throws NumberFormatException when {@code dateOfCollection} is not a valid long
 	 * @throws Exception on argument parsing, JSON or Spark failures
 	 */
 	public static void main(String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
 					GenerateNativeStoreSparkJob.class
 						.getResourceAsStream(
 							"/eu/dnetlib/dhp/collection/generate_native_input_parameters.json")));
 		parser.parseArgument(args);
 		final String provenanceArgument = parser.get("provenance");
 		log.info("Provenance is {}", provenanceArgument);
 		final Provenance provenance = MAPPER.readValue(provenanceArgument, Provenance.class);
 		final String dateOfCollectionArgs = parser.get("dateOfCollection");
 		log.info("dateOfCollection is {}", dateOfCollectionArgs);
 		// Long.valueOf replaces the deprecated Long(String) constructor; parsing rules are the same
 		final Long dateOfCollection = Long.valueOf(dateOfCollectionArgs);
 		String mdStoreVersion = parser.get("mdStoreVersion");
 		log.info("mdStoreVersion is {}", mdStoreVersion);
 		final MDStoreVersion currentVersion = MAPPER.readValue(mdStoreVersion, MDStoreVersion.class);
 		String readMdStoreVersionParam = parser.get("readMdStoreVersion");
 		log.info("readMdStoreVersion is {}", readMdStoreVersionParam);
 		// a blank readMdStoreVersion means there is no previous version to merge with
 		final MDStoreVersion readMdStoreVersion = StringUtils.isBlank(readMdStoreVersionParam) ? null
 			: MAPPER.readValue(readMdStoreVersionParam, MDStoreVersion.class);
 		final String xpath = parser.get("xpath");
 		log.info("xpath is {}", xpath);
 		final String encoding = parser.get("encoding");
 		log.info("encoding is {}", encoding);
 		Boolean isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> createNativeMDStore(
 				spark, provenance, dateOfCollection, xpath, encoding, currentVersion, readMdStoreVersion));
 	}
 	/**
 	 * Builds the native MDStore for the current version from the collected sequence file and stores its size.
 	 * <p>
 	 * Records are parsed with {@code parseRecord}, unparsable ones (null) are dropped and duplicates removed. When a
 	 * read version is given, the new records are merged with the ones of that version, keeping one record per id
 	 * as selected by {@link MDStoreAggregator}. The number of records found at the target path is finally written
 	 * to the size file of the current version.
 	 *
 	 * @param spark the Spark session
 	 * @param provenance provenance attached to every parsed record
 	 * @param dateOfCollection collection date attached to every parsed record
 	 * @param xpath xpath handed to {@code parseRecord}
 	 * @param encoding encoding handed to {@code parseRecord}
 	 * @param currentVersion version being written; its HDFS path hosts the sequence file, the data and the size file
 	 * @param readVersion previous version to merge with, or {@code null} to store only the new records
 	 * @throws IOException declared by this method; presumably raised by the HDFS write helpers - confirm
 	 */
 	private static void createNativeMDStore(SparkSession spark,
 		Provenance provenance,
 		Long dateOfCollection,
 		String xpath,
 		String encoding,
 		MDStoreVersion currentVersion,
 		MDStoreVersion readVersion) throws IOException {
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 		// accumulators handed to parseRecord
 		final LongAccumulator totalItems = sc.sc().longAccumulator(CONTENT_TOTALITEMS);
 		final LongAccumulator invalidRecords = sc.sc().longAccumulator(CONTENT_INVALIDRECORDS);
 		final String seqFilePath = currentVersion.getHdfsPath() + SEQUENCE_FILE_NAME;
 		// only the value (the record text) of each sequence file entry is used
 		final JavaRDD<MetadataRecord> nativeStore = sc
 			.sequenceFile(seqFilePath, IntWritable.class, Text.class)
 			.map(
 				item -> parseRecord(
 					item._2().toString(),
 					xpath,
 					encoding,
 					provenance,
 					dateOfCollection,
 					totalItems,
 					invalidRecords))
 			.filter(Objects::nonNull)
 			.distinct();
 		final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
 		final Dataset<MetadataRecord> mdstore = spark.createDataset(nativeStore.rdd(), encoder);
 		final String targetPath = currentVersion.getHdfsPath() + MDSTORE_DATA_PATH;
 		if (readVersion != null) { // INCREMENTAL MODE
 			log.info("updating {} incrementally with {}", targetPath, readVersion.getHdfsPath());
 			Dataset<MetadataRecord> currentMdStoreVersion = spark
 				.read()
 				.load(readVersion.getHdfsPath() + MDSTORE_DATA_PATH)
 				.as(encoder);
 			TypedColumn<MetadataRecord, MetadataRecord> aggregator = new MDStoreAggregator().toColumn();
 			// group old and new records by id and keep the one chosen by the aggregator
 			final Dataset<MetadataRecord> map = currentMdStoreVersion
 				.union(mdstore)
 				.groupByKey(
 					(MapFunction<MetadataRecord, String>) MetadataRecord::getId,
 					Encoders.STRING())
 				.agg(aggregator)
 				.map((MapFunction<Tuple2<String, MetadataRecord>, MetadataRecord>) Tuple2::_2, encoder);
 			// NOTE(review): logs up to 100 ids; looks like a debugging aid that evaluates the merged dataset
 			// before it is saved - confirm it is still needed
 			map.select("id").takeAsList(100).forEach(s -> log.info(s.toString()));
 			saveDataset(map, targetPath);
 		} else {
 			saveDataset(mdstore, targetPath);
 		}
 		// the size is computed by reading back what has just been saved
 		final Long total = spark.read().load(targetPath).count();
 		log.info("collected {} records for datasource '{}'", total, provenance.getDatasourceName());
 		writeHdfsFile(
 			spark.sparkContext().hadoopConfiguration(), total.toString(),
 			currentVersion.getHdfsPath() + MDSTORE_SIZE_PATH);
 	}
 	/**
 	 * Aggregator selecting, among the records it is fed, the one with the greatest date of collection; when two
 	 * records carry the same date the one already held in the buffer is kept.
 	 */
 	public static class MDStoreAggregator extends Aggregator<MetadataRecord, MetadataRecord, MetadataRecord> {
 		/** The empty buffer is represented by {@code null}. */
 		@Override
 		public MetadataRecord zero() {
 			return null;
 		}
 		/** Folds one more record into the buffer. */
 		@Override
 		public MetadataRecord reduce(MetadataRecord buffer, MetadataRecord record) {
 			return mostRecent(buffer, record);
 		}
 		/** Combines two partial buffers. */
 		@Override
 		public MetadataRecord merge(MetadataRecord left, MetadataRecord right) {
 			return mostRecent(left, right);
 		}
 		/** The buffer is already the result. */
 		@Override
 		public MetadataRecord finish(MetadataRecord reduction) {
 			return reduction;
 		}
 		@Override
 		public Encoder<MetadataRecord> bufferEncoder() {
 			return recordEncoder();
 		}
 		@Override
 		public Encoder<MetadataRecord> outputEncoder() {
 			return recordEncoder();
 		}
 		/**
 		 * Returns the candidate only when it is strictly more recent than the held record; a null side yields
 		 * the other one.
 		 */
 		private MetadataRecord mostRecent(MetadataRecord held, MetadataRecord candidate) {
 			if (held == null || candidate == null) {
 				return held == null ? candidate : held;
 			}
 			final boolean candidateIsNewer = candidate.getDateOfCollection() > held.getDateOfCollection();
 			if (candidateIsNewer) {
 				return candidate;
 			}
 			return held;
 		}
 		/** Creates a bean encoder for the records, a new one at every call. */
 		private static Encoder<MetadataRecord> recordEncoder() {
 			return Encoders.bean(MetadataRecord.class);
 		}
 	}
 	public static MetadataRecord parseRecord(
 		final String input,
 		final String xpath,
@ -64,112 +215,11 @@ public class GenerateNativeStoreSparkJob {
 					invalidRecords.add(1);
 				return null;
 			}
-			return new MetadataRecord(originalIdentifier, encoding, provenance, input, dateOfCollection);
+			return new MetadataRecord(originalIdentifier, encoding, provenance, document.asXML(), dateOfCollection);
 		} catch (Throwable e) {
 			if (invalidRecords != null)
 			invalidRecords.add(1);
 			e.printStackTrace();
 			return null;
 		}
 	}
 	/**
 	 * Parses the job arguments, converts the input sequence file into a parquet MDStore and, unless running in
 	 * test mode, sends ongoing and final report messages through a {@code MessageManager}.
 	 * <p>
 	 * NOTE(review): a main(String[]) with the same signature is also shown earlier in this class; judging by the
 	 * hunk header this one looks like the pre-change version - confirm which one is current.
 	 *
 	 * @param args command line arguments, described by {@code collection_input_parameters.json}
 	 * @throws Exception on argument parsing, JSON, messaging or Spark failures
 	 */
 	public static void main(String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
 					GenerateNativeStoreSparkJob.class
 						.getResourceAsStream(
 							"/eu/dnetlib/dhp/collection/collection_input_parameters.json")));
 		parser.parseArgument(args);
 		final ObjectMapper jsonMapper = new ObjectMapper();
 		final Provenance provenance = jsonMapper.readValue(parser.get("provenance"), Provenance.class);
 		final long dateOfCollection = new Long(parser.get("dateOfCollection"));
 		Boolean isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		// payloads of the ongoing and of the final report messages
 		final Map<String, String> ongoingMap = new HashMap<>();
 		final Map<String, String> reportMap = new HashMap<>();
 		// when isTest is true no message is sent and the manager is not closed
 		final boolean test = parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 				final JavaPairRDD<IntWritable, Text> inputRDD = sc
 					.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
 				final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems");
 				final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords");
 				final MessageManager manager = new MessageManager(
 					parser.get("rabbitHost"),
 					parser.get("rabbitUser"),
 					parser.get("rabbitPassword"),
 					false,
 					false,
 					null);
 				// parse every record, drop the unparsable ones (null) and remove duplicates
 				final JavaRDD<MetadataRecord> mappeRDD = inputRDD
 					.map(
 						item -> parseRecord(
 							item._2().toString(),
 							parser.get("xpath"),
 							parser.get("encoding"),
 							provenance,
 							dateOfCollection,
 							totalItems,
 							invalidRecords))
 					.filter(Objects::nonNull)
 					.distinct();
 				// first ongoing message, sent with a count of 0
 				ongoingMap.put("ongoing", "0");
 				if (!test) {
 					manager
 						.sendMessage(
 							new Message(
 								parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap),
 							parser.get("rabbitOngoingQueue"),
 							true,
 							false);
 				}
 				final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
 				final Dataset<MetadataRecord> mdstore = spark.createDataset(mappeRDD.rdd(), encoder);
 				final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords");
 				// the accumulators are read only after this count has run
 				mdStoreRecords.add(mdstore.count());
 				ongoingMap.put("ongoing", "" + totalItems.value());
 				if (!test) {
 					manager
 						.sendMessage(
 							new Message(
 								parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap),
 							parser.get("rabbitOngoingQueue"),
 							true,
 							false);
 				}
 				mdstore.write().format("parquet").save(parser.get("output"));
 				// final report: input items, invalid records and size of the stored MDStore
 				reportMap.put("inputItem", "" + totalItems.value());
 				reportMap.put("invalidRecords", "" + invalidRecords.value());
 				reportMap.put("mdStoreSize", "" + mdStoreRecords.value());
 				if (!test) {
 					manager
 						.sendMessage(
 							new Message(parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap),
 							parser.get("rabbitReportQueue"),
 							true,
 							false);
 					manager.close();
 				}
 			});
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java
@ -0,0 +1,94 @@
 package eu.dnetlib.dhp.collection;
 /**
 * Bundles the http connection parameters driving the client behaviour.
 */
 public class HttpClientParams {
 	// Default values applied by the no-argument constructor
 	/** Default for the maximum number of retries. */
 	public static int _maxNumberOfRetry = 3;
 	/** Default delay between requests, in milliseconds. */
 	public static int _requestDelay = 0;
 	/** Default delay before retrying after a failure, in seconds. */
 	public static int _retryDelay = 10;
 	/** Default connect timeout, in seconds. */
 	public static int _connectTimeOut = 10;
 	/** Default read timeout, in seconds. */
 	public static int _readTimeOut = 30;
 	/**
 	 * Maximum number of allowed retries before failing
 	 */
 	private int maxNumberOfRetry;
 	/**
 	 * Delay between requests (milliseconds)
 	 */
 	private int requestDelay;
 	/**
 	 * Time to wait after a failure before retrying (seconds)
 	 */
 	private int retryDelay;
 	/**
 	 * Connect timeout (seconds)
 	 */
 	private int connectTimeOut;
 	/**
 	 * Read timeout (seconds)
 	 */
 	private int readTimeOut;
 	/**
 	 * Creates the parameters using the current values of the static defaults.
 	 */
 	public HttpClientParams() {
 		this.maxNumberOfRetry = _maxNumberOfRetry;
 		this.requestDelay = _requestDelay;
 		this.retryDelay = _retryDelay;
 		this.connectTimeOut = _connectTimeOut;
 		this.readTimeOut = _readTimeOut;
 	}
 	/**
 	 * Creates the parameters from explicit values.
 	 *
 	 * @param maxNumberOfRetry maximum number of allowed retries before failing
 	 * @param requestDelay delay between requests (milliseconds)
 	 * @param retryDelay time to wait after a failure before retrying (seconds)
 	 * @param connectTimeOut connect timeout (seconds)
 	 * @param readTimeOut read timeout (seconds)
 	 */
 	public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
 		int readTimeOut) {
 		setMaxNumberOfRetryValue(maxNumberOfRetry);
 		this.requestDelay = requestDelay;
 		this.retryDelay = retryDelay;
 		this.connectTimeOut = connectTimeOut;
 		this.readTimeOut = readTimeOut;
 	}
 	/** Plain assignment kept private so the constructor never calls an overridable method. */
 	private void setMaxNumberOfRetryValue(int value) {
 		this.maxNumberOfRetry = value;
 	}
 	/** @return the maximum number of allowed retries before failing */
 	public int getMaxNumberOfRetry() {
 		return this.maxNumberOfRetry;
 	}
 	/** @param maxNumberOfRetry the maximum number of allowed retries before failing */
 	public void setMaxNumberOfRetry(int maxNumberOfRetry) {
 		setMaxNumberOfRetryValue(maxNumberOfRetry);
 	}
 	/** @return the delay between requests (milliseconds) */
 	public int getRequestDelay() {
 		return this.requestDelay;
 	}
 	/** @param requestDelay the delay between requests (milliseconds) */
 	public void setRequestDelay(int requestDelay) {
 		this.requestDelay = requestDelay;
 	}
 	/** @return the time to wait after a failure before retrying (seconds) */
 	public int getRetryDelay() {
 		return this.retryDelay;
 	}
 	/** @param retryDelay the time to wait after a failure before retrying (seconds) */
 	public void setRetryDelay(int retryDelay) {
 		this.retryDelay = retryDelay;
 	}
 	/** @return the connect timeout (seconds) */
 	public int getConnectTimeOut() {
 		return this.connectTimeOut;
 	}
 	/** @param connectTimeOut the connect timeout (seconds) */
 	public void setConnectTimeOut(int connectTimeOut) {
 		this.connectTimeOut = connectTimeOut;
 	}
 	/** @return the read timeout (seconds) */
 	public int getReadTimeOut() {
 		return this.readTimeOut;
 	}
 	/** @param readTimeOut the read timeout (seconds) */
 	public void setReadTimeOut(int readTimeOut) {
 		this.readTimeOut = readTimeOut;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java
@ -0,0 +1,259 @@
 package eu.dnetlib.dhp.collection;
 import static eu.dnetlib.dhp.utils.DHPUtils.*;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.*;
 import java.util.List;
 import java.util.Map;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.http.HttpHeaders;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 /**
 * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
 *
 * @author jochen, michele, andrea, alessia, claudio
 */
 public class HttpConnector2 {
 	private static final Logger log = LoggerFactory.getLogger(HttpConnector2.class);
 	private static final String REPORT_PREFIX = "http:";
 	private HttpClientParams clientParams;
 	private String responseType = null;
 	private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
 	public HttpConnector2() {
 		this(new HttpClientParams());
 	}
 	public HttpConnector2(HttpClientParams clientParams) {
 		this.clientParams = clientParams;
 		CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
 	}
 	/**
 	 * @see HttpConnector2#getInputSource(java.lang.String, AggregatorReport)
 	 */
 	public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorException {
 		return IOUtils.toInputStream(getInputSource(requestUrl));
 	}
 	/**
 	 * @see HttpConnector2#getInputSource(java.lang.String, AggregatorReport)
 	 */
 	public String getInputSource(final String requestUrl) throws CollectorException {
 		return attemptDownloadAsString(requestUrl, 1, new AggregatorReport());
 	}
 	/**
 	 * Given the URL returns the content via HTTP GET
 	 *
 	 * @param requestUrl the URL
 	 * @param report the list of errors
 	 * @return the content of the downloaded resource
 	 * @throws CollectorException when retrying more than maxNumberOfRetry times
 	 */
 	public String getInputSource(final String requestUrl, AggregatorReport report)
 		throws CollectorException {
 		return attemptDownloadAsString(requestUrl, 1, report);
 	}
 	private String attemptDownloadAsString(final String requestUrl, final int retryNumber,
 		final AggregatorReport report) throws CollectorException {
 		try (InputStream s = attemptDownload(requestUrl, retryNumber, report)) {
 			return IOUtils.toString(s);
 		} catch (IOException e) {
 			log.error(e.getMessage(), e);
 			throw new CollectorException(e);
 		}
 	}
 	private InputStream attemptDownload(final String requestUrl, final int retryNumber,
 		final AggregatorReport report) throws CollectorException, IOException {
 		if (retryNumber > getClientParams().getMaxNumberOfRetry()) {
 			final String msg = String
 				.format(
 					"Max number of retries (%s/%s) exceeded, failing.",
 					retryNumber, getClientParams().getMaxNumberOfRetry());
 			log.error(msg);
 			throw new CollectorException(msg);
 		}
 		log.info("Request attempt {} [{}]", retryNumber, requestUrl);
 		InputStream input = null;
 		try {
 			if (getClientParams().getRequestDelay() > 0) {
 				backoffAndSleep(getClientParams().getRequestDelay());
 			}
 			final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
 			urlConn.setInstanceFollowRedirects(false);
 			urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
 			urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000);
 			urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
 			if (log.isDebugEnabled()) {
 				logHeaderFields(urlConn);
 			}
 			int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
 			if (is2xx(urlConn.getResponseCode())) {
 				input = urlConn.getInputStream();
 				responseType = urlConn.getContentType();
 				return input;
 			}
 			if (is3xx(urlConn.getResponseCode())) {
 				// REDIRECTS
 				final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
 				log.info(String.format("The requested url has been moved to %s", newUrl));
 				report
 					.put(
 						REPORT_PREFIX + urlConn.getResponseCode(),
 						String.format("Moved to: %s", newUrl));
 				urlConn.disconnect();
 				if (retryAfter > 0) {
 					backoffAndSleep(retryAfter);
 				}
 				return attemptDownload(newUrl, retryNumber + 1, report);
 			}
 			if (is4xx(urlConn.getResponseCode())) {
 				// CLIENT ERROR, DO NOT RETRY
 				report
 					.put(
 						REPORT_PREFIX + urlConn.getResponseCode(),
 						String
 							.format(
 								"%s error: %s", requestUrl, urlConn.getResponseMessage()));
 				throw new CollectorException("4xx error: request will not be repeated. " + report);
 			}
 			if (is5xx(urlConn.getResponseCode())) {
 				// SERVER SIDE ERRORS RETRY ONLY on 503
 				switch (urlConn.getResponseCode()) {
 					case HttpURLConnection.HTTP_UNAVAILABLE:
 						if (retryAfter > 0) {
 							log
 								.warn(
 									requestUrl + " - waiting and repeating request after suggested retry-after "
 										+ retryAfter + " sec.");
 							backoffAndSleep(retryAfter * 1000);
 						} else {
 							log
 								.warn(
 									requestUrl + " - waiting and repeating request after default delay of "
 										+ getClientParams().getRetryDelay() + " sec.");
 							backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
 						}
 						report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
 						urlConn.disconnect();
 						return attemptDownload(requestUrl, retryNumber + 1, report);
 					default:
 						report
 							.put(
 								REPORT_PREFIX + urlConn.getResponseCode(),
 								String
 									.format(
 										"%s Error: %s", requestUrl, urlConn.getResponseMessage()));
 						throw new CollectorException(urlConn.getResponseCode() + " error " + report);
 				}
 			}
 			throw new CollectorException(
 				String
 					.format(
 						"Unexpected status code: %s errors: %s", urlConn.getResponseCode(),
 						MAPPER.writeValueAsString(report)));
 		} catch (MalformedURLException | UnknownHostException e) {
 			log.error(e.getMessage(), e);
 			report.put(e.getClass().getName(), e.getMessage());
 			throw new CollectorException(e.getMessage(), e);
 		} catch (SocketTimeoutException | SocketException e) {
 			log.error(e.getMessage(), e);
 			report.put(e.getClass().getName(), e.getMessage());
 			backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000);
 			return attemptDownload(requestUrl, retryNumber + 1, report);
 		}
 	}
 	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
 		log.debug("StatusCode: " + urlConn.getResponseMessage());
 		for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
 			if (e.getKey() != null) {
 				for (String v : e.getValue()) {
 					log.debug("  key: " + e.getKey() + " - value: " + v);
 				}
 			}
 		}
 	}
 	private void backoffAndSleep(int sleepTimeMs) throws CollectorException {
 		log.info("I'm going to sleep for {}ms", sleepTimeMs);
 		try {
 			Thread.sleep(sleepTimeMs);
 		} catch (InterruptedException e) {
 			log.error(e.getMessage(), e);
 			throw new CollectorException(e);
 		}
 	}
 	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
 		for (String key : headerMap.keySet()) {
 			if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0)
 				&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
 				return Integer.parseInt(headerMap.get(key).get(0)) + 10;
 			}
 		}
 		return -1;
 	}
 	private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorException {
 		for (String key : headerMap.keySet()) {
 			if ((key != null) && key.equalsIgnoreCase(HttpHeaders.LOCATION) && (headerMap.get(key).size() > 0)) {
 				return headerMap.get(key).get(0);
 			}
 		}
 		throw new CollectorException("The requested url has been MOVED, but 'location' param is MISSING");
 	}
 	private boolean is2xx(final int statusCode) {
 		return statusCode >= 200 && statusCode <= 299;
 	}
 	private boolean is4xx(final int statusCode) {
 		return statusCode >= 400 && statusCode <= 499;
 	}
 	private boolean is3xx(final int statusCode) {
 		return statusCode >= 300 && statusCode <= 399;
 	}
 	private boolean is5xx(final int statusCode) {
 		return statusCode >= 500 && statusCode <= 599;
 	}
 	public String getResponseType() {
 		return responseType;
 	}
 	public HttpClientParams getClientParams() {
 		return clientParams;
 	}
 	public void setClientParams(HttpClientParams clientParams) {
 		this.clientParams = clientParams;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/JsonUtils.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/JsonUtils.java
@ -0,0 +1,84 @@
 package eu.dnetlib.dhp.collection;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 public class JsonUtils {
 	private static final Log log = LogFactory.getLog(JsonUtils.class);
 	public static final String wrapName = "recordWrap";
 	// Patterns are compiled once; each one targets a quoted token that is immediately followed by ':'
 	private static final java.util.regex.Pattern KEY_WHITESPACE = java.util.regex.Pattern
 		.compile("\"([^\"]*)\\s+([^\"]*)\":");
 	private static final java.util.regex.Pattern KEY_SLASH = java.util.regex.Pattern
 		.compile("\"([^\"]*)/([^\"]*)\":");
 	private static final java.util.regex.Pattern KEY_OPEN_PAREN = java.util.regex.Pattern
 		.compile("\"([^\"]*)[(]([^\"]*)\":");
 	private static final java.util.regex.Pattern KEY_CLOSE_PAREN = java.util.regex.Pattern
 		.compile("\"([^\"]*)[)]([^\"]*)\":");
 	private static final java.util.regex.Pattern KEY_DIGIT_AT_SECOND_CHAR = java.util.regex.Pattern
 		.compile("\"([^\"][0-9])([^\"]*)\":");
 	private static final java.util.regex.Pattern KEY_ONLY_DIGITS = java.util.regex.Pattern
 		.compile("\"([0-9]+)\":");
 	private static final java.util.regex.Pattern KEY_COLON_BETWEEN_DIGITS = java.util.regex.Pattern
 		.compile("\"([^\"]*[0-9]):([0-9][^\"]*)\":");
 	private static final java.util.regex.Pattern KEY_EQUALS = java.util.regex.Pattern
 		.compile("\"([^\"]*)=([^\"]*)\":");
 	/**
 	 * Rewrites JSON key names so that they can be used as XML tag names by the org.json.XML converter:
 	 * whitespace and '/' become '_', '(' and ')' are dropped, '=' becomes '-', ':' between digits is
 	 * dropped and numeric-looking names get a 'n_' / 'm_' prefix.
 	 * See https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names.
 	 *
 	 * The substitutions are purely textual (regular expressions on the raw JSON text) and are applied
 	 * also when the input spans several lines.
 	 *
 	 * Known limitation: keys are not told apart from string values, so a quoted token that happens
 	 * to be followed by '":' (for instance an array element starting with ':') can be rewritten too.
 	 *
 	 * @param jsonInput the JSON text
 	 * @return the JSON text with the converted key names
 	 */
 	public String syntaxConvertJsonKeyNames(String jsonInput) {
 		if (log.isTraceEnabled()) {
 			log.trace("before convertJsonKeyNames: " + jsonInput);
 		}
 		// whitespace inside names would be misread as "element with attributes" in XML: use '_'
 		jsonInput = replaceUntilStable(KEY_WHITESPACE, "\"$1_$2\":", jsonInput);
 		// forward-slash '/' in names -> '_'
 		jsonInput = replaceUntilStable(KEY_SLASH, "\"$1_$2\":", jsonInput);
 		// '(' and ')' in names are dropped
 		jsonInput = replaceUntilStable(KEY_OPEN_PAREN, "\"$1$2\":", jsonInput);
 		jsonInput = replaceUntilStable(KEY_CLOSE_PAREN, "\"$1$2\":", jsonInput);
 		// names having a digit as second character get the 'n_' prefix
 		// NOTE(review): the original comment speaks of names *starting* with a number - confirm the regex
 		jsonInput = replaceUntilStable(KEY_DIGIT_AT_SECOND_CHAR, "\"n_$1$2\":", jsonInput);
 		// names made only of digits get the 'm_' prefix
 		jsonInput = replaceUntilStable(KEY_ONLY_DIGITS, "\"m_$1\":", jsonInput);
 		// ':' between digits (like in '2018-08-28T11:05:00Z') is dropped
 		jsonInput = replaceUntilStable(KEY_COLON_BETWEEN_DIGITS, "\"$1$2\":", jsonInput);
 		// replacing ',' with '.' in names is intentionally left disabled, as in the original code
 		// '=' in names -> '-'
 		jsonInput = replaceUntilStable(KEY_EQUALS, "\"$1-$2\":", jsonInput);
 		if (log.isTraceEnabled()) {
 			log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
 		}
 		return jsonInput;
 	}
 	/**
 	 * Applies replaceAll repeatedly until the pattern is not found anymore (one pass fixes only one
 	 * occurrence per name). find() is used instead of String.matches(".*...*"), which never matches
 	 * text containing line terminators.
 	 */
 	private static String replaceUntilStable(final java.util.regex.Pattern pattern, final String replacement,
 		final String input) {
 		String current = input;
 		java.util.regex.Matcher matcher = pattern.matcher(current);
 		while (matcher.find()) {
 			current = matcher.replaceAll(replacement);
 			matcher = pattern.matcher(current);
 		}
 		return current;
 	}
 	/**
 	 * Converts a JSON record into an XML document: key names are normalised, the object is wrapped
 	 * in a single {@link #wrapName} root element and the result is passed through XmlCleaner.
 	 *
 	 * @param jsonRecord the JSON object, as text
 	 * @return the XML document, XML declaration included
 	 */
 	public String convertToXML(final String jsonRecord) {
 		String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 		org.json.JSONObject jsonObject = new org.json.JSONObject(syntaxConvertJsonKeyNames(jsonRecord));
 		resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
 		log.trace("before inputStream: " + resultXml);
 		resultXml = XmlCleaner.cleanAllEntities(resultXml);
 		log.trace("after cleaning: " + resultXml);
 		return resultXml;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/UnknownCollectorPluginException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/UnknownCollectorPluginException.java
@ -0,0 +1,32 @@
 package eu.dnetlib.dhp.collection;
 public class UnknownCollectorPluginException extends Exception {
 	/** Serialization version identifier. */
 	private static final long serialVersionUID = -290723075076039757L;
 	/**
 	 * Creates the exception without message nor cause.
 	 */
 	public UnknownCollectorPluginException() {
 	}
 	/**
 	 * @param message the detail message
 	 */
 	public UnknownCollectorPluginException(final String message) {
 		super(message);
 	}
 	/**
 	 * @param cause the underlying cause
 	 */
 	public UnknownCollectorPluginException(final Throwable cause) {
 		super(cause);
 	}
 	/**
 	 * @param message the detail message
 	 * @param cause the underlying cause
 	 */
 	public UnknownCollectorPluginException(final String message, final Throwable cause) {
 		super(message, cause);
 	}
 	/**
 	 * Full-control constructor, forwarding every argument to {@link Exception}.
 	 *
 	 * @param message the detail message
 	 * @param cause the underlying cause
 	 * @param enableSuppression whether suppression is enabled
 	 * @param writableStackTrace whether the stack trace is writable
 	 */
 	public UnknownCollectorPluginException(
 		final String message,
 		final Throwable cause,
 		final boolean enableSuppression,
 		final boolean writableStackTrace) {
 		super(message, cause, enableSuppression, writableStackTrace);
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.collection.worker.utils;
+package eu.dnetlib.dhp.collection;
 import java.util.HashMap;
 import java.util.HashSet;
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
@ -3,10 +3,21 @@ package eu.dnetlib.dhp.collection.plugin;
 import java.util.stream.Stream;
-import eu.dnetlib.collector.worker.model.ApiDescriptor;
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
-import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
+import eu.dnetlib.dhp.collection.ApiDescriptor;
 import eu.dnetlib.dhp.collection.CollectorException;
 public interface CollectorPlugin {
-	Stream<String> collect(ApiDescriptor api) throws DnetCollectorException;
+	enum NAME {
 		oai, other, rest_json2xml;
 		public enum OTHER_NAME {
 			mdstore_mongodb_dump, mdstore_mongodb
 		}
 	}
 	Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException;
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java
@ -0,0 +1,60 @@
 package eu.dnetlib.dhp.collection.plugin.mongodb;
 import java.util.Optional;
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 import org.bson.Document;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.mongodb.MongoClient;
 import com.mongodb.MongoClientURI;
 import com.mongodb.client.MongoCollection;
 import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.collection.ApiDescriptor;
 import eu.dnetlib.dhp.collection.CollectorException;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
 import eu.dnetlib.dhp.common.MdstoreClient;
 public class MDStoreCollectorPlugin implements CollectorPlugin {
 	private static final Logger log = LoggerFactory.getLogger(MDStoreCollectorPlugin.class);
 	public static final String MONGODB_DBNAME = "mongodb_dbname";
 	public static final String MDSTORE_ID = "mdstore_id";
 	/**
 	 * Streams the "body" field of every document stored in a MongoDB mdstore collection.
 	 * The connection url comes from the api baseUrl, database name and mdstore id from the api
 	 * parameters {@link #MONGODB_DBNAME} and {@link #MDSTORE_ID}.
 	 *
 	 * @param api the api descriptor holding baseUrl and parameters
 	 * @param report the aggregation report (not used here)
 	 * @return a sequential stream of record bodies
 	 * @throws CollectorException when the baseUrl or one of the two parameters is missing
 	 */
 	@Override
 	public Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException {
 		final String mongoBaseUrl = api.getBaseUrl();
 		if (mongoBaseUrl == null) {
 			throw new CollectorException(
 				"missing mongodb baseUrl, expected in eu.dnetlib.dhp.collection.ApiDescriptor.baseUrl");
 		}
 		log.info("mongoBaseUrl: {}", mongoBaseUrl);
 		final String dbName = mandatoryParam(api, MONGODB_DBNAME);
 		log.info("dbName: {}", dbName);
 		final String mdId = mandatoryParam(api, MDSTORE_ID);
 		log.info("mdId: {}", mdId);
 		// NOTE(review): the client is never released here - confirm who owns its lifecycle
 		final MongoCollection<Document> mdstore = new MdstoreClient(mongoBaseUrl, dbName).mdStore(mdId);
 		// the collection size is read upfront so that the spliterator can be declared SIZED
 		final long size = mdstore.count();
 		final Spliterator<Document> documents = Spliterators
 			.spliterator(mdstore.find().iterator(), size, Spliterator.SIZED);
 		return StreamSupport
 			.stream(documents, false)
 			.map(doc -> doc.getString("body"));
 	}
 	/**
 	 * Reads an api parameter, failing when it is not set.
 	 */
 	private static String mandatoryParam(final ApiDescriptor api, final String name) throws CollectorException {
 		final String value = api.getParams().get(name);
 		if (value == null) {
 			throw new CollectorException(String.format("missing parameter '%s'", name));
 		}
 		return value;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
@ -0,0 +1,54 @@
 package eu.dnetlib.dhp.collection.plugin.mongodb;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.nio.charset.Charset;
 import java.util.Optional;
 import java.util.stream.Stream;
 import java.util.zip.GZIPInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.collection.ApiDescriptor;
 import eu.dnetlib.dhp.collection.CollectorException;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
 import eu.dnetlib.dhp.utils.DHPUtils;
 public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
 	public static final String PATH_PARAM = "path";
 	public static final String BODY_JSONPATH = "$.body";
 	public FileSystem fileSystem;
 	public MongoDbDumpCollectorPlugin(FileSystem fileSystem) {
 		this.fileSystem = fileSystem;
 	}
 	@Override
 	public Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException {
 		final Path path = Optional
 			.ofNullable(api.getParams().get("path"))
 			.map(Path::new)
 			.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", PATH_PARAM)));
 		try {
 			if (!fileSystem.exists(path)) {
 				throw new CollectorException("path does not exist: " + path.toString());
 			}
 			return new BufferedReader(
 				new InputStreamReader(new GZIPInputStream(fileSystem.open(path)), Charset.defaultCharset()))
 					.lines()
 					.map(s -> DHPUtils.getJPathString(BODY_JSONPATH, s));
 		} catch (IOException e) {
 			throw new CollectorException(e);
 		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
@ -13,9 +13,11 @@ import com.google.common.base.Splitter;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
-import eu.dnetlib.collector.worker.model.ApiDescriptor;
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.collection.ApiDescriptor;
 import eu.dnetlib.dhp.collection.CollectorException;
 import eu.dnetlib.dhp.collection.HttpClientParams;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
 import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
 public class OaiCollectorPlugin implements CollectorPlugin {
@ -26,8 +28,15 @@ public class OaiCollectorPlugin implements CollectorPlugin {
 	private OaiIteratorFactory oaiIteratorFactory;
 	private HttpClientParams clientParams;
 	public OaiCollectorPlugin(HttpClientParams clientParams) {
 		this.clientParams = clientParams;
 	}
 	@Override
-	public Stream<String> collect(final ApiDescriptor api) throws DnetCollectorException {
+	public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report)
 		throws CollectorException {
 		final String baseUrl = api.getBaseUrl();
 		final String mdFormat = api.getParams().get(FORMAT_PARAM);
 		final String setParam = api.getParams().get(OAI_SET_PARAM);
@ -46,26 +55,26 @@ public class OaiCollectorPlugin implements CollectorPlugin {
 		}
 		if (baseUrl == null || baseUrl.isEmpty()) {
-			throw new DnetCollectorException("Param 'baseurl' is null or empty");
+			throw new CollectorException("Param 'baseurl' is null or empty");
 		}
 		if (mdFormat == null || mdFormat.isEmpty()) {
-			throw new DnetCollectorException("Param 'mdFormat' is null or empty");
+			throw new CollectorException("Param 'mdFormat' is null or empty");
 		}
 		if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
-			throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
+			throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
 		}
 		if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
-			throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
+			throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
 		}
 		final Iterator<Iterator<String>> iters = sets
 			.stream()
 			.map(
 				set -> getOaiIteratorFactory()
-					.newIterator(baseUrl, mdFormat, set, fromDate, untilDate))
+					.newIterator(baseUrl, mdFormat, set, fromDate, untilDate, getClientParams(), report))
 			.iterator();
 		return StreamSupport
@ -79,4 +88,12 @@ public class OaiCollectorPlugin implements CollectorPlugin {
 		}
 		return oaiIteratorFactory;
 	}
 	public HttpClientParams getClientParams() {
 		return clientParams;
 	}
 	public void setClientParams(HttpClientParams clientParams) {
 		this.clientParams = clientParams;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
@ -1,7 +1,9 @@
 package eu.dnetlib.dhp.collection.plugin.oai;
 import java.io.IOException;
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.URLEncoder;
 import java.util.Iterator;
@ -9,24 +11,28 @@ import java.util.Queue;
 import java.util.concurrent.PriorityBlockingQueue;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
 import org.dom4j.DocumentHelper;
 import org.dom4j.Node;
 import org.dom4j.io.OutputFormat;
 import org.dom4j.io.SAXReader;
 import org.dom4j.io.XMLWriter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
-import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
+import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
+import eu.dnetlib.dhp.collection.HttpConnector2;
 import eu.dnetlib.dhp.collection.XmlCleaner;
 public class OaiIterator implements Iterator<String> {
-	private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on
+	private static final Logger log = LoggerFactory.getLogger(OaiIterator.class);
-	// 11/24/08 5:02 PM
+
 	private final static String REPORT_PREFIX = "oai:";
 	private final Queue<String> queue = new PriorityBlockingQueue<>();
 	private final SAXReader reader = new SAXReader();
 	private final String baseUrl;
 	private final String set;
@ -35,7 +41,8 @@ public class OaiIterator implements Iterator<String> {
 	private final String untilDate;
 	private String token;
 	private boolean started;
-	private final HttpConnector httpConnector;
+	private final HttpConnector2 httpConnector;
 	private final AggregatorReport report;
 	public OaiIterator(
 		final String baseUrl,
@ -43,7 +50,8 @@ public class OaiIterator implements Iterator<String> {
 		final String set,
 		final String fromDate,
 		final String untilDate,
-		final HttpConnector httpConnector) {
+		final HttpConnector2 httpConnector,
 		final AggregatorReport report) {
 		this.baseUrl = baseUrl;
 		this.mdFormat = mdFormat;
 		this.set = set;
@ -51,6 +59,7 @@ public class OaiIterator implements Iterator<String> {
 		this.untilDate = untilDate;
 		this.started = false;
 		this.httpConnector = httpConnector;
 		this.report = report;
 	}
 	private void verifyStarted() {
@ -58,7 +67,7 @@ public class OaiIterator implements Iterator<String> {
 			this.started = true;
 			try {
 				this.token = firstPage();
-			} catch (final DnetCollectorException e) {
+			} catch (final CollectorException e) {
 				throw new RuntimeException(e);
 			}
 		}
@ -80,7 +89,7 @@ public class OaiIterator implements Iterator<String> {
 			while (queue.isEmpty() && token != null && !token.isEmpty()) {
 				try {
 					token = otherPages(token);
-				} catch (final DnetCollectorException e) {
+				} catch (final CollectorException e) {
 					throw new RuntimeException(e);
 				}
 			}
@ -92,7 +101,7 @@ public class OaiIterator implements Iterator<String> {
 	public void remove() {
 	}
-	private String firstPage() throws DnetCollectorException {
+	private String firstPage() throws CollectorException {
 		try {
 			String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8");
 			if (set != null && !set.isEmpty()) {
@ -108,7 +117,8 @@ public class OaiIterator implements Iterator<String> {
 			return downloadPage(url);
 		} catch (final UnsupportedEncodingException e) {
-			throw new DnetCollectorException(e);
+			report.put(e.getClass().getName(), e.getMessage());
 			throw new CollectorException(e);
 		}
 	}
@ -126,32 +136,35 @@ public class OaiIterator implements Iterator<String> {
 		return result.trim();
 	}
-	private String otherPages(final String resumptionToken) throws DnetCollectorException {
+	private String otherPages(final String resumptionToken) throws CollectorException {
 		try {
 			return downloadPage(
 				baseUrl
 					+ "?verb=ListRecords&resumptionToken="
 					+ URLEncoder.encode(resumptionToken, "UTF-8"));
 		} catch (final UnsupportedEncodingException e) {
-			throw new DnetCollectorException(e);
+			report.put(e.getClass().getName(), e.getMessage());
 			throw new CollectorException(e);
 		}
 	}
-	private String downloadPage(final String url) throws DnetCollectorException {
+	private String downloadPage(final String url) throws CollectorException {
-		final String xml = httpConnector.getInputSource(url);
+		final String xml = httpConnector.getInputSource(url, report);
 		Document doc;
 		try {
-			doc = reader.read(new StringReader(xml));
+			doc = DocumentHelper.parseText(xml);
 		} catch (final DocumentException e) {
-			log.warn("Error parsing xml, I try to clean it: " + xml, e);
+			log.warn("Error parsing xml, I try to clean it. {}", e.getMessage());
 			report.put(e.getClass().getName(), e.getMessage());
 			final String cleaned = XmlCleaner.cleanAllEntities(xml);
 			try {
-				doc = reader.read(new StringReader(cleaned));
+				// second attempt: parse the cleaned text (re-parsing the raw xml would fail again)
+				doc = DocumentHelper.parseText(cleaned);
 			} catch (final DocumentException e1) {
 				final String resumptionToken = extractResumptionToken(xml);
 				if (resumptionToken == null) {
-					throw new DnetCollectorException("Error parsing cleaned document:" + cleaned, e1);
+					report.put(e1.getClass().getName(), e1.getMessage());
 					throw new CollectorException("Error parsing cleaned document:\n" + cleaned, e1);
 				}
 				return resumptionToken;
 			}
@ -159,19 +172,35 @@ public class OaiIterator implements Iterator<String> {
 		final Node errorNode = doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
 		if (errorNode != null) {
-			final String code = errorNode.valueOf("@code");
+			final String code = errorNode.valueOf("@code").trim();
-			if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
+			if ("noRecordsMatch".equalsIgnoreCase(code)) {
-				log.warn("noRecordsMatch for oai call: " + url);
+				final String msg = "noRecordsMatch for oai call : " + url;
 				log.warn(msg);
 				report.put(REPORT_PREFIX + code, msg);
 				return null;
 			} else {
-				throw new DnetCollectorException(code + " - " + errorNode.getText());
+				final String msg = code + " - " + errorNode.getText();
 				report.put(REPORT_PREFIX + "error", msg);
 				throw new CollectorException(msg);
 			}
 		}
 		for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
-			queue.add(((Node) o).asXML());
+			final StringWriter sw = new StringWriter();
 			final XMLWriter writer = new XMLWriter(sw, OutputFormat.createPrettyPrint());
 			try {
 				writer.write((Node) o);
 				queue.add(sw.toString());
 			} catch (IOException e) {
 				report.put(e.getClass().getName(), e.getMessage());
 				throw new CollectorException("Error parsing XML record:\n" + ((Node) o).asXML(), e);
 			}
 		}
 		return doc.valueOf("//*[local-name()='resumptionToken']");
 	}
 	public AggregatorReport getReport() {
 		return report;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
@ -3,24 +3,28 @@ package eu.dnetlib.dhp.collection.plugin.oai;
 import java.util.Iterator;
-import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
+import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.collection.HttpClientParams;
 import eu.dnetlib.dhp.collection.HttpConnector2;
 public class OaiIteratorFactory {
-	private HttpConnector httpConnector;
+	private HttpConnector2 httpConnector;
 	public Iterator<String> newIterator(
 		final String baseUrl,
 		final String mdFormat,
 		final String set,
 		final String fromDate,
-		final String untilDate) {
+		final String untilDate,
-		return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector());
+		final HttpClientParams clientParams,
 		final AggregatorReport report) {
 		return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector(clientParams), report);
 	}
-	private HttpConnector getHttpConnector() {
+	private HttpConnector2 getHttpConnector(HttpClientParams clientParams) {
 		if (httpConnector == null)
-			httpConnector = new HttpConnector();
+			httpConnector = new HttpConnector2(clientParams);
 		return httpConnector;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
@ -0,0 +1,105 @@
 package eu.dnetlib.dhp.collection.plugin.rest;
 import java.util.Optional;
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 import org.apache.commons.lang3.StringUtils;
 import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.collection.ApiDescriptor;
 import eu.dnetlib.dhp.collection.CollectorException;
 import eu.dnetlib.dhp.collection.HttpClientParams;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
 /**
 * Collector plugin for REST APIs: reads the harvesting settings from the parameters of the {@link ApiDescriptor},
 * checks the mandatory ones and exposes the records produced by a {@link RestIterator} as a sequential stream.
 * <p>
 * TODO: delegate HTTP requests to the common HttpConnector2 implementation.
 *
 * @author 	js, Andreas Czerniak
 * @date 	2020-04-09
 *
 */
 public class RestCollectorPlugin implements CollectorPlugin {
 	/** Page size used when the API descriptor does not provide a non-blank 'resultSizeValue'. */
 	public static final String RESULT_SIZE_VALUE_DEFAULT = "100";
 	private final HttpClientParams clientParams;
 	public RestCollectorPlugin(HttpClientParams clientParams) {
 		this.clientParams = clientParams;
 	}
 	/**
 	 * Creates the stream of records harvested from the REST API described by {@code api}.
 	 *
 	 * @param api descriptor providing the base URL and the harvesting parameters
 	 * @param report aggregation report (not used by this implementation)
 	 * @return an ordered, sequential stream backed by a {@link RestIterator}
 	 * @throws CollectorException when baseUrl, resumptionType, resumptionParam, resultFormatValue, queryParams or
 	 *         entityXpath is null or blank
 	 */
 	@Override
 	public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException {
 		// every setting is read before anything is validated
 		final String baseUrl = api.getBaseUrl();
 		final String resumptionType = param(api, "resumptionType");
 		final String resumptionParam = param(api, "resumptionParam");
 		final String resumptionXpath = param(api, "resumptionXpath");
 		final String resultTotalXpath = param(api, "resultTotalXpath");
 		final String resultFormatParam = param(api, "resultFormatParam");
 		final String resultFormatValue = param(api, "resultFormatValue");
 		final String resultSizeParam = param(api, "resultSizeParam");
 		final String queryParams = param(api, "queryParams");
 		final String entityXpath = param(api, "entityXpath");
 		final String authMethod = param(api, "authMethod");
 		final String authToken = param(api, "authToken");
 		final String configuredSize = param(api, "resultSizeValue");
 		final String resultSizeValue = StringUtils.isNotBlank(configuredSize)
 			? configuredSize
 			: RESULT_SIZE_VALUE_DEFAULT;

 		requireNotBlank("baseUrl", baseUrl);
 		requireNotBlank("resumptionType", resumptionType);
 		requireNotBlank("resumptionParam", resumptionParam);
 		requireNotBlank("resultFormatValue", resultFormatValue);
 		requireNotBlank("queryParams", queryParams);
 		requireNotBlank("entityXpath", entityXpath);

 		// the output format falls back to the (lower-cased) requested format when not declared explicitly
 		final String configuredOutputFormat = param(api, "resultOutputFormat");
 		final String resultOutputFormat = StringUtils.isNotBlank(configuredOutputFormat)
 			? configuredOutputFormat.toLowerCase()
 			: resultFormatValue.toLowerCase();

 		final RestIterator records = new RestIterator(
 			getClientParams(),
 			baseUrl,
 			resumptionType,
 			resumptionParam,
 			resumptionXpath,
 			resultTotalXpath,
 			resultFormatParam,
 			resultFormatValue,
 			resultSizeParam,
 			resultSizeValue,
 			queryParams,
 			entityXpath,
 			authMethod,
 			authToken,
 			resultOutputFormat);
 		final Spliterator<String> source = Spliterators.spliteratorUnknownSize(records, Spliterator.ORDERED);
 		return StreamSupport.stream(source, false);
 	}
 	public HttpClientParams getClientParams() {
 		return clientParams;
 	}
 	/** Looks up one harvesting parameter of the API descriptor; null when it is not set. */
 	private static String param(final ApiDescriptor api, final String name) {
 		return api.getParams().get(name);
 	}
 	/** Rejects a mandatory parameter that is null or blank. */
 	private static void requireNotBlank(final String name, final String value) throws CollectorException {
 		if (StringUtils.isBlank(value)) {
 			throw new CollectorException("Param '" + name + "' is null or empty");
 		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
@ -0,0 +1,411 @@
 package eu.dnetlib.dhp.collection.plugin.rest;
 import java.io.InputStream;
 import java.io.StringWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.HttpURLConnection;
 import java.net.URL;
 import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.Iterator;
 import java.util.Queue;
 import java.util.concurrent.PriorityBlockingQueue;
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerConfigurationException;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 import javax.xml.xpath.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.http.HttpHeaders;
 import org.apache.http.entity.ContentType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;
 import eu.dnetlib.dhp.collection.CollectorException;
 import eu.dnetlib.dhp.collection.HttpClientParams;
 import eu.dnetlib.dhp.collection.JsonUtils;
 /**
 * Iterator over the records of a paginated REST API: pages are downloaded on demand, the entities selected by the
 * entity xpath are serialized to XML strings and queued, and the query for the following page is derived according to
 * the configured resumptionType (scan, count, discover, pagination/page, deep-cursor).
 * <p>
 * log.info(...) equal to  log.trace(...) in the application-logs
 * <p>
 * known bug: at resumptionType 'discover' if the (resultTotal % resultSizeValue) == 0 the collecting fails -> change the resultSizeValue
 *
 * @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak
 * @date 2020-04-09
 *
 */
 public class RestIterator implements Iterator<String> {
 	private static final Logger log = LoggerFactory.getLogger(RestIterator.class);
 	public static final String UTF_8 = "UTF-8";
 	// value of authMethod selecting the "Basic" Authorization header
 	private static final String BASIC = "basic";
 	private final HttpClientParams clientParams;
 	private final JsonUtils jsonUtils;
 	private final String baseUrl;
 	private final String resumptionType;
 	private final String resumptionParam;
 	private final String resultFormatValue;
 	private String queryParams;
 	private final int resultSizeValue;
 	private int resumptionInt = 0; // integer resumption token (first record to harvest)
 	private int resultTotal = -1;
 	private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest
 																	// or token scanned from results)
 	private InputStream resultStream;
 	private Transformer transformer;
 	private XPath xpath;
 	private String query;
 	private XPathExpression xprResultTotalPath;
 	private XPathExpression xprResumptionPath;
 	private XPathExpression xprEntity;
 	private final String queryFormat;
 	private final String querySize;
 	private final String authMethod;
 	private final String authToken;
 	// NOTE(review): a PriorityBlockingQueue hands records out in natural String order, not in download order --
 	// confirm this is intended.
 	private final Queue<String> recordQueue = new PriorityBlockingQueue<String>();
 	private int discoverResultSize = 0;
 	private int pagination = 1;
 	/*
 	 * While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in
 	 * json. useful for cases when the target API expects a resultFormatValue != json, but the results are returned in
 	 * json. An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format.
 	 */
 	private final String resultOutputFormat;
 	/** RestIterator class
 	 *  compatible to version 1.3.33
 	 *
 	 * @throws NumberFormatException when resultSizeValueStr is not an integer
 	 * @throws IllegalStateException when the XML transformer or one of the xpath expressions cannot be created
 	 */
 	public RestIterator(
 		final HttpClientParams clientParams,
 		final String baseUrl,
 		final String resumptionType,
 		final String resumptionParam,
 		final String resumptionXpath,
 		final String resultTotalXpath,
 		final String resultFormatParam,
 		final String resultFormatValue,
 		final String resultSizeParam,
 		final String resultSizeValueStr,
 		final String queryParams,
 		final String entityXpath,
 		final String authMethod,
 		final String authToken,
 		final String resultOutputFormat) {
 		this.clientParams = clientParams;
 		this.jsonUtils = new JsonUtils();
 		this.baseUrl = baseUrl;
 		this.resumptionType = resumptionType;
 		this.resumptionParam = resumptionParam;
 		this.resultFormatValue = resultFormatValue;
 		this.resultSizeValue = Integer.parseInt(resultSizeValueStr);
 		this.queryParams = queryParams;
 		this.authMethod = authMethod;
 		this.authToken = authToken;
 		this.resultOutputFormat = resultOutputFormat;
 		queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue
 			: "";
 		querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
 		try {
 			initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
 		} catch (Exception e) {
 			// keep the original exception as cause so the failing xpath/transformer setup stays diagnosable
 			throw new IllegalStateException("xml transformation init failed: " + e.getMessage(), e);
 		}
 		initQueue();
 	}
 	/**
 	 * Creates the transformer used to serialize entities and compiles the three xpath expressions; a blank
 	 * resumption xpath is replaced by "/".
 	 */
 	private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
 		throws TransformerConfigurationException, XPathExpressionException {
 		transformer = TransformerFactory.newInstance().newTransformer();
 		transformer.setOutputProperty(OutputKeys.INDENT, "yes");
 		transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
 		xpath = XPathFactory.newInstance().newXPath();
 		xprResultTotalPath = xpath.compile(resultTotalXpath);
 		xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
 		xprEntity = xpath.compile(entityXpath);
 	}
 	/** Builds the query of the first page; nothing is downloaded here. */
 	private void initQueue() {
 		query = baseUrl + "?" + queryParams + querySize + queryFormat;
 		log.info("REST calls starting with " + query);
 	}
 	/** Releases the stream of the last downloaded page, if any. */
 	private void disconnect() {
 		IOUtils.closeQuietly(resultStream);
 		resultStream = null;
 	}
 	/**
 	 * Downloads pages until at least one record is queued or no follow-up query is left.
 	 *
 	 * @throws RuntimeException wrapping the CollectorException raised while downloading a page
 	 */
 	private void fillQueue() {
 		while (recordQueue.isEmpty() && !query.isEmpty()) {
 			try {
 				query = downloadPage(query);
 			} catch (CollectorException e) {
 				log.debug("CollectorPlugin.next()-Exception: " + e);
 				throw new RuntimeException(e);
 			}
 		}
 	}
 	/*
 	 * (non-Javadoc)
 	 * @see java.util.Iterator#hasNext()
 	 */
 	/**
 	 * Tells whether a record is available. Pending pages are fetched here, so that {@code true} is only returned when
 	 * {@link #next()} has a record to hand out, even if the remaining pages turn out to be empty.
 	 */
 	@Override
 	public boolean hasNext() {
 		synchronized (recordQueue) {
 			fillQueue();
 			if (recordQueue.isEmpty()) {
 				disconnect();
 				return false;
 			}
 			return true;
 		}
 	}
 	/*
 	 * (non-Javadoc)
 	 * @see java.util.Iterator#next()
 	 */
 	/**
 	 * Returns the next queued record, downloading further pages when the queue is empty.
 	 *
 	 * @return the next record, or null when called although no record is left
 	 */
 	@Override
 	public String next() {
 		synchronized (recordQueue) {
 			fillQueue();
 			return recordQueue.poll();
 		}
 	}
 	/*
 	 * download page and return nextQuery
 	 */
 	/**
 	 * Requests {@code query}, queues the serialized entities of the response and computes the query of the following
 	 * page.
 	 *
 	 * @param query URL of the page to download
 	 * @return the URL of the next page, or the empty string when the harvesting is over
 	 * @throws IllegalStateException when the download, the parsing or the evaluation of the total fails
 	 */
 	private String downloadPage(String query) throws CollectorException {
 		String resultJson;
 		String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 		String nextQuery = "";
 		String emptyXml = resultXml + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";
 		Node resultNode = null;
 		NodeList nodeList = null;
 		String qUrlArgument = "";
 		int urlOldResumptionSize = 0;
 		InputStream theHttpInputStream = null;
 		// check if cursor=* is initial set otherwise add it to the queryParam URL
 		if (resumptionType.equalsIgnoreCase("deep-cursor")) {
 			log.debug("check resumptionType deep-cursor and check cursor=*?" + query);
 			if (!query.contains("&cursor=")) {
 				query += "&cursor=*";
 			}
 		}
 		try {
 			log.info("requestig URL [{}]", query);
 			URL qUrl = new URL(query);
 			log.debug("authMethod :" + authMethod);
 			if ("bearer".equalsIgnoreCase(this.authMethod)) {
 				log.trace("authMethod before inputStream: " + resultXml);
 				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
 				conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken);
 				conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
 				conn.setRequestMethod("GET");
 				theHttpInputStream = conn.getInputStream();
 			} else if (BASIC.equalsIgnoreCase(this.authMethod)) {
 				log.trace("authMethod before inputStream: " + resultXml);
 				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
 				conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken);
 				conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
 				conn.setRequestMethod("GET");
 				theHttpInputStream = conn.getInputStream();
 			} else {
 				theHttpInputStream = qUrl.openStream();
 			}
 			resultStream = theHttpInputStream;
 			if ("json".equals(resultOutputFormat)) {
 				resultJson = IOUtils.toString(resultStream, StandardCharsets.UTF_8);
 				resultXml = jsonUtils.convertToXML(resultJson);
 				resultStream = IOUtils.toInputStream(resultXml, UTF_8);
 			}
 			if (!(emptyXml).equalsIgnoreCase(resultXml)) {
 				resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
 				nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
 				log.debug("nodeList.length: " + nodeList.getLength());
 				for (int i = 0; i < nodeList.getLength(); i++) {
 					StringWriter sw = new StringWriter();
 					transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
 					String toEnqueue = sw.toString();
 					if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) {
 						log.warn("The following record resulted in empty item for the feeding queue: " + resultXml);
 					} else {
 						recordQueue.add(sw.toString());
 					}
 				}
 			} else {
 				log.warn("resultXml is equal with emptyXml");
 			}
 			resumptionInt += resultSizeValue;
 			switch (resumptionType.toLowerCase()) {
 				case "scan": // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items
 					resumptionStr = xprResumptionPath.evaluate(resultNode);
 					break;
 				case "count": // begin at one step for all records, iterate over items
 					resumptionStr = Integer.toString(resumptionInt);
 					break;
 				case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808)
 					if (resultSizeValue < 2) {
 						throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2");
 					}
 					qUrlArgument = qUrl.getQuery();
 					String[] arrayQUrlArgument = qUrlArgument.split("&");
 					for (String arrayUrlArgStr : arrayQUrlArgument) {
 						if (arrayUrlArgStr.startsWith(resumptionParam)) {
 							String[] resumptionKeyValue = arrayUrlArgStr.split("=");
 							if (resumptionKeyValue.length < 2) {
 								// the parameter carries no value (e.g. "offset="): nothing to inspect
 								continue;
 							}
 							if (isInteger(resumptionKeyValue[1])) {
 								urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
 								log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize);
 							} else {
 								log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]);
 							}
 						}
 					}
 					if (((emptyXml).equalsIgnoreCase(resultXml))
 						|| ((nodeList != null) && (nodeList.getLength() < resultSizeValue))) {
 						// resumptionStr = "";
 						if (nodeList != null) {
 							discoverResultSize += nodeList.getLength();
 						}
 						resultTotal = discoverResultSize;
 					} else {
 						resumptionStr = Integer.toString(resumptionInt);
 						resultTotal = resumptionInt + 1;
 						if (nodeList != null) {
 							discoverResultSize += nodeList.getLength();
 						}
 					}
 					log.info("discoverResultSize:  {}", discoverResultSize);
 					break;
 				case "pagination":
 				case "page": // pagination, iterate over page numbers
 					pagination += 1;
 					if (nodeList != null) {
 						discoverResultSize += nodeList.getLength();
 					} else {
 						resultTotal = discoverResultSize;
 						pagination = discoverResultSize;
 					}
 					resumptionInt = pagination;
 					resumptionStr = Integer.toString(resumptionInt);
 					break;
 				case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor in
 									// solr)
 					// isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode:
 					// deep-cursor, Param 'resultSizeValue' is less than 2");}
 					resumptionStr = encodeValue(xprResumptionPath.evaluate(resultNode));
 					queryParams = queryParams.replace("&cursor=*", "");
 					// an empty response leaves nodeList null: it is handled like a page without entities
 					final int pageLength = (nodeList != null) ? nodeList.getLength() : 0;
 					// terminating if length of nodeList is 0
 					if (pageLength < discoverResultSize) {
 						resumptionInt += (pageLength + 1 - resultSizeValue);
 					} else {
 						resumptionInt += (pageLength - resultSizeValue); // subtract the resultSizeValue
 																			// because the iteration is over
 																			// real length and the
 																			// resultSizeValue is added before
 																			// the switch()
 					}
 					discoverResultSize = pageLength;
 					log
 						.debug(
 							"downloadPage().deep-cursor: resumptionStr=" + resumptionStr + " ; queryParams="
 								+ queryParams + " resumptionLengthIncreased: " + resumptionInt);
 					break;
 				default: // otherwise: abort
 					// resultTotal = resumptionInt;
 					break;
 			}
 		} catch (Exception e) {
 			log.error(e.getMessage(), e);
 			throw new IllegalStateException("collection failed: " + e.getMessage(), e);
 		} finally {
 			// the response has been read completely (into a String or a DOM tree) at this point
 			IOUtils.closeQuietly(theHttpInputStream);
 		}
 		try {
 			if (resultTotal == -1) {
 				resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
 				if (resumptionType.equalsIgnoreCase("page") && !BASIC.equalsIgnoreCase(authMethod)) {
 					resultTotal += 1;
 				} // to correct the upper bound
 				log.info("resultTotal was -1 is now: " + resultTotal);
 			}
 		} catch (Exception e) {
 			log.error(e.getMessage(), e);
 			throw new IllegalStateException("downloadPage resultTotal couldn't parse: " + e.getMessage(), e);
 		}
 		log.debug("resultTotal: " + resultTotal);
 		log.debug("resInt: " + resumptionInt);
 		if (resumptionInt <= resultTotal) {
 			nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr
 				+ queryFormat;
 		} else {
 			nextQuery = "";
 			// if (resumptionType.toLowerCase().equals("deep-cursor")) { resumptionInt -= 1; } // correct the
 			// resumptionInt and prevent a NullPointer Exception at mdStore
 		}
 		log.debug("nextQueryUrl: " + nextQuery);
 		return nextQuery;
 	}
 	/** Tells whether {@code s} can be parsed by {@link Integer#parseInt(String)}. */
 	private boolean isInteger(String s) {
 		boolean isValidInteger = false;
 		try {
 			Integer.parseInt(s);
 			// s is a valid integer
 			isValidInteger = true;
 		} catch (NumberFormatException ex) {
 			// s is not an integer
 		}
 		return isValidInteger;
 	}
 	// Method to encode a string value using `UTF-8` encoding scheme
 	private String encodeValue(String value) {
 		try {
 			return URLEncoder.encode(value, StandardCharsets.UTF_8.toString());
 		} catch (UnsupportedEncodingException ex) {
 			// wrap the exception itself: its cause is usually null and would hide the failure
 			throw new RuntimeException(ex);
 		}
 	}
 	public String getResultFormatValue() {
 		return resultFormatValue;
 	}
 	public String getResultOutputFormat() {
 		return resultOutputFormat;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorker.java
@ -1,139 +0,0 @@
 package eu.dnetlib.dhp.collection.worker;
 import java.io.IOException;
 import java.net.URI;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.collector.worker.model.ApiDescriptor;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
 import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
 import eu.dnetlib.message.Message;
 import eu.dnetlib.message.MessageManager;
 import eu.dnetlib.message.MessageType;
 /**
  * Runs one collection: the records produced by the collector plugin selected through the API descriptor are appended
  * to a SequenceFile, while progress and the final report are sent through the {@link MessageManager}.
  */
 public class DnetCollectorWorker {
 	private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorker.class);
 	private final CollectorPluginFactory collectorPluginFactory;
 	private final ArgumentApplicationParser argumentParser;
 	private final MessageManager manager;
 	/**
 	 * Stores the collaborators; nothing is executed until {@link #collect()} is called.
 	 *
 	 * @param collectorPluginFactory factory resolving the plugin from the protocol of the API descriptor
 	 * @param argumentParser parsed job arguments (apidescriptor, namenode, userHDFS, hdfsPath, workflowId,
 	 *        rabbitOngoingQueue are read by collect())
 	 * @param manager channel used to send the ONGOING and REPORT messages
 	 */
 	public DnetCollectorWorker(
 		final CollectorPluginFactory collectorPluginFactory,
 		final ArgumentApplicationParser argumentParser,
 		final MessageManager manager)
 		throws DnetCollectorException {
 		this.collectorPluginFactory = collectorPluginFactory;
 		this.argumentParser = argumentParser;
 		this.manager = manager;
 	}
 	/**
 	 * Collects the records and writes them as (IntWritable index, Text record) pairs to the 'hdfsPath' argument.
 	 *
 	 * @throws DnetCollectorException wrapping any Throwable raised during the collection
 	 */
 	public void collect() throws DnetCollectorException {
 		try {
 			final ObjectMapper jsonMapper = new ObjectMapper();
 			final ApiDescriptor api = jsonMapper.readValue(argumentParser.get("apidescriptor"), ApiDescriptor.class);
 			final CollectorPlugin plugin = collectorPluginFactory.getPluginByProtocol(api.getProtocol());
 			final String hdfsuri = argumentParser.get("namenode");
 			// ====== Init HDFS File System Object
 			Configuration conf = new Configuration();
 			// Set FileSystem URI
 			conf.set("fs.defaultFS", hdfsuri);
 			// Because of Maven
 			conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
 			conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
 			System.setProperty("HADOOP_USER_NAME", argumentParser.get("userHDFS"));
 			System.setProperty("hadoop.home.dir", "/");
 			// Get the filesystem - HDFS
 			// NOTE(review): the returned FileSystem is discarded; the writer below is created from conf only
 			FileSystem.get(URI.create(hdfsuri), conf);
 			Path hdfswritepath = new Path(argumentParser.get("hdfsPath"));
 			log.info("Created path " + hdfswritepath.toString());
 			final Map<String, String> ongoingMap = new HashMap<>();
 			final Map<String, String> reportMap = new HashMap<>();
 			final AtomicInteger counter = new AtomicInteger(0);
 			try (SequenceFile.Writer writer = SequenceFile
 				.createWriter(
 					conf,
 					SequenceFile.Writer.file(hdfswritepath),
 					SequenceFile.Writer.keyClass(IntWritable.class),
 					SequenceFile.Writer.valueClass(Text.class))) {
 				// key and value are reused for every record
 				final IntWritable key = new IntWritable(counter.get());
 				final Text value = new Text();
 				plugin
 					.collect(api)
 					.forEach(
 						content -> {
 							// the key is the zero-based index; afterwards the counter holds the number of records
 							key.set(counter.getAndIncrement());
 							value.set(content);
 							// progress message every 10 records; a failure to send is logged and does not stop
 							// the collection
 							if (counter.get() % 10 == 0) {
 								try {
 									ongoingMap.put("ongoing", "" + counter.get());
 									log
 										.debug(
 											"Sending message: "
 												+ manager
 													.sendMessage(
 														new Message(
 															argumentParser.get("workflowId"),
 															"Collection",
 															MessageType.ONGOING,
 															ongoingMap),
 														argumentParser.get("rabbitOngoingQueue"),
 														true,
 														false));
 								} catch (Exception e) {
 									log.error("Error on sending message ", e);
 								}
 							}
 							try {
 								writer.append(key, value);
 							} catch (IOException e) {
 								// checked exceptions cannot leave the lambda
 								throw new RuntimeException(e);
 							}
 						});
 			}
 			// final progress message with the total, followed by the report
 			ongoingMap.put("ongoing", "" + counter.get());
 			manager
 				.sendMessage(
 					new Message(
 						argumentParser.get("workflowId"), "Collection", MessageType.ONGOING, ongoingMap),
 					argumentParser.get("rabbitOngoingQueue"),
 					true,
 					false);
 			reportMap.put("collected", "" + counter.get());
 			// NOTE(review): the REPORT message is sent to 'rabbitOngoingQueue' as well -- confirm
 			manager
 				.sendMessage(
 					new Message(
 						argumentParser.get("workflowId"), "Collection", MessageType.REPORT, reportMap),
 					argumentParser.get("rabbitOngoingQueue"),
 					true,
 					false);
 			// only reached when everything above succeeded
 			manager.close();
 		} catch (Throwable e) {
 			throw new DnetCollectorException("Error on collecting ", e);
 		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
@ -1,49 +0,0 @@
 package eu.dnetlib.dhp.collection.worker;
 import org.apache.commons.io.IOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
 import eu.dnetlib.message.MessageManager;
 /**
 * Main class starting the Dnet collection. The command line arguments are parsed against the bundled parameter
 * specification, a {@link MessageManager} is created from the rabbit* arguments and the collection itself is delegated
 * to a {@link DnetCollectorWorker}.
 *
 * @author Sandro La Bruzzo
 */
 public class DnetCollectorWorkerApplication {
 	private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
 	private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
 	private static ArgumentApplicationParser argumentParser;
 	/**
 	 * Parses the arguments and runs the collection.
 	 *
 	 * @param args command line arguments of the job
 	 * @throws Exception when the arguments cannot be parsed or the collection fails
 	 */
 	public static void main(final String[] args) throws Exception {
 		final String parameterSpec = IOUtils
 			.toString(
 				DnetCollectorWorker.class
 					.getResourceAsStream(
 						"/eu/dnetlib/collector/worker/collector_parameter.json"));
 		argumentParser = new ArgumentApplicationParser(parameterSpec);
 		argumentParser.parseArgument(args);

 		log.info("hdfsPath =" + argumentParser.get("hdfsPath"));
 		log.info("json = " + argumentParser.get("apidescriptor"));

 		final String rabbitHost = argumentParser.get("rabbitHost");
 		final String rabbitUser = argumentParser.get("rabbitUser");
 		final String rabbitPassword = argumentParser.get("rabbitPassword");
 		final MessageManager manager = new MessageManager(rabbitHost, rabbitUser, rabbitPassword, false, false, null);

 		new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager).collect();
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
@ -1,19 +0,0 @@
 package eu.dnetlib.dhp.collection.worker.utils;
 import java.util.LinkedList;
 /**
  * List of error messages, one per failed attempt, whose string form enumerates them with their position.
  */
 public class CollectorPluginErrorLogList extends LinkedList<String> {
 	private static final long serialVersionUID = -6925786561303289704L;
 	/**
 	 * Renders every message as {@code "Retry #<index>: <message> / "}, indexes starting at 0.
 	 *
 	 * @return the concatenation of all entries, the empty string for an empty list
 	 */
 	@Override
 	public String toString() {
 		// a builder avoids re-copying the whole text for every entry
 		final StringBuilder log = new StringBuilder();
 		int index = 0;
 		for (final String errorMessage : this) {
 			log.append("Retry #").append(index++).append(": ").append(errorMessage).append(" / ");
 		}
 		return log.toString();
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java
@ -1,20 +0,0 @@
 package eu.dnetlib.dhp.collection.worker.utils;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
 import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
 /**
  * Resolves the collector plugin implementing a given protocol.
  */
 public class CollectorPluginFactory {
 	/**
 	 * Returns the plugin for {@code protocol}; the name is matched case-insensitively and surrounding whitespace is
 	 * ignored.
 	 *
 	 * @param protocol protocol name, e.g. "oai"
 	 * @return a new plugin instance
 	 * @throws DnetCollectorException when the protocol is null or not supported
 	 */
 	public CollectorPlugin getPluginByProtocol(final String protocol) throws DnetCollectorException {
 		if (protocol == null)
 			throw new DnetCollectorException("protocol cannot be null");
 		// Locale.ROOT keeps the match independent from the default locale (with a Turkish locale "OAI" would be
 		// lower-cased with a dotless i and never match "oai")
 		switch (protocol.toLowerCase(java.util.Locale.ROOT).trim()) {
 			case "oai":
 				return new OaiCollectorPlugin();
 			default:
 				throw new DnetCollectorException("UNknown protocol");
 		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java
@ -1,244 +0,0 @@
 package eu.dnetlib.dhp.collection.worker.utils;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.*;
 import java.security.GeneralSecurityException;
 import java.security.cert.X509Certificate;
 import java.util.List;
 import java.util.Map;
 import javax.net.ssl.HttpsURLConnection;
 import javax.net.ssl.SSLContext;
 import javax.net.ssl.TrustManager;
 import javax.net.ssl.X509TrustManager;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.math.NumberUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
 /**
  * HTTP GET client with retries: 503 responses carrying a Retry-After header, 301/302 redirects, other non-200
  * responses and I/O errors are retried until maxNumberOfRetry attempts have been made.
  */
 public class HttpConnector {
 	private static final Log log = LogFactory.getLog(HttpConnector.class);
 	private int maxNumberOfRetry = 6;
 	private int defaultDelay = 120; // seconds
 	private int readTimeOut = 120; // seconds
 	private String responseType = null;
 	private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
 	/** Installs a JVM-wide default cookie handler accepting all cookies. */
 	public HttpConnector() {
 		CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
 	}
 	/**
 	 * Given the URL returns the content via HTTP GET
 	 *
 	 * @param requestUrl the URL
 	 * @return the content of the downloaded resource
 	 * @throws DnetCollectorException when retrying more than maxNumberOfRetry times
 	 */
 	public String getInputSource(final String requestUrl) throws DnetCollectorException {
 		return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
 	}
 	/**
 	 * Given the URL returns the content as a stream via HTTP GET
 	 *
 	 * @param requestUrl the URL
 	 * @return the content of the downloaded resource as InputStream
 	 * @throws DnetCollectorException when retrying more than maxNumberOfRetry times
 	 */
 	public InputStream getInputSourceAsStream(final String requestUrl) throws DnetCollectorException {
 		return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
 	}
 	/**
 	 * Downloads the resource and reads it into a String; a failure while reading the body counts as one attempt and
 	 * triggers a new download after defaultDelay seconds.
 	 */
 	private String attemptDownlaodAsString(
 		final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
 		throws DnetCollectorException {
 		try {
 			// the attempt number and the error list are handed over, so that read failures are bounded by
 			// maxNumberOfRetry as well and show up in the final error message
 			final InputStream s = attemptDownload(requestUrl, retryNumber, errorList);
 			try {
 				return IOUtils.toString(s);
 			} catch (final IOException e) {
 				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
 				Thread.sleep(defaultDelay * 1000L);
 				errorList.add(e.getMessage());
 				return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList);
 			} finally {
 				IOUtils.closeQuietly(s);
 			}
 		} catch (final InterruptedException e) {
 			// keep the interruption visible to the caller's thread
 			Thread.currentThread().interrupt();
 			throw new DnetCollectorException(e);
 		}
 	}
 	/**
 	 * Performs one GET attempt and recurses with retryNumber + 1 whenever the response is not a plain 200.
 	 *
 	 * @throws DnetCollectorException when retryNumber exceeds maxNumberOfRetry, when a redirect has no location, or
 	 *         when the thread is interrupted while waiting
 	 */
 	private InputStream attemptDownload(
 		final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
 		throws DnetCollectorException {
 		if (retryNumber > maxNumberOfRetry) {
 			throw new DnetCollectorException("Max number of retries exceeded. Cause: \n " + errorList);
 		}
 		log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
 		try {
 			InputStream input = null;
 			try {
 				final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
 				// redirects are followed manually below, so that they are logged and counted as attempts
 				urlConn.setInstanceFollowRedirects(false);
 				urlConn.setReadTimeout(readTimeOut * 1000);
 				urlConn.addRequestProperty("User-Agent", userAgent);
 				if (log.isDebugEnabled()) {
 					logHeaderFields(urlConn);
 				}
 				final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
 				if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
 					log.warn("waiting and repeating request after " + retryAfter + " sec.");
 					Thread.sleep(retryAfter * 1000L);
 					errorList.add("503 Service Unavailable");
 					urlConn.disconnect();
 					return attemptDownload(requestUrl, retryNumber + 1, errorList);
 				} else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM
 					|| urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
 					final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
 					log.debug("The requested url has been moved to " + newUrl);
 					errorList
 						.add(
 							String
 								.format(
 									"%s %s. Moved to: %s",
 									urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
 					urlConn.disconnect();
 					return attemptDownload(newUrl, retryNumber + 1, errorList);
 				} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
 					log
 						.error(
 							String
 								.format(
 									"HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
 					Thread.sleep(defaultDelay * 1000L);
 					errorList
 						.add(
 							String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
 					urlConn.disconnect();
 					return attemptDownload(requestUrl, retryNumber + 1, errorList);
 				} else {
 					input = urlConn.getInputStream();
 					responseType = urlConn.getContentType();
 					return input;
 				}
 			} catch (final IOException e) {
 				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
 				Thread.sleep(defaultDelay * 1000L);
 				errorList.add(e.getMessage());
 				return attemptDownload(requestUrl, retryNumber + 1, errorList);
 			}
 		} catch (final InterruptedException e) {
 			// keep the interruption visible to the caller's thread
 			Thread.currentThread().interrupt();
 			throw new DnetCollectorException(e);
 		}
 	}
 	/** Logs the response message and every named response header at debug level. */
 	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
 		log.debug("StatusCode: " + urlConn.getResponseMessage());
 		for (final Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
 			if (e.getKey() != null) {
 				for (final String v : e.getValue()) {
 					log.debug("  key: " + e.getKey() + " - value: " + v);
 				}
 			}
 		}
 	}
 	/**
 	 * Reads the Retry-After header.
 	 *
 	 * @return the announced number of seconds plus 10, or -1 when the header is missing or not an integer
 	 */
 	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
 		for (final Map.Entry<String, List<String>> header : headerMap.entrySet()) {
 			final List<String> values = header.getValue();
 			if ("retry-after".equalsIgnoreCase(header.getKey())
 				&& values.size() > 0
 				&& NumberUtils.isNumber(values.get(0))) {
 				try {
 					return Integer.parseInt(values.get(0)) + 10;
 				} catch (final NumberFormatException ignored) {
 					// isNumber also accepts decimal, hexadecimal and suffixed literals, which are not a delay in
 					// whole seconds: treated like a missing header
 				}
 			}
 		}
 		return -1;
 	}
 	/**
 	 * Reads the Location header of a redirect.
 	 *
 	 * @throws DnetCollectorException when the header is missing
 	 */
 	private String obtainNewLocation(final Map<String, List<String>> headerMap)
 		throws DnetCollectorException {
 		for (final Map.Entry<String, List<String>> header : headerMap.entrySet()) {
 			if ("location".equalsIgnoreCase(header.getKey()) && header.getValue().size() > 0) {
 				return header.getValue().get(0);
 			}
 		}
 		throw new DnetCollectorException(
 			"The requested url has been MOVED, but 'location' param is MISSING");
 	}
 	/**
 	 * register for https scheme; this is a workaround and not intended for the use in trusted environments
 	 * <p>
 	 * The installed trust manager accepts every certificate and becomes the default for all HttpsURLConnections.
 	 */
 	public void initTrustManager() {
 		final X509TrustManager tm = new X509TrustManager() {
 			@Override
 			public void checkClientTrusted(final X509Certificate[] xcs, final String string) {
 			}
 			@Override
 			public void checkServerTrusted(final X509Certificate[] xcs, final String string) {
 			}
 			@Override
 			public X509Certificate[] getAcceptedIssuers() {
 				// the interface requires a non-null (possibly empty) array
 				return new X509Certificate[0];
 			}
 		};
 		try {
 			final SSLContext ctx = SSLContext.getInstance("TLS");
 			ctx.init(null, new TrustManager[] {
 				tm
 			}, null);
 			HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
 		} catch (final GeneralSecurityException e) {
 			log.fatal(e);
 			throw new IllegalStateException(e);
 		}
 	}
 	public int getMaxNumberOfRetry() {
 		return maxNumberOfRetry;
 	}
 	public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
 		this.maxNumberOfRetry = maxNumberOfRetry;
 	}
 	public int getDefaultDelay() {
 		return defaultDelay;
 	}
 	public void setDefaultDelay(final int defaultDelay) {
 		this.defaultDelay = defaultDelay;
 	}
 	public int getReadTimeOut() {
 		return readTimeOut;
 	}
 	public void setReadTimeOut(final int readTimeOut) {
 		this.readTimeOut = readTimeOut;
 	}
 	/** @return the content type of the last successful response, null before the first one */
 	public String getResponseType() {
 		return responseType;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/DnetTransformationException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/DnetTransformationException.java
@ -0,0 +1,29 @@
 package eu.dnetlib.dhp.transformation;
 /**
  * Checked exception of the transformation package. It adds no state of its own: every constructor forwards its
  * arguments to the corresponding {@link Exception} constructor.
  */
 public class DnetTransformationException extends Exception {
 	/**
 	 * Creates an exception with neither a detail message nor a cause.
 	 */
 	public DnetTransformationException() {
 	}
 	/**
 	 * Creates an exception carrying only a detail message.
 	 *
 	 * @param message the detail message
 	 */
 	public DnetTransformationException(final String message) {
 		super(message);
 	}
 	/**
 	 * Creates an exception wrapping another throwable.
 	 *
 	 * @param cause the underlying cause
 	 */
 	public DnetTransformationException(final Throwable cause) {
 		super(cause);
 	}
 	/**
 	 * Creates an exception with a detail message and an underlying cause.
 	 *
 	 * @param message the detail message
 	 * @param cause the underlying cause
 	 */
 	public DnetTransformationException(final String message, final Throwable cause) {
 		super(message, cause);
 	}
 	/**
 	 * Creates an exception with full control over suppression and stack trace capture.
 	 *
 	 * @param message the detail message
 	 * @param cause the underlying cause
 	 * @param enableSuppression whether suppressed exceptions are recorded
 	 * @param writableStackTrace whether the stack trace is writable
 	 */
 	public DnetTransformationException(
 		final String message,
 		final Throwable cause,
 		final boolean enableSuppression,
 		final boolean writableStackTrace) {
 		super(message, cause, enableSuppression, writableStackTrace);
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java
@ -1,74 +0,0 @@
 package eu.dnetlib.dhp.transformation;
 import java.io.ByteArrayInputStream;
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.util.Map;
 import javax.xml.transform.stream.StreamSource;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.util.LongAccumulator;
 import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
 import eu.dnetlib.dhp.transformation.functions.Cleaner;
 import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
 import net.sf.saxon.s9api.*;
 /**
  * Spark {@link MapFunction} that runs an XSLT rule over the body of each {@link MetadataRecord}.
  * <p>
  * For every record the rule is compiled and executed with Saxon, with a {@link Cleaner} registered as extension
  * function. On success the record body is replaced by the serialized XML output and the transformation date is
  * set; on any failure {@code null} is returned. Three accumulators count the records seen, failed and transformed.
  */
 public class TransformFunction implements MapFunction<MetadataRecord, MetadataRecord> {
 	private final LongAccumulator totalItems;
 	private final LongAccumulator errorItems;
 	private final LongAccumulator transformedItems;
 	private final String transformationRule;
 	private final Cleaner cleanFunction;
 	private final long dateOfTransformation;
 	/**
 	 * @param totalItems accumulator incremented once for every record passed to {@link #call(MetadataRecord)}
 	 * @param errorItems accumulator incremented once for every record whose transformation fails
 	 * @param transformedItems accumulator incremented once for every successfully transformed record
 	 * @param transformationRule the XSLT stylesheet, as text
 	 * @param dateOfTransformation value written to each transformed record via {@code setDateOfTransformation}
 	 * @param vocabularies vocabularies handed to the {@link Cleaner} extension function
 	 * @throws Exception declared for callers; presumably raised by the {@link Cleaner} construction (verify)
 	 */
 	public TransformFunction(
 		LongAccumulator totalItems,
 		LongAccumulator errorItems,
 		LongAccumulator transformedItems,
 		final String transformationRule,
 		long dateOfTransformation,
 		final Map<String, Vocabulary> vocabularies)
 		throws Exception {
 		this.totalItems = totalItems;
 		this.errorItems = errorItems;
 		this.transformedItems = transformedItems;
 		this.transformationRule = transformationRule;
 		this.dateOfTransformation = dateOfTransformation;
 		cleanFunction = new Cleaner(vocabularies);
 	}
 	/**
 	 * Transforms a single record in place.
 	 *
 	 * @param value the record whose body is the XML input of the transformation
 	 * @return the same record instance with its body and transformation date updated, or {@code null} when the
 	 *         transformation fails for any reason
 	 */
 	@Override
 	public MetadataRecord call(MetadataRecord value) {
 		totalItems.add(1);
 		try {
 			// NOTE(review): processor and stylesheet are rebuilt for every record; caching them is a possible
 			// optimisation but needs care with serialization of this function - confirm before changing.
 			Processor processor = new Processor(false);
 			processor.registerExtensionFunction(cleanFunction);
 			final XsltCompiler comp = processor.newXsltCompiler();
 			// Character streams are used for both inputs so that the Strings are never converted to bytes with
 			// the platform default charset, which could corrupt non-ASCII content.
 			XsltExecutable xslt = comp
 				.compile(new StreamSource(new StringReader(transformationRule)));
 			XdmNode source = processor
 				.newDocumentBuilder()
 				.build(new StreamSource(new StringReader(value.getBody())));
 			XsltTransformer trans = xslt.load();
 			trans.setInitialContextNode(source);
 			final StringWriter output = new StringWriter();
 			Serializer out = processor.newSerializer(output);
 			out.setOutputProperty(Serializer.Property.METHOD, "xml");
 			out.setOutputProperty(Serializer.Property.INDENT, "yes");
 			trans.setDestination(out);
 			trans.transform();
 			final String xml = output.toString();
 			value.setBody(xml);
 			value.setDateOfTransformation(dateOfTransformation);
 			transformedItems.add(1);
 			return value;
 		} catch (Throwable e) {
 			// Deliberate best effort: a failing record is only counted and reported to the caller as null.
 			errorItems.add(1);
 			return null;
 		}
 	}
 }
--- a/Show More
+++ b/Show More
`@ -1,5 +1,5 @@`

	`package eu.dnetlib.dhp.oa.graph.raw.common;`	`package eu.dnetlib.dhp.common.vocabulary;`

	`import java.io.Serializable;`	`import java.io.Serializable;`
		`@ -0,0 +1 @@`
							`{"id":"50\|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10\|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}`
		`@ -0,0 +1 @@`
							{"id":"50\|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10\|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]}