Merge pull request '[graph indexing] sets spark memoryOverhead in the join operations to the same value used for the memory executor' (#426 ) from provision_memoryOverhead into master

Reviewed-on: #426
[graph indexing] sets spark memoryOverhead in the join operations to the same value used for the memory executor
2024-04-19 16:59:45 +02:00 · 2024-04-19 16:57:55 +02:00 · 2024-04-18 11:25:24 +02:00 · 2024-04-18 11:23:43 +02:00 · 2024-04-17 16:40:29 +02:00 · 2024-04-17 15:13:28 +02:00
337 changed files with 8849 additions and 132303 deletions
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@ -63,13 +63,11 @@

 	<dependencies>
 		<dependency>
-			<groupId>edu.cmu</groupId>
-			<artifactId>secondstring</artifactId>
-		</dependency>
-		<dependency>
-			<groupId>com.ibm.icu</groupId>
-			<artifactId>icu4j</artifactId>
+			<groupId>eu.dnetlib.dhp</groupId>
+			<artifactId>dhp-pace-core</artifactId>
+			<version>${project.version}</version>
 		</dependency>
+
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-common</artifactId>
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
@ -1,53 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import okhttp3.MediaType;
-import okhttp3.RequestBody;
-import okhttp3.internal.Util;
-import okio.BufferedSink;
-import okio.Okio;
-import okio.Source;
-
-public class InputStreamRequestBody extends RequestBody {
-
-	private final InputStream inputStream;
-	private final MediaType mediaType;
-	private final long lenght;
-
-	public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
-
-		return new InputStreamRequestBody(inputStream, mediaType, len);
-	}
-
-	private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
-		this.inputStream = inputStream;
-		this.mediaType = mediaType;
-		this.lenght = len;
-	}
-
-	@Override
-	public MediaType contentType() {
-		return mediaType;
-	}
-
-	@Override
-	public long contentLength() {
-
-		return lenght;
-
-	}
-
-	@Override
-	public void writeTo(BufferedSink sink) throws IOException {
-		Source source = null;
-		try {
-			source = Okio.source(inputStream);
-			sink.writeAll(source);
-		} finally {
-			Util.closeQuietly(source);
-		}
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
@ -1,8 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-public class MissingConceptDoiException extends Throwable {
-	public MissingConceptDoiException(String message) {
-		super(message);
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
@ -1,365 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.*;
-import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.http.HttpHeaders;
-import org.apache.http.entity.ContentType;
-import org.jetbrains.annotations.NotNull;
-
-import com.google.gson.Gson;
-
-import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
-import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
-import okhttp3.*;
-
-public class ZenodoAPIClient implements Serializable {
-
-	String urlString;
-	String bucket;
-
-	String deposition_id;
-	String access_token;
-
-	public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
-
-	private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
-
-	public String getUrlString() {
-		return urlString;
-	}
-
-	public void setUrlString(String urlString) {
-		this.urlString = urlString;
-	}
-
-	public String getBucket() {
-		return bucket;
-	}
-
-	public void setBucket(String bucket) {
-		this.bucket = bucket;
-	}
-
-	public void setDeposition_id(String deposition_id) {
-		this.deposition_id = deposition_id;
-	}
-
-	public ZenodoAPIClient(String urlString, String access_token) {
-
-		this.urlString = urlString;
-		this.access_token = access_token;
-	}
-
-	/**
-	 * Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
-	 *
-	 * @return response code
-	 * @throws IOException
-	 */
-	public int newDeposition() throws IOException {
-		String json = "{}";
-
-		URL url = new URL(urlString);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setRequestMethod("POST");
-		conn.setDoOutput(true);
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-		conn.disconnect();
-
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
-		this.bucket = newSubmission.getLinks().getBucket();
-		this.deposition_id = newSubmission.getId();
-
-		return responseCode;
-	}
-
-	/**
-	 * Upload files in Zenodo.
-	 *
-	 * @param is the inputStream for the file to upload
-	 * @param file_name the name of the file as it will appear on Zenodo
-	 * @return the response code
-	 */
-	public int uploadIS(InputStream is, String file_name) throws IOException {
-
-		URL url = new URL(bucket + "/" + file_name);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("PUT");
-
-		byte[] buf = new byte[8192];
-		int length;
-		try (OutputStream os = conn.getOutputStream()) {
-			while ((length = is.read(buf)) != -1) {
-				os.write(buf, 0, length);
-			}
-
-		}
-		int responseCode = conn.getResponseCode();
-		if (!checkOKStatus(responseCode)) {
-			throw new IOException("Unexpected code " + responseCode + getBody(conn));
-		}
-
-		return responseCode;
-	}
-
-	@NotNull
-	private String getBody(HttpURLConnection conn) throws IOException {
-		String body = "{}";
-		try (BufferedReader br = new BufferedReader(
-			new InputStreamReader(conn.getInputStream(), "utf-8"))) {
-			StringBuilder response = new StringBuilder();
-			String responseLine = null;
-			while ((responseLine = br.readLine()) != null) {
-				response.append(responseLine.trim());
-			}
-
-			body = response.toString();
-
-		}
-		return body;
-	}
-
-	/**
-	 * Associates metadata information to the current deposition
-	 *
-	 * @param metadata the metadata
-	 * @return response code
-	 * @throws IOException
-	 */
-	public int sendMretadata(String metadata) throws IOException {
-
-		URL url = new URL(urlString + "/" + deposition_id);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("PUT");
-
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = metadata.getBytes("utf-8");
-			os.write(input, 0, input.length);
-
-		}
-
-		final int responseCode = conn.getResponseCode();
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + getBody(conn));
-
-		return responseCode;
-
-	}
-
-	private boolean checkOKStatus(int responseCode) {
-
-		if (HttpURLConnection.HTTP_OK != responseCode ||
-			HttpURLConnection.HTTP_CREATED != responseCode)
-			return true;
-		return false;
-	}
-
-	/**
-	 * To publish the current deposition. It works for both new deposition or new version of an old deposition
-	 *
-	 * @return response code
-	 * @throws IOException
-	 */
-	@Deprecated
-	public int publish() throws IOException {
-
-		String json = "{}";
-
-		OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
-
-		RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
-
-		Request request = new Request.Builder()
-			.url(urlString + "/" + deposition_id + "/actions/publish")
-			.addHeader("Authorization", "Bearer " + access_token)
-			.post(body)
-			.build();
-
-		try (Response response = httpClient.newCall(request).execute()) {
-
-			if (!response.isSuccessful())
-				throw new IOException("Unexpected code " + response + response.body().string());
-
-			return response.code();
-
-		}
-	}
-
-	/**
-	 * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
-	 * for the new version.
-	 *
-	 * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
-	 *            part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
-	 *            concept_rec_id = 656930
-	 * @return response code
-	 * @throws IOException
-	 * @throws MissingConceptDoiException
-	 */
-	public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
-		setDepositionId(concept_rec_id, 1);
-		String json = "{}";
-
-		URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("POST");
-
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-		String latest_draft = zenodoModel.getLinks().getLatest_draft();
-		deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
-		bucket = getBucket(latest_draft);
-
-		return responseCode;
-
-	}
-
-	/**
-	 * To finish uploading a version or new deposition not published
-	 * It sets the deposition_id and the bucket to be used
-	 *
-	 *
-	 * @param deposition_id the deposition id of the not yet published upload
-	 *            concept_rec_id = 656930
-	 * @return response code
-	 * @throws IOException
-	 * @throws MissingConceptDoiException
-	 */
-	public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
-
-		this.deposition_id = deposition_id;
-
-		String json = "{}";
-
-		URL url = new URL(urlString + "/" + deposition_id);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setRequestMethod("POST");
-		conn.setDoOutput(true);
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-		conn.disconnect();
-
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-		bucket = zenodoModel.getLinks().getBucket();
-
-		return responseCode;
-
-	}
-
-	private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
-
-		ZenodoModelList zenodoModelList = new Gson()
-			.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
-
-		for (ZenodoModel zm : zenodoModelList) {
-			if (zm.getConceptrecid().equals(concept_rec_id)) {
-				deposition_id = zm.getId();
-				return;
-			}
-		}
-		if (zenodoModelList.size() == 0)
-			throw new MissingConceptDoiException(
-				"The concept record id specified was missing in the list of depositions");
-		setDepositionId(concept_rec_id, page + 1);
-
-	}
-
-	private String getPrevDepositions(String page) throws IOException {
-
-		HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
-		urlBuilder.addQueryParameter("page", page);
-
-		URL url = new URL(urlBuilder.build().toString());
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("GET");
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		return body;
-
-	}
-
-	private String getBucket(String inputUurl) throws IOException {
-
-		URL url = new URL(inputUurl);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("GET");
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-
-		return zenodoModel.getLinks().getBucket();
-
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
@ -1,14 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Community {
-	private String identifier;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
@ -1,47 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Creator {
-	private String affiliation;
-	private String name;
-	private String orcid;
-
-	public String getAffiliation() {
-		return affiliation;
-	}
-
-	public void setAffiliation(String affiliation) {
-		this.affiliation = affiliation;
-	}
-
-	public String getName() {
-		return name;
-	}
-
-	public void setName(String name) {
-		this.name = name;
-	}
-
-	public String getOrcid() {
-		return orcid;
-	}
-
-	public void setOrcid(String orcid) {
-		this.orcid = orcid;
-	}
-
-	public static Creator newInstance(String name, String affiliation, String orcid) {
-		Creator c = new Creator();
-		if (name != null) {
-			c.name = name;
-		}
-		if (affiliation != null) {
-			c.affiliation = affiliation;
-		}
-		if (orcid != null) {
-			c.orcid = orcid;
-		}
-
-		return c;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
@ -1,44 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class File implements Serializable {
-	private String checksum;
-	private String filename;
-	private long filesize;
-	private String id;
-
-	public String getChecksum() {
-		return checksum;
-	}
-
-	public void setChecksum(String checksum) {
-		this.checksum = checksum;
-	}
-
-	public String getFilename() {
-		return filename;
-	}
-
-	public void setFilename(String filename) {
-		this.filename = filename;
-	}
-
-	public long getFilesize() {
-		return filesize;
-	}
-
-	public void setFilesize(long filesize) {
-		this.filesize = filesize;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
@ -1,23 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Grant implements Serializable {
-	private String id;
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public static Grant newInstance(String id) {
-		Grant g = new Grant();
-		g.id = id;
-
-		return g;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
@ -1,92 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Links implements Serializable {
-
-	private String bucket;
-
-	private String discard;
-
-	private String edit;
-	private String files;
-	private String html;
-	private String latest_draft;
-	private String latest_draft_html;
-	private String publish;
-
-	private String self;
-
-	public String getBucket() {
-		return bucket;
-	}
-
-	public void setBucket(String bucket) {
-		this.bucket = bucket;
-	}
-
-	public String getDiscard() {
-		return discard;
-	}
-
-	public void setDiscard(String discard) {
-		this.discard = discard;
-	}
-
-	public String getEdit() {
-		return edit;
-	}
-
-	public void setEdit(String edit) {
-		this.edit = edit;
-	}
-
-	public String getFiles() {
-		return files;
-	}
-
-	public void setFiles(String files) {
-		this.files = files;
-	}
-
-	public String getHtml() {
-		return html;
-	}
-
-	public void setHtml(String html) {
-		this.html = html;
-	}
-
-	public String getLatest_draft() {
-		return latest_draft;
-	}
-
-	public void setLatest_draft(String latest_draft) {
-		this.latest_draft = latest_draft;
-	}
-
-	public String getLatest_draft_html() {
-		return latest_draft_html;
-	}
-
-	public void setLatest_draft_html(String latest_draft_html) {
-		this.latest_draft_html = latest_draft_html;
-	}
-
-	public String getPublish() {
-		return publish;
-	}
-
-	public void setPublish(String publish) {
-		this.publish = publish;
-	}
-
-	public String getSelf() {
-		return self;
-	}
-
-	public void setSelf(String self) {
-		this.self = self;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
@ -1,153 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class Metadata implements Serializable {
-
-	private String access_right;
-	private List<Community> communities;
-	private List<Creator> creators;
-	private String description;
-	private String doi;
-	private List<Grant> grants;
-	private List<String> keywords;
-	private String language;
-	private String license;
-	private PrereserveDoi prereserve_doi;
-	private String publication_date;
-	private List<String> references;
-	private List<RelatedIdentifier> related_identifiers;
-	private String title;
-	private String upload_type;
-	private String version;
-
-	public String getUpload_type() {
-		return upload_type;
-	}
-
-	public void setUpload_type(String upload_type) {
-		this.upload_type = upload_type;
-	}
-
-	public String getVersion() {
-		return version;
-	}
-
-	public void setVersion(String version) {
-		this.version = version;
-	}
-
-	public String getAccess_right() {
-		return access_right;
-	}
-
-	public void setAccess_right(String access_right) {
-		this.access_right = access_right;
-	}
-
-	public List<Community> getCommunities() {
-		return communities;
-	}
-
-	public void setCommunities(List<Community> communities) {
-		this.communities = communities;
-	}
-
-	public List<Creator> getCreators() {
-		return creators;
-	}
-
-	public void setCreators(List<Creator> creators) {
-		this.creators = creators;
-	}
-
-	public String getDescription() {
-		return description;
-	}
-
-	public void setDescription(String description) {
-		this.description = description;
-	}
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public List<Grant> getGrants() {
-		return grants;
-	}
-
-	public void setGrants(List<Grant> grants) {
-		this.grants = grants;
-	}
-
-	public List<String> getKeywords() {
-		return keywords;
-	}
-
-	public void setKeywords(List<String> keywords) {
-		this.keywords = keywords;
-	}
-
-	public String getLanguage() {
-		return language;
-	}
-
-	public void setLanguage(String language) {
-		this.language = language;
-	}
-
-	public String getLicense() {
-		return license;
-	}
-
-	public void setLicense(String license) {
-		this.license = license;
-	}
-
-	public PrereserveDoi getPrereserve_doi() {
-		return prereserve_doi;
-	}
-
-	public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
-		this.prereserve_doi = prereserve_doi;
-	}
-
-	public String getPublication_date() {
-		return publication_date;
-	}
-
-	public void setPublication_date(String publication_date) {
-		this.publication_date = publication_date;
-	}
-
-	public List<String> getReferences() {
-		return references;
-	}
-
-	public void setReferences(List<String> references) {
-		this.references = references;
-	}
-
-	public List<RelatedIdentifier> getRelated_identifiers() {
-		return related_identifiers;
-	}
-
-	public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
-		this.related_identifiers = related_identifiers;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
@ -1,25 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class PrereserveDoi implements Serializable {
-	private String doi;
-	private String recid;
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public String getRecid() {
-		return recid;
-	}
-
-	public void setRecid(String recid) {
-		this.recid = recid;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
@ -1,43 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class RelatedIdentifier implements Serializable {
-	private String identifier;
-	private String relation;
-	private String resource_type;
-	private String scheme;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-
-	public String getRelation() {
-		return relation;
-	}
-
-	public void setRelation(String relation) {
-		this.relation = relation;
-	}
-
-	public String getResource_type() {
-		return resource_type;
-	}
-
-	public void setResource_type(String resource_type) {
-		this.resource_type = resource_type;
-	}
-
-	public String getScheme() {
-		return scheme;
-	}
-
-	public void setScheme(String scheme) {
-		this.scheme = scheme;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
@ -1,118 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class ZenodoModel implements Serializable {
-
-	private String conceptrecid;
-	private String created;
-
-	private List<File> files;
-	private String id;
-	private Links links;
-	private Metadata metadata;
-	private String modified;
-	private String owner;
-	private String record_id;
-	private String state;
-	private boolean submitted;
-	private String title;
-
-	public String getConceptrecid() {
-		return conceptrecid;
-	}
-
-	public void setConceptrecid(String conceptrecid) {
-		this.conceptrecid = conceptrecid;
-	}
-
-	public String getCreated() {
-		return created;
-	}
-
-	public void setCreated(String created) {
-		this.created = created;
-	}
-
-	public List<File> getFiles() {
-		return files;
-	}
-
-	public void setFiles(List<File> files) {
-		this.files = files;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public Links getLinks() {
-		return links;
-	}
-
-	public void setLinks(Links links) {
-		this.links = links;
-	}
-
-	public Metadata getMetadata() {
-		return metadata;
-	}
-
-	public void setMetadata(Metadata metadata) {
-		this.metadata = metadata;
-	}
-
-	public String getModified() {
-		return modified;
-	}
-
-	public void setModified(String modified) {
-		this.modified = modified;
-	}
-
-	public String getOwner() {
-		return owner;
-	}
-
-	public void setOwner(String owner) {
-		this.owner = owner;
-	}
-
-	public String getRecord_id() {
-		return record_id;
-	}
-
-	public void setRecord_id(String record_id) {
-		this.record_id = record_id;
-	}
-
-	public String getState() {
-		return state;
-	}
-
-	public void setState(String state) {
-		this.state = state;
-	}
-
-	public boolean isSubmitted() {
-		return submitted;
-	}
-
-	public void setSubmitted(boolean submitted) {
-		this.submitted = submitted;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
@ -1,7 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.util.ArrayList;
-
-public class ZenodoModelList extends ArrayList<ZenodoModel> {
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@ -10,7 +10,6 @@ import org.apache.commons.lang3.StringUtils;
 import com.wcohen.ss.JaroWinkler;

 import eu.dnetlib.dhp.schema.oaf.Author;
-import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.pace.model.Person;
 import scala.Tuple2;
@ -146,21 +145,110 @@ public class AuthorMerger {
 		return null;
 	}

+	/**
+	 * Decides, in the context of ORCID enrichment, whether two authors can be considered the same person.
+	 * <p>
+	 * The decision is a heuristic applied to the parsed names only:
+	 * <ol>
+	 * <li>when either author lacks a non-blank surname, the authors match only if both have a full name and at
+	 * least one pair of normalized full names is equal, ignoring case;</li>
+	 * <li>when the authors share a surname (ignoring case), they match if either of them has no non-blank name,
+	 * or if they share a name token (ignoring case);</li>
+	 * <li>otherwise they match only if name and surname appear swapped between the two authors.</li>
+	 * </ol>
+	 *
+	 * @param left  Author in the OAF entity
+	 * @param right Author coming from ORCID
+	 * @return true if the heuristic considers the two authors to be the same person, false otherwise
+	 */
+	public static boolean checkORCIDSimilarity(final Author left, final Author right) {
+		final Person pl = parse(left);
+		final Person pr = parse(right);
+
+		final boolean bothHaveSurname = pl.getSurname() != null
+			&& pl.getSurname().stream().anyMatch(StringUtils::isNotBlank)
+			&& pr.getSurname() != null
+			&& pr.getSurname().stream().anyMatch(StringUtils::isNotBlank);
+
+		// Without a usable surname on both sides, fall back to comparing the normalized full names
+		if (!bothHaveSurname) {
+			if (pl.getFullname() == null || pl.getFullname().isEmpty()
+				|| pr.getFullname() == null || pr.getFullname().isEmpty()) {
+				return false;
+			}
+			return pl
+				.getFullname()
+				.stream()
+				.anyMatch(
+					fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
+		}
+
+		final boolean leftHasNoName = pl.getName() == null
+			|| pl.getName().stream().allMatch(StringUtils::isBlank);
+		final boolean rightHasNoName = pr.getName() == null
+			|| pr.getName().stream().allMatch(StringUtils::isBlank);
+
+		final boolean sameSurname = pl
+			.getSurname()
+			.stream()
+			.anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)));
+
+		if (sameSurname) {
+			// A shared surname is enough when one of the two authors carries no name at all
+			if (leftHasNoName || rightHasNoName) {
+				return true;
+			}
+			// Both authors carry a name: accept when at least one name token is shared
+			if (pl.getName().stream().anyMatch(nl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(nl)))) {
+				return true;
+			}
+		}
+
+		// The swapped-order check below dereferences both name lists; since getName() is treated as
+		// nullable above, a missing list means "no match" rather than a NullPointerException
+		if (pl.getName() == null || pr.getName() == null) {
+			return false;
+		}
+
+		// Some publications list authors in inverse order (Surname, Name):
+		// look for an exact match between the name of one author and the surname of the other
+		return pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl)))
+			&& pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl)));
+	}
+	//
+
+	/**
+	 * Enriches the authors of an OAF entity with the pids of the matching ORCID authors.
+	 * <p>
+	 * Each base author is compared, in order, against the ORCID authors not yet consumed; the first ORCID author
+	 * accepted by {@link #checkORCIDSimilarity(Author, Author)} donates its pids and is then excluded from further
+	 * matches. The base authors are modified in place.
+	 *
+	 * @param baseAuthor  the Author List in the OAF Entity
+	 * @param orcidAuthor the list of ORCID authors to match against
+	 * @return {@code orcidAuthor} when {@code baseAuthor} is null or empty; otherwise {@code baseAuthor}, left
+	 *         untouched when {@code orcidAuthor} is null or empty, or when there is a single base author and more
+	 *         than 10 ORCID authors
+	 */
+	public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
+
+		if (baseAuthor == null || baseAuthor.isEmpty()) {
+			return orcidAuthor;
+		}
+
+		if (orcidAuthor == null || orcidAuthor.isEmpty()) {
+			return baseAuthor;
+		}
+
+		if (baseAuthor.size() == 1 && orcidAuthor.size() > 10) {
+			return baseAuthor;
+		}
+
+		// Work on a copy so that consumed ORCID authors can be dropped without touching the caller's list
+		final List<Author> candidates = new ArrayList<>(orcidAuthor);
+
+		for (final Author ba : baseAuthor) {
+			candidates
+				.stream()
+				.filter(oa -> checkORCIDSimilarity(ba, oa))
+				.findFirst()
+				.ifPresent(match -> {
+					addPid(ba, match.getPid());
+					candidates.remove(match);
+				});
+		}
+		return baseAuthor;
+	}
+
+	/**
+	 * Appends the given pids to the pid list of the author, creating that list first when the author has none.
+	 *
+	 * @param a    the author to enrich; its pid list is modified in place
+	 * @param pids the pids to append; when null nothing is appended
+	 */
+	private static void addPid(final Author a, final List<StructuredProperty> pids) {
+
+		if (a.getPid() == null) {
+			a.setPid(new ArrayList<>());
+		}
+
+		// addAll(null) throws a NullPointerException: a matched author without pids contributes nothing
+		if (pids != null) {
+			a.getPid().addAll(pids);
+		}
+
+	}
+
 	public static String pidToComparableString(StructuredProperty pid) {
-		final String classId = Optional
-			.ofNullable(pid)
-			.map(
-				p -> Optional
-					.ofNullable(p.getQualifier())
-					.map(Qualifier::getClassid)
-					.map(String::toLowerCase)
-					.orElse(""))
-			.orElse("");
-		return Optional
-			.ofNullable(pid)
-			.map(StructuredProperty::getValue)
-			.map(v -> String.join("|", v, classId))
-			.orElse("");
+		final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
+			: "";
+		return (pid.getQualifier() != null ? classid : "")
+			+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
 	}

 	public static int countAuthorsPids(List<Author> authors) {
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
@ -14,7 +14,7 @@ import java.util.stream.Collectors;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.api.java.function.MapGroupsFunction;
+import org.apache.spark.api.java.function.ReduceFunction;
 import org.apache.spark.sql.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -26,7 +26,7 @@ import eu.dnetlib.dhp.schema.common.EntityType;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -135,10 +135,10 @@ public class GroupEntitiesSparkJob {
 					.applyCoarVocabularies(entity, vocs),
 				OAFENTITY_KRYO_ENC)
 			.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
-			.mapGroups((MapGroupsFunction<String, OafEntity, OafEntity>) MergeUtils::mergeById, OAFENTITY_KRYO_ENC)
+			.reduceGroups((ReduceFunction<OafEntity>) OafMapperUtils::mergeEntities)
 			.map(
-				(MapFunction<OafEntity, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
-					t.getClass().getName(), t),
+				(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
+					t._2().getClass().getName(), t._2()),
 				Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC));

 		// pivot on "_1" (classname of the entity)
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
@ -1,76 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.util.HashSet;
-import java.util.Objects;
-import java.util.Optional;
-import java.util.Set;
-
-import org.apache.commons.lang3.StringUtils;
-
-import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
-
-public class CleaningFunctions {
-
-	public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
-	public static final String DOI_PREFIX = "10.";
-
-	public static final Set<String> PID_BLACKLIST = new HashSet<>();
-
-	static {
-		PID_BLACKLIST.add("none");
-		PID_BLACKLIST.add("na");
-	}
-
-	public CleaningFunctions() {
-	}
-
-	/**
-	 * Utility method that filter PID values on a per-type basis.
-	 * @param s the PID whose value will be checked.
-	 * @return false if the pid matches the filter criteria, true otherwise.
-	 */
-	public static boolean pidFilter(StructuredProperty s) {
-		final String pidValue = s.getValue();
-		if (Objects.isNull(s.getQualifier()) ||
-			StringUtils.isBlank(pidValue) ||
-			StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
-			return false;
-		}
-		if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
-			return false;
-		}
-		return !PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue);
-	}
-
-	/**
-	 * Utility method that normalises PID values on a per-type basis.
-	 * @param pid the PID whose value will be normalised.
-	 * @return the PID containing the normalised value.
-	 */
-	public static StructuredProperty normalizePidValue(StructuredProperty pid) {
-		pid
-			.setValue(
-				normalizePidValue(
-					pid.getQualifier().getClassid(),
-					pid.getValue()));
-
-		return pid;
-	}
-
-	public static String normalizePidValue(String pidType, String pidValue) {
-		String value = Optional
-			.ofNullable(pidValue)
-			.map(String::trim)
-			.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
-
-		switch (pidType) {
-
-			// TODO add cleaning for more PID types as needed
-			case "doi":
-				return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
-		}
-		return value;
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/DoiCleaningRule.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/DoiCleaningRule.java
@ -1,8 +1,6 @@

 package eu.dnetlib.dhp.schema.oaf.utils;

-import org.apache.commons.lang3.StringUtils;
-
 public class DoiCleaningRule {

 	public static String clean(final String doi) {
@ -13,26 +11,4 @@ public class DoiCleaningRule {
 			.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
 	}

-	public static String normalizeDoi(final String input) {
-		if (input == null)
-			return null;
-		final String replaced = input
-			.replaceAll("\\n|\\r|\\t|\\s", "")
-			.toLowerCase()
-			.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
-		if (StringUtils.isEmpty(replaced))
-			return null;
-
-		if (!replaced.contains("10."))
-			return null;
-
-		final String ret = replaced.substring(replaced.indexOf("10."));
-
-		if (!ret.startsWith(CleaningFunctions.DOI_PREFIX))
-			return null;
-
-		return ret;
-
-	}
-
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@ -92,8 +92,6 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 		INVALID_AUTHOR_NAMES.add("null anonymous");
 		INVALID_AUTHOR_NAMES.add("unbekannt");
 		INVALID_AUTHOR_NAMES.add("unknown");
-		INVALID_AUTHOR_NAMES.add("autor, Sin");
-		INVALID_AUTHOR_NAMES.add("Desconocido / Inconnu,");

 		INVALID_URL_HOSTS.add("creativecommons.org");
 		INVALID_URL_HOSTS.add("www.academia.edu");
@ -508,8 +506,6 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 								.filter(Objects::nonNull)
 								.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
 								.map(GraphCleaningFunctions::cleanValue)
-								.sorted((s1, s2) -> s2.getValue().length() - s1.getValue().length())
-								.limit(ModelHardLimits.MAX_ABSTRACTS)
 								.collect(Collectors.toList()));
 				}
 				if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java
@ -1,294 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
-
-import java.io.Serializable;
-import java.nio.charset.StandardCharsets;
-import java.security.MessageDigest;
-import java.util.*;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.lang3.StringUtils;
-
-import com.google.common.collect.HashBiMap;
-import com.google.common.collect.Maps;
-
-import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.*;
-
-/**
- * Factory class for OpenAIRE identifiers in the Graph
- */
-public class IdentifierFactory implements Serializable {
-
-	public static final String ID_SEPARATOR = "::";
-	public static final String ID_PREFIX_SEPARATOR = "|";
-
-	public static final int ID_PREFIX_LEN = 12;
-
-	/**
-	 * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE.
-	 * The id of the record (source_::id) will be rewritten as pidType_::id)
-	 */
-	public static final Map<PidType, HashBiMap<String, String>> PID_AUTHORITY = Maps.newHashMap();
-
-	static {
-		PID_AUTHORITY.put(PidType.doi, HashBiMap.create());
-		PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref");
-		PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite");
-		PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO");
-		PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo");
-
-		PID_AUTHORITY.put(PidType.pmc, HashBiMap.create());
-		PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
-		PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central");
-
-		PID_AUTHORITY.put(PidType.pmid, HashBiMap.create());
-		PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
-		PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central");
-
-		PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create());
-		PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive");
-
-		PID_AUTHORITY.put(PidType.w3id, HashBiMap.create());
-		PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ROHub");
-	}
-
-	/**
-	 * Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that
-	 * PID_TYPE. Example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word.
-	 *
-	 * If a record with the same id (same pid) comes from 2 data sources, the one coming from a delegated source wins. E.g. Zenodo records win over those from Datacite.
-	 * See also https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/187 and the class dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
-	 */
-	public static final Map<PidType, Map<String, String>> DELEGATED_PID_AUTHORITY = Maps.newHashMap();
-
-	static {
-		DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>());
-		DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo");
-		DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo");
-		DELEGATED_PID_AUTHORITY.put(PidType.w3id, new HashMap<>());
-		DELEGATED_PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ro-id");
-	}
-
-	/**
-	 * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] whose records are considered enrichment for the graph.
-	 * Their OpenAIRE ID is built from the declared PID type. Are merged with their corresponding record, identified by
-	 * the same OpenAIRE id.
-	 */
-	public static final Map<PidType, HashBiMap<String, String>> ENRICHMENT_PROVIDER = Maps.newHashMap();
-
-	static {
-		ENRICHMENT_PROVIDER.put(PidType.doi, HashBiMap.create());
-		ENRICHMENT_PROVIDER.get(PidType.doi).put(OPEN_APC_ID, OPEN_APC_NAME);
-	}
-
-	public static Set<String> delegatedAuthorityDatasourceIds() {
-		return DELEGATED_PID_AUTHORITY
-			.values()
-			.stream()
-			.flatMap(m -> m.keySet().stream())
-			.collect(Collectors.toCollection(HashSet::new));
-	}
-
-	public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
-		return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
-	}
-
-	public static <T extends Result> String createDOIBoostIdentifier(T entity) {
-		if (entity == null)
-			return null;
-
-		StructuredProperty pid = null;
-		if (entity.getPid() != null) {
-			pid = entity
-				.getPid()
-				.stream()
-				.filter(Objects::nonNull)
-				.filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
-				.filter(CleaningFunctions::pidFilter)
-				.findAny()
-				.orElse(null);
-		} else {
-			if (entity.getInstance() != null) {
-				pid = entity
-					.getInstance()
-					.stream()
-					.filter(i -> i.getPid() != null)
-					.flatMap(i -> i.getPid().stream())
-					.filter(CleaningFunctions::pidFilter)
-					.findAny()
-					.orElse(null);
-			}
-		}
-		if (pid != null)
-			return idFromPid(entity, pid, true);
-		return null;
-	}
-
-	/**
-	 * Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given
-	 * entity T. Returns entity.id when none of the PIDs meet the selection criteria is available.
-	 *
-	 * @param entity the entity providing PIDs and a default ID.
-	 * @param <T> the specific entity type. Currently Organization and Result subclasses are supported.
-	 * @param md5 indicates whether should hash the PID value or not.
-	 * @return an identifier from the most relevant PID, entity.id otherwise
-	 */
-	public static <T extends OafEntity> String createIdentifier(T entity, boolean md5) {
-
-		checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
-
-		final Map<String, Set<StructuredProperty>> pids = extractPids(entity);
-
-		return pids
-			.values()
-			.stream()
-			.flatMap(Set::stream)
-			.min(new PidComparator<>(entity))
-			.map(
-				min -> Optional
-					.ofNullable(pids.get(min.getQualifier().getClassid()))
-					.map(
-						p -> p
-							.stream()
-							.sorted(new PidValueComparator())
-							.findFirst()
-							.map(s -> idFromPid(entity, s, md5))
-							.orElseGet(entity::getId))
-					.orElseGet(entity::getId))
-			.orElseGet(entity::getId);
-	}
-
-	private static <T extends OafEntity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
-		if (entity instanceof Result) {
-			return Optional
-				.ofNullable(((Result) entity).getInstance())
-				.map(IdentifierFactory::mapPids)
-				.orElse(new HashMap<>());
-		} else {
-			return entity
-				.getPid()
-				.stream()
-				.map(CleaningFunctions::normalizePidValue)
-				.filter(CleaningFunctions::pidFilter)
-				.collect(
-					Collectors
-						.groupingBy(
-							p -> p.getQualifier().getClassid(),
-							Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
-		}
-	}
-
-	private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
-		return instance
-			.stream()
-			.map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
-			.flatMap(Function.identity())
-			.collect(
-				Collectors
-					.groupingBy(
-						p -> p.getQualifier().getClassid(),
-						Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
-	}
-
-	private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom,
-		boolean mapHandles) {
-		return Optional
-			.ofNullable(pid)
-			.map(
-				pp -> pp
-					.stream()
-					// filter away PIDs provided by a DS that is not considered an authority for the
-					// given PID Type
-					.filter(p -> shouldFilterPidByCriteria(collectedFrom, p, mapHandles))
-					.map(CleaningFunctions::normalizePidValue)
-					.filter(p -> isNotFromDelegatedAuthority(collectedFrom, p))
-					.filter(CleaningFunctions::pidFilter))
-			.orElse(Stream.empty());
-	}
-
-	private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
-		final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
-
-		if (Objects.isNull(collectedFrom)) {
-			return false;
-		}
-
-		boolean isEnrich = Optional
-			.ofNullable(ENRICHMENT_PROVIDER.get(pType))
-			.map(
-				enrich -> enrich.containsKey(collectedFrom.getKey())
-					|| enrich.containsValue(collectedFrom.getValue()))
-			.orElse(false);
-
-		boolean isAuthority = Optional
-			.ofNullable(PID_AUTHORITY.get(pType))
-			.map(
-				authorities -> authorities.containsKey(collectedFrom.getKey())
-					|| authorities.containsValue(collectedFrom.getValue()))
-			.orElse(false);
-
-		return (mapHandles && pType.equals(PidType.handle)) || isEnrich || isAuthority;
-	}
-
-	private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, StructuredProperty p) {
-		final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
-
-		final Map<String, String> da = DELEGATED_PID_AUTHORITY.get(pType);
-		if (Objects.isNull(da)) {
-			return true;
-		}
-		if (!da.containsKey(collectedFrom.getKey())) {
-			return true;
-		}
-		return StringUtils.contains(p.getValue(), da.get(collectedFrom.getKey()));
-	}
-
-	/**
-	 * @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)}
-	 */
-	public static <T extends OafEntity> String createIdentifier(T entity) {
-
-		return createIdentifier(entity, true);
-	}
-
-	private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s, boolean md5) {
-		return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5);
-	}
-
-	public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
-		return new StringBuilder()
-			.append(numericPrefix)
-			.append(ID_PREFIX_SEPARATOR)
-			.append(createPrefix(pidType))
-			.append(ID_SEPARATOR)
-			.append(md5 ? md5(pidValue) : pidValue)
-			.toString();
-	}
-
-	// create the prefix (length = 12)
-	private static String createPrefix(String pidType) {
-		StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN));
-		while (prefix.length() < ID_PREFIX_LEN) {
-			prefix.append("_");
-		}
-		return prefix.substring(0, ID_PREFIX_LEN);
-	}
-
-	public static String md5(final String s) {
-		try {
-			final MessageDigest md = MessageDigest.getInstance("MD5");
-			md.update(s.getBytes(StandardCharsets.UTF_8));
-			return new String(Hex.encodeHex(md.digest()));
-		} catch (final Exception e) {
-			return null;
-		}
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeComparator.java
@ -1,78 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.Optional;
-import java.util.stream.Collectors;
-
-// 
-// Source code recreated from a .class file by IntelliJ IDEA
-// (powered by FernFlower decompiler)
-//
-import eu.dnetlib.dhp.schema.common.EntityType;
-import eu.dnetlib.dhp.schema.oaf.KeyValue;
-import eu.dnetlib.dhp.schema.oaf.Oaf;
-import eu.dnetlib.dhp.schema.oaf.OafEntity;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class MergeComparator implements Comparator<Oaf> {
-	public MergeComparator() {
-	}
-
-	public int compare(Oaf left, Oaf right) {
-		// nulls at the end
-		if (left == null && right == null) {
-			return 0;
-		} else if (left == null) {
-			return -1;
-		} else if (right == null) {
-			return 1;
-		}
-
-		// invisible
-		if (left.getDataInfo() != null && left.getDataInfo().getInvisible() == true) {
-			if (right.getDataInfo() != null && right.getDataInfo().getInvisible() == false) {
-				return -1;
-			}
-		}
-
-		// collectedfrom
-		HashSet<String> lCf = getCollectedFromIds(left);
-		HashSet<String> rCf = getCollectedFromIds(right);
-		if (lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
-			&& !rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
-			return -1;
-		} else if (!lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
-			&& rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
-			return 1;
-		}
-
-		SubEntityType lClass = SubEntityType.fromClass(left.getClass());
-		SubEntityType rClass = SubEntityType.fromClass(right.getClass());
-		return lClass.ordinal() - rClass.ordinal();
-
-	}
-
-	protected HashSet<String> getCollectedFromIds(Oaf left) {
-		return (HashSet) Optional.ofNullable(left.getCollectedfrom()).map((cf) -> {
-			return (HashSet) cf.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
-		}).orElse(new HashSet());
-	}
-
-	enum SubEntityType {
-		publication, dataset, software, otherresearchproduct, datasource, organization, project;
-
-		/**
-		 * Resolves the EntityType, given the relative class name
-		 *
-		 * @param clazz the given class name
-		 * @param <T> actual OafEntity subclass
-		 * @return the EntityType associated to the given class
-		 */
-		public static <T extends Oaf> SubEntityType fromClass(Class<T> clazz) {
-			return valueOf(clazz.getSimpleName().toLowerCase());
-		}
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java
@ -1,27 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-public class ModelHardLimits {
-
-	private ModelHardLimits() {
-	}
-
-	public static final String LAYOUT = "index";
-	public static final String INTERPRETATION = "openaire";
-	public static final String SEPARATOR = "-";
-
-	public static final int MAX_EXTERNAL_ENTITIES = 50;
-	public static final int MAX_AUTHORS = 200;
-	public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
-	public static final int MAX_TITLE_LENGTH = 5000;
-	public static final int MAX_TITLES = 10;
-	public static final int MAX_ABSTRACTS = 10;
-	public static final int MAX_ABSTRACT_LENGTH = 150000;
-	public static final int MAX_RELATED_ABSTRACT_LENGTH = 500;
-	public static final int MAX_INSTANCES = 10;
-
-	public static String getCollectionName(String format) {
-		return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@ -14,6 +14,7 @@ import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;

 import eu.dnetlib.dhp.schema.common.AccessRightComparator;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;

 public class OafMapperUtils {
@ -21,6 +22,65 @@ public class OafMapperUtils {
 	private OafMapperUtils() {
 	}

+	/**
+	 * Merges two Oaf objects, dispatching on the runtime type of {@code left}.
+	 * Entities are delegated to {@link #mergeEntities(OafEntity, OafEntity)}; for relations
+	 * {@code right} is merged into {@code left}, which is then returned.
+	 *
+	 * @param left the object whose type drives the dispatch
+	 * @param right the object to merge with {@code left}
+	 * @return the merged object
+	 * @throws IllegalArgumentException when {@code left} is neither an OafEntity nor a Relation
+	 */
+	public static Oaf merge(final Oaf left, final Oaf right) {
+		if (ModelSupport.isSubClass(left, OafEntity.class)) {
+			return mergeEntities((OafEntity) left, (OafEntity) right);
+		}
+		if (!ModelSupport.isSubClass(left, Relation.class)) {
+			throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
+		}
+		final Relation target = (Relation) left;
+		target.mergeFrom((Relation) right);
+		return left;
+	}
+
+	/**
+	 * Merges two entities, dispatching on the runtime type of {@code left}.
+	 * Results are delegated to {@link #mergeResults(Result, Result)}; for datasources, organizations and
+	 * projects {@code right} is merged into {@code left}, which is then returned.
+	 *
+	 * @param left the entity whose type drives the dispatch
+	 * @param right the entity to merge with {@code left}
+	 * @return the merged entity
+	 * @throws IllegalArgumentException when {@code left} is none of the supported entity types
+	 */
+	public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
+		if (ModelSupport.isSubClass(left, Result.class)) {
+			return mergeResults((Result) left, (Result) right);
+		}
+
+		// the three non-Result entity types are all merged in place, checked in the same order as before
+		final boolean mergedInPlace = ModelSupport.isSubClass(left, Datasource.class)
+			|| ModelSupport.isSubClass(left, Organization.class)
+			|| ModelSupport.isSubClass(left, Project.class);
+		if (!mergedInPlace) {
+			throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
+		}
+
+		left.mergeFrom(right);
+		return left;
+	}
+
+	/**
+	 * Merges two results.
+	 * When exactly one of them comes from a delegated authority, that one is returned as is and no merge
+	 * takes place. Otherwise the ResultTypeComparator picks the target: if {@code left} compares lower,
+	 * {@code right} is merged into {@code left}; in every other case {@code left} is merged into {@code right}.
+	 *
+	 * @param left the first result
+	 * @param right the second result
+	 * @return the result that survives the merge
+	 */
+	public static Result mergeResults(Result left, Result right) {
+
+		final boolean leftDelegated = isFromDelegatedAuthority(left);
+		final boolean rightDelegated = isFromDelegatedAuthority(right);
+
+		// only one side is backed by a delegated authority: it wins without merging
+		if (leftDelegated != rightDelegated) {
+			return leftDelegated ? left : right;
+		}
+
+		final boolean leftIsTarget = new ResultTypeComparator().compare(left, right) < 0;
+		final Result target = leftIsTarget ? left : right;
+		final Result source = leftIsTarget ? right : left;
+
+		target.mergeFrom(source);
+		return target;
+	}
+
+	/**
+	 * Tells whether at least one instance of the result was collected from a datasource listed by
+	 * {@code IdentifierFactory.delegatedAuthorityDatasourceIds()}.
+	 * Instances without a collectedfrom are ignored.
+	 *
+	 * @param r the result to inspect
+	 * @return true when a matching instance exists; false otherwise, including when the instance list is null
+	 */
+	private static boolean isFromDelegatedAuthority(Result r) {
+		return Optional
+			.ofNullable(r.getInstance())
+			.map(
+				instance -> instance
+					.stream()
+					.filter(i -> Objects.nonNull(i.getCollectedfrom()))
+					.map(i -> i.getCollectedfrom().getKey())
+					// bound method reference: the receiver is evaluated once here, so the id set is
+					// built once per call instead of once per instance
+					.anyMatch(IdentifierFactory.delegatedAuthorityDatasourceIds()::contains))
+			.orElse(false);
+	}
+
 	public static KeyValue keyValue(final String k, final String v) {
 		final KeyValue kv = new KeyValue();
 		kv.setKey(k);
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java
@ -1,46 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.util.Comparator;
-
-import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
-
-public class OrganizationPidComparator implements Comparator<StructuredProperty> {
-
-	@Override
-	public int compare(StructuredProperty left, StructuredProperty right) {
-		if (left == null) {
-			return right == null ? 0 : -1;
-		} else if (right == null) {
-			return 1;
-		}
-
-		PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
-		PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
-
-		if (lClass.equals(rClass))
-			return 0;
-
-		if (lClass.equals(PidType.openorgs))
-			return -1;
-		if (rClass.equals(PidType.openorgs))
-			return 1;
-
-		if (lClass.equals(PidType.GRID))
-			return -1;
-		if (rClass.equals(PidType.GRID))
-			return 1;
-
-		if (lClass.equals(PidType.mag_id))
-			return -1;
-		if (rClass.equals(PidType.mag_id))
-			return 1;
-
-		if (lClass.equals(PidType.urn))
-			return -1;
-		if (rClass.equals(PidType.urn))
-			return 1;
-
-		return 0;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java
@ -1,8 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.util.HashMap;
-import java.util.HashSet;
-
-public class PidBlacklist extends HashMap<String, HashSet<String>> {
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java
@ -1,40 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Optional;
-import java.util.Set;
-
-import org.apache.commons.io.IOUtils;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-public class PidBlacklistProvider {
-
-	private static final PidBlacklist blacklist;
-
-	static {
-		try {
-			String json = IOUtils.toString(IdentifierFactory.class.getResourceAsStream("pid_blacklist.json"));
-			blacklist = new ObjectMapper().readValue(json, PidBlacklist.class);
-
-		} catch (IOException e) {
-			throw new RuntimeException(e);
-		}
-	}
-
-	public static PidBlacklist getBlacklist() {
-		return blacklist;
-	}
-
-	public static Set<String> getBlacklist(String pidType) {
-		return Optional
-			.ofNullable(getBlacklist().get(pidType))
-			.orElse(new HashSet<>());
-	}
-
-	private PidBlacklistProvider() {
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java
@ -1,48 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.util.Comparator;
-
-import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.OafEntity;
-import eu.dnetlib.dhp.schema.oaf.Organization;
-import eu.dnetlib.dhp.schema.oaf.Result;
-import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
-
-public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {
-
-	private final T entity;
-
-	public PidComparator(T entity) {
-		this.entity = entity;
-	}
-
-	@Override
-	public int compare(StructuredProperty left, StructuredProperty right) {
-
-		if (left == null && right == null)
-			return 0;
-		if (left == null)
-			return 1;
-		if (right == null)
-			return -1;
-
-		if (ModelSupport.isSubClass(entity, Result.class)) {
-			return compareResultPids(left, right);
-		}
-		if (ModelSupport.isSubClass(entity, Organization.class)) {
-			return compareOrganizationtPids(left, right);
-		}
-
-		// Else (but unlikely), lexicographical ordering will do.
-		return left.getQualifier().getClassid().compareTo(right.getQualifier().getClassid());
-	}
-
-	private int compareResultPids(StructuredProperty left, StructuredProperty right) {
-		return new ResultPidComparator().compare(left, right);
-	}
-
-	private int compareOrganizationtPids(StructuredProperty left, StructuredProperty right) {
-		return new OrganizationPidComparator().compare(left, right);
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java
@ -1,79 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import org.apache.commons.lang3.EnumUtils;
-
-public enum PidType {
-
-	/**
-	 * The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash.
-	 *
-	 * There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix.
-	 *
-	 * The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic characters
-	 * of Unicode. Further constraints on character use (e.g. use of language-specific alphanumeric characters) can be
-	 * defined for an application by the ISO 26324 Registration Authority.
-	 *
-	 *
-	 * DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code.
-	 * These two components shall be separated by a full stop (period). The directory indicator shall be "10" and
-	 * distinguishes the entire set of character strings (prefix and suffix) as digital object identifiers within the
-	 * resolution system.
-	 *
-	 * Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant code is a
-	 * unique string assigned to a registrant.
-	 *
-	 * DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the registrant.
-	 * Each suffix shall be unique to the prefix element that precedes it. The unique suffix can be a sequential number,
-	 * or it might incorporate an identifier generated from or based on another system used by the registrant
-	 * (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred construction for such a suffix can be
-	 * specified, as in Example 1).
-	 *
-	 * Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2
-	 */
-	doi,
-
-	/**
-	 * PubMed Unique Identifier (PMID)
-	 *
-	 * This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and is the
-	 * accession number for managing and disseminating records. PMIDs are not reused after records are deleted.
-	 *
-	 * Beginning in February 2012 PMIDs include extensions following a decimal point to account for article versions
-	 * (e.g., 21804956.2). All citations are considered version 1 until replaced.  The extended PMID is not displayed
-	 * on the MEDLINE format.
-	 *
-	 * View the citation in abstract format in PubMed to access additional versions when available (see the article in
-	 * the Jan-Feb 2012 NLM Technical Bulletin).
-	 *
-	 * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
-	 */
-	pmid,
-
-	/**
-	 * This field contains the unique identifier for the cited article in PubMed Central. The identifier begins with the
-	 * prefix PMC.
-	 *
-	 * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
-	 */
-	pmc, handle, arXiv, nct, pdb, w3id,
-
-	// Organization
-	openorgs, ROR, GRID, PIC, ISNI, Wikidata, FundRef, corda, corda_h2020, mag_id, urn,
-
-	// Used by dedup
-	undefined, original;
-
-	public static boolean isValid(String type) {
-		return EnumUtils.isValidEnum(PidType.class, type);
-	}
-
-	public static PidType tryValueOf(String s) {
-		try {
-			return PidType.valueOf(s);
-		} catch (Exception e) {
-			return PidType.original;
-		}
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java
@ -1,33 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.util.Comparator;
-import java.util.Optional;
-
-import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
-
-public class PidValueComparator implements Comparator<StructuredProperty> {
-
-	@Override
-	public int compare(StructuredProperty left, StructuredProperty right) {
-
-		if (left == null && right == null)
-			return 0;
-		if (left == null)
-			return 1;
-		if (right == null)
-			return -1;
-
-		StructuredProperty l = CleaningFunctions.normalizePidValue(left);
-		StructuredProperty r = CleaningFunctions.normalizePidValue(right);
-
-		return Optional
-			.ofNullable(l.getValue())
-			.map(
-				lv -> Optional
-					.ofNullable(r.getValue())
-					.map(rv -> lv.compareTo(rv))
-					.orElse(-1))
-			.orElse(1);
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/RefereedComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/RefereedComparator.java
@ -1,46 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.util.Comparator;
-
-import eu.dnetlib.dhp.schema.oaf.Qualifier;
-
-/**
- * Comparator for sorting the values from the dnet:review_levels vocabulary, implements the following ordering
- *
- * peerReviewed (0001) > nonPeerReviewed (0002) > UNKNOWN (0000)
- */
-public class RefereedComparator implements Comparator<Qualifier> {
-
-	@Override
-	public int compare(Qualifier left, Qualifier right) {
-		if (left == null || left.getClassid() == null) {
-			return (right == null || right.getClassid() == null) ? 0 : -1;
-		} else if (right == null || right.getClassid() == null) {
-			return 1;
-		}
-
-		String lClass = left.getClassid();
-		String rClass = right.getClassid();
-
-		if (lClass.equals(rClass))
-			return 0;
-
-		if ("0001".equals(lClass))
-			return -1;
-		if ("0001".equals(rClass))
-			return 1;
-
-		if ("0002".equals(lClass))
-			return -1;
-		if ("0002".equals(rClass))
-			return 1;
-
-		if ("0000".equals(lClass))
-			return -1;
-		if ("0000".equals(rClass))
-			return 1;
-
-		return 0;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java
@ -1,56 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.util.Comparator;
-
-import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
-
-/**
- * Ranks PIDs by the type declared in their qualifier, preferring in this order: doi, pmid, pmc, handle, arXiv,
- * nct, pdb. Two types that are both outside this list compare as equal.
- */
-public class ResultPidComparator implements Comparator<StructuredProperty> {
-
-	// PID types listed from the most to the least preferred
-	private static final PidType[] PRIORITY = {
-		PidType.doi, PidType.pmid, PidType.pmc, PidType.handle, PidType.arXiv, PidType.nct, PidType.pdb
-	};
-
-	@Override
-	public int compare(StructuredProperty left, StructuredProperty right) {
-
-		final PidType leftType = PidType.tryValueOf(left.getQualifier().getClassid());
-		final PidType rightType = PidType.tryValueOf(right.getQualifier().getClassid());
-
-		if (leftType.equals(rightType))
-			return 0;
-
-		// the first side matching the current priority level wins; the left side is always probed first
-		for (PidType preferred : PRIORITY) {
-			if (leftType.equals(preferred))
-				return -1;
-			if (rightType.equals(preferred))
-				return 1;
-		}
-
-		return 0;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultTypeComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultTypeComparator.java
@ -1,87 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
-
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.Optional;
-import java.util.stream.Collectors;
-
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.KeyValue;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-/**
- * Orders results for merging purposes: null results go last, results collected from Crossref come first, then
- * results are ranked by result type (publication, dataset, software, other research product); results without
- * a result type classid sort after typed ones.
- */
-public class ResultTypeComparator implements Comparator<Result> {
-
-	public static final ResultTypeComparator INSTANCE = new ResultTypeComparator();
-
-	// result type classids listed from the most to the least preferred
-	private static final String[] TYPE_PRIORITY = {
-		ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
-		ModelConstants.DATASET_RESULTTYPE_CLASSID,
-		ModelConstants.SOFTWARE_RESULTTYPE_CLASSID,
-		ModelConstants.ORP_RESULTTYPE_CLASSID
-	};
-
-	@Override
-	public int compare(Result left, Result right) {
-
-		if (left == null)
-			return right == null ? 0 : 1;
-		if (right == null)
-			return -1;
-
-		// a record collected from Crossref wins over one that is not
-		final boolean leftCrossref = getCollectedFromIds(left).contains(CROSSREF_ID);
-		final boolean rightCrossref = getCollectedFromIds(right).contains(CROSSREF_ID);
-		if (leftCrossref != rightCrossref)
-			return leftCrossref ? -1 : 1;
-
-		final boolean leftUntyped = left.getResulttype() == null || left.getResulttype().getClassid() == null;
-		final boolean rightUntyped = right.getResulttype() == null || right.getResulttype().getClassid() == null;
-		if (leftUntyped)
-			return rightUntyped ? 0 : 1;
-		if (rightUntyped)
-			return -1;
-
-		final String leftType = left.getResulttype().getClassid();
-		final String rightType = right.getResulttype().getClassid();
-
-		if (!leftType.equals(rightType)) {
-			for (String preferred : TYPE_PRIORITY) {
-				if (leftType.equals(preferred))
-					return -1;
-				if (rightType.equals(preferred))
-					return 1;
-			}
-		}
-
-		// neither type is in the priority list (or they are equal): fall back to lexicographical ordering
-		return leftType.compareTo(rightType);
-	}
-
-	/**
-	 * Collects the keys of the collectedfrom entries of the given result.
-	 *
-	 * @param left the result to inspect
-	 * @return the set of collectedfrom keys, empty when the result has no collectedfrom list
-	 */
-	protected HashSet<String> getCollectedFromIds(Result left) {
-		final HashSet<String> ids = new HashSet<>();
-		final Iterable<KeyValue> collectedFrom = left.getCollectedfrom();
-		if (collectedFrom != null) {
-			for (KeyValue cf : collectedFrom) {
-				ids.add(cf.getKey());
-			}
-		}
-		return ids;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/pace/common/PaceCommonUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/pace/common/PaceCommonUtils.java
@ -1,101 +0,0 @@
-
-package eu.dnetlib.pace.common;
-
-import java.nio.charset.StandardCharsets;
-import java.text.Normalizer;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-
-import com.google.common.base.Splitter;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Sets;
-import com.ibm.icu.text.Transliterator;
-
-/**
- * Set of common string normalization functions for the framework.
- *
- * @author claudio
- */
-public class PaceCommonUtils {
-
-	// transliterator
-	protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");
-
-	// aliases_from and aliases_to are parallel strings: the character at index i of aliases_from is replaced by
-	// the character at index i of aliases_to
-	protected static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
-	protected static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";
-
-	// matches a textual unicode escape: a backslash, the letter 'u' and four hexadecimal digits
-	protected static Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})");
-
-	/**
-	 * Replaces every character listed in {@code aliases_from} with its counterpart in {@code aliases_to}.
-	 *
-	 * @param s the input string
-	 * @return the string with the aliases replaced, other characters are left untouched
-	 */
-	protected static String fixAliases(final String s) {
-		final StringBuilder sb = new StringBuilder();
-
-		s.chars().forEach(ch -> {
-			final int i = StringUtils.indexOf(aliases_from, ch);
-			sb.append(i >= 0 ? aliases_to.charAt(i) : (char) ch);
-		});
-
-		return sb.toString();
-	}
-
-	/**
-	 * Applies the shared "Any-Eng" transliterator.
-	 *
-	 * @param s the input string
-	 * @return the transliterated string, or the input itself when the transliteration fails
-	 */
-	protected static String transliterate(final String s) {
-		try {
-			return transliterator.transliterate(s);
-		} catch (Exception e) {
-			// best effort: fall back to the original text
-			return s;
-		}
-	}
-
-	/**
-	 * Normalizes a string: textual unicode escapes are decoded, the text is NFD-decomposed, transliterated,
-	 * cleaned from the known aliases and lower-cased; then everything but spaces and word characters is dropped
-	 * and the remaining punctuation and digits are turned into spaces.
-	 *
-	 * @param s the input string
-	 * @return the normalized, trimmed string
-	 */
-	public static String normalize(final String s) {
-		return fixAliases(transliterate(nfd(unicodeNormalization(s))))
-			// locale-independent lower-casing: the default locale may map letters outside ASCII (e.g. the
-			// Turkish dotless i), which the next regex would then silently drop
-			.toLowerCase(java.util.Locale.ROOT)
-			// do not compact the regexes in a single expression, would cause StackOverflowError in case of large input
-			// strings
-			.replaceAll("[^ \\w]+", "")
-			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
-			.replaceAll("(\\p{Punct})+", " ")
-			.replaceAll("(\\d)+", " ")
-			.replaceAll("(\\n)+", " ")
-			.trim();
-	}
-
-	/**
-	 * @param s the input string
-	 * @return the canonical decomposition (NFD) of the input
-	 */
-	public static String nfd(final String s) {
-		return Normalizer.normalize(s, Normalizer.Form.NFD);
-	}
-
-	/**
-	 * Decodes the textual unicode escapes (backslash, 'u', four hexadecimal digits) found in the input into the
-	 * characters they denote.
-	 *
-	 * @param s the input string
-	 * @return the string with every escape sequence replaced by the corresponding character
-	 */
-	public static String unicodeNormalization(final String s) {
-
-		Matcher m = hexUnicodePattern.matcher(s);
-		StringBuffer buf = new StringBuffer(s.length());
-		while (m.find()) {
-			String ch = String.valueOf((char) Integer.parseInt(m.group(1), 16));
-			// quoteReplacement: the decoded character may be '$' or a backslash, which are special in replacements
-			m.appendReplacement(buf, Matcher.quoteReplacement(ch));
-		}
-		m.appendTail(buf);
-		return buf.toString();
-	}
-
-	/**
-	 * Loads a classpath resource made of one entry per line (UTF-8); each line is transliterated and cleaned
-	 * from the known aliases.
-	 *
-	 * @param classpath the location of the resource, resolved against this class
-	 * @return the set of loaded entries, or an empty set when the resource cannot be read
-	 */
-	public static Set<String> loadFromClasspath(final String classpath) {
-
-		Transliterator transliterator = Transliterator.getInstance("Any-Eng");
-
-		final Set<String> h = Sets.newHashSet();
-		// try-with-resources closes the stream, which was previously leaked; a null stream (resource not found)
-		// is tolerated by the construct, readLines then fails and the catch below yields the empty set
-		try (java.io.InputStream in = PaceCommonUtils.class.getResourceAsStream(classpath)) {
-			for (final String s : IOUtils.readLines(in, StandardCharsets.UTF_8)) {
-				h.add(fixAliases(transliterator.transliterate(s))); // transliteration of the stopwords
-			}
-		} catch (final Throwable e) {
-			// best effort by design: a partially loaded set is discarded
-			return Sets.newHashSet();
-		}
-		return h;
-	}
-
-	/**
-	 * Splits the input on single spaces, skipping empty tokens and trimming the others.
-	 *
-	 * @param s the input string
-	 * @param maxTokens the maximum number of tokens to return
-	 * @return a lazy view over at most {@code maxTokens} tokens
-	 */
-	protected static Iterable<String> tokens(final String s, final int maxTokens) {
-		return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens);
-	}
-
-}
--- a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala
+++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala
@ -1,8 +1,5 @@
 package eu.dnetlib.dhp.application

-import eu.dnetlib.dhp.common.Constants
-import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile
-
 import scala.io.Source

 /** This is the main Interface SparkApplication
@ -73,13 +70,4 @@ abstract class AbstractScalaApplication(
      .getOrCreate()
  }

-  /** Counts the rows found under `targetPath` and stores the total next to the output.
-    *
-    * The input is loaded with `spark.read.text` and counted; the count is then handed, as a string, to
-    * `writeHdfsFile` together with the Hadoop configuration of the Spark context and the destination
-    * `outputBasePath + Constants.MDSTORE_SIZE_PATH`.
-    *
-    * @param targetPath     location of the text data to count
-    * @param outputBasePath base path the destination of the size file is derived from
-    */
-  def reportTotalSize(targetPath: String, outputBasePath: String): Unit = {
-    val total_items = spark.read.text(targetPath).count()
-    writeHdfsFile(
-      spark.sparkContext.hadoopConfiguration,
-      s"$total_items",
-      outputBasePath + Constants.MDSTORE_SIZE_PATH
-    )
-  }
-
 }
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
@ -1,109 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.commons.io.IOUtils;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-
-/**
- * Manual tests against the Zenodo sandbox deposition API. Disabled: they need network access and, judging by
- * the empty ACCESS_TOKEN below, a personal token to be filled in (assumption to be confirmed).
- */
-@Disabled
-class ZenodoAPIClientTest {
-
-	private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
-	private final String ACCESS_TOKEN = "";
-
-	private final String CONCEPT_REC_ID = "657113";
-
-	private final String depositionId = "674915";
-
-	@Test
-	void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
-			.getPath());
-
-		// the stream is closed as soon as the upload is over
-		try (InputStream is = new FileInputStream(file)) {
-			Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-		}
-
-		Assertions.assertEquals(200, client.sendMretadata(readMetadata()));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewDeposition() throws IOException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(201, client.newDeposition());
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
-			.getPath());
-
-		try (InputStream is = new FileInputStream(file)) {
-			Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-		}
-
-		Assertions.assertEquals(200, client.sendMretadata(readMetadata()));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionNewName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion")
-			.getPath());
-
-		try (InputStream is = new FileInputStream(file)) {
-			Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-		}
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionOldName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
-			.getPath());
-
-		try (InputStream is = new FileInputStream(file)) {
-			Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-		}
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	/**
-	 * Reads the sample deposition metadata from the test resources, decoding it as UTF-8 and closing the stream.
-	 *
-	 * @return the content of metadata.json
-	 * @throws IOException when the resource cannot be read
-	 */
-	private String readMetadata() throws IOException {
-		try (InputStream is = getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json")) {
-			return IOUtils.toString(is, java.nio.charset.StandardCharsets.UTF_8);
-		}
-	}
-
-}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java
@ -1,21 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import java.util.Set;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-
-/**
- * Checks the lookups offered by PidBlacklistProvider.
- */
-class BlackListProviderTest {
-
-	/**
-	 * The whole blacklist must be available and hold at least one entry under "doi", while asking for the
-	 * type "xxx" must yield an empty, non-null set.
-	 */
-	@Test
-	void blackListTest() {
-
-		Assertions.assertNotNull(PidBlacklistProvider.getBlacklist());
-		Assertions.assertNotNull(PidBlacklistProvider.getBlacklist().get("doi"));
-		Assertions.assertTrue(PidBlacklistProvider.getBlacklist().get("doi").size() > 0);
-		// lookup by type: "xxx" is expected to have no blacklisted values
-		final Set<String> xxx = PidBlacklistProvider.getBlacklist("xxx");
-		Assertions.assertNotNull(xxx);
-		Assertions.assertEquals(0, xxx.size());
-	}
-}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java
@ -1,87 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-
-import java.io.IOException;
-
-import org.apache.commons.io.IOUtils;
-import org.junit.jupiter.api.Test;
-
-import com.fasterxml.jackson.databind.DeserializationFeature;
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.schema.oaf.Publication;
-
-/**
- * Verifies the identifiers produced by IdentifierFactory for a set of sample publications, with and without
- * the md5 flag.
- */
-class IdentifierFactoryTest {
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
-		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
-
-	@Test
-	void testCreateIdentifierForPublication() throws IOException {
-
-		verifyIdentifier(
-			"publication_doi1.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
-
-		verifyIdentifier(
-			"publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
-
-		verifyIdentifier(
-			"publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
-
-		verifyIdentifier(
-			"publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true);
-
-		verifyIdentifier(
-			"publication_doi5.json", "50|doi_________::3bef95c0ca26dd55451fc8839ea69d27", true);
-
-		verifyIdentifier(
-			"publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true);
-
-		verifyIdentifier(
-			"publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
-
-		verifyIdentifier(
-			"publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
-
-		final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
-		verifyIdentifier("publication_3.json", defaultID, true);
-		verifyIdentifier("publication_4.json", defaultID, true);
-		verifyIdentifier("publication_5.json", defaultID, true);
-
-	}
-
-	@Test
-	void testCreateIdentifierForPublicationNoHash() throws IOException {
-
-		verifyIdentifier("publication_doi1.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
-		verifyIdentifier("publication_doi2.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
-		verifyIdentifier("publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
-		verifyIdentifier(
-			"publication_urn1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
-
-		final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
-		verifyIdentifier("publication_3.json", defaultID, false);
-		verifyIdentifier("publication_4.json", defaultID, false);
-		verifyIdentifier("publication_5.json", defaultID, false);
-	}
-
-	@Test
-	void testCreateIdentifierForROHub() throws IOException {
-		verifyIdentifier(
-			"orp-rohub.json", "50|w3id________::afc7592914ae190a50570db90f55f9c2", true);
-	}
-
-	/**
-	 * Loads a publication from a JSON resource located next to this class and checks the identifier created
-	 * for it.
-	 *
-	 * @param filename the name of the JSON resource
-	 * @param expectedID the identifier the factory is expected to produce
-	 * @param md5 the flag forwarded to {@code IdentifierFactory.createIdentifier}
-	 * @throws IOException when the resource cannot be read or parsed
-	 */
-	protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException {
-		final String json;
-		// explicit UTF-8 decoding instead of the platform default; the stream is closed once read
-		try (java.io.InputStream in = getClass().getResourceAsStream(filename)) {
-			json = IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
-		}
-		final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);
-
-		String id = IdentifierFactory.createIdentifier(pub, md5);
-		System.out.println(id);
-		assertNotNull(id);
-		assertEquals(expectedID, id);
-	}
-
-}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtilsTest.java
@ -1,130 +0,0 @@
-
-package eu.dnetlib.dhp.schema.oaf.utils;
-
-import static org.junit.jupiter.api.Assertions.*;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-import java.util.HashSet;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import org.apache.commons.beanutils.BeanUtils;
-import org.apache.commons.io.IOUtils;
-import org.junit.jupiter.api.Test;
-
-import com.fasterxml.jackson.databind.DeserializationFeature;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.collect.Lists;
-
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.*;
-
-/**
- * Exercises MergeUtils on sample publications and datasets loaded from the JSON resources located next to
- * this class.
- */
-public class MergeUtilsTest {
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
-		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
-
-	@Test
-	void testMergePubs_new() throws IOException {
-		Publication pt = read("publication_test.json", Publication.class);
-		Publication p1 = read("publication_test.json", Publication.class);
-
-		assertEquals(1, pt.getCollectedfrom().size());
-		assertEquals(ModelConstants.CROSSREF_ID, pt.getCollectedfrom().get(0).getKey());
-
-		Instance i = new Instance();
-		i.setUrl(Lists.newArrayList("https://..."));
-		p1.getInstance().add(i);
-
-		Publication ptp1 = MergeUtils.mergePublication(pt, p1);
-
-		assertNotNull(ptp1.getInstance());
-		assertEquals(2, ptp1.getInstance().size());
-
-	}
-
-	@Test
-	void testMergePubs() throws IOException {
-		Publication p1 = read("publication_1.json", Publication.class);
-		Publication p2 = read("publication_2.json", Publication.class);
-		Dataset d1 = read("dataset_1.json", Dataset.class);
-		Dataset d2 = read("dataset_2.json", Dataset.class);
-
-		assertEquals(1, p1.getCollectedfrom().size());
-		assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
-		assertEquals(1, d2.getCollectedfrom().size());
-		assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
-
-		assertEquals(1, p2.getCollectedfrom().size());
-		assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
-		assertEquals(1, d1.getCollectedfrom().size());
-		assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
-
-		final Result p1d2 = MergeUtils.checkedMerge(p1, d2, true);
-		assertEquals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, p1d2.getResulttype().getClassid());
-		assertTrue(p1d2 instanceof Publication);
-		assertEquals(p1.getId(), p1d2.getId());
-	}
-
-	@Test
-	void testMergePubs_1() throws IOException {
-		Publication p2 = read("publication_2.json", Publication.class);
-		Dataset d1 = read("dataset_1.json", Dataset.class);
-
-		final Result p2d1 = MergeUtils.checkedMerge(p2, d1, true);
-		assertEquals((ModelConstants.DATASET_RESULTTYPE_CLASSID), p2d1.getResulttype().getClassid());
-		assertTrue(p2d1 instanceof Dataset);
-		assertEquals(d1.getId(), p2d1.getId());
-		assertEquals(2, p2d1.getCollectedfrom().size());
-	}
-
-	@Test
-	void testMergePubs_2() throws IOException {
-		Publication p1 = read("publication_1.json", Publication.class);
-		Publication p2 = read("publication_2.json", Publication.class);
-
-		Result p1p2 = MergeUtils.checkedMerge(p1, p2, true);
-		assertTrue(p1p2 instanceof Publication);
-		assertEquals(p1.getId(), p1p2.getId());
-		assertEquals(2, p1p2.getCollectedfrom().size());
-	}
-
-	@Test
-	void testDelegatedAuthority_1() throws IOException {
-		Dataset d1 = read("dataset_2.json", Dataset.class);
-		Dataset d2 = read("dataset_delegated.json", Dataset.class);
-
-		assertEquals(1, d2.getCollectedfrom().size());
-		assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
-
-		Result res = (Result) MergeUtils.merge(d1, d2, true);
-
-		assertEquals(d2, res);
-	}
-
-	@Test
-	void testDelegatedAuthority_2() throws IOException {
-		// NOTE(review): a publication record is deserialized as Dataset here - presumably on purpose, confirm
-		Dataset p1 = read("publication_1.json", Dataset.class);
-		Dataset d2 = read("dataset_delegated.json", Dataset.class);
-
-		assertEquals(1, d2.getCollectedfrom().size());
-		assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
-
-		Result res = (Result) MergeUtils.merge(p1, d2, true);
-
-		assertEquals(d2, res);
-	}
-
-	/**
-	 * @param collectedfrom the collectedfrom entries of a result
-	 * @return the set of their keys
-	 */
-	protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
-		return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
-	}
-
-	/**
-	 * Deserializes a JSON resource located next to this class.
-	 *
-	 * @param filename the name of the JSON resource
-	 * @param clazz the type to deserialize into
-	 * @return the deserialized result
-	 * @throws IOException when the resource cannot be read or parsed
-	 */
-	protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
-		// explicit UTF-8 decoding instead of the platform default; the stream is closed once read
-		try (java.io.InputStream in = getClass().getResourceAsStream(filename)) {
-			final String json = IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
-			return OBJECT_MAPPER.readValue(json, clazz);
-		}
-	}
-
-}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
@ -149,7 +149,7 @@ class OafMapperUtilsTest {
 	void testDate() {
 		final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
 		assertNotNull(date);
-		assertEquals("1998-02-23", date);
+		System.out.println(date);
 	}

 	@Test
@ -166,8 +166,8 @@ class OafMapperUtilsTest {

 		assertEquals(
 			ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
-			MergeUtils
-				.mergeResult(p1, d2)
+			OafMapperUtils
+				.mergeResults(p1, d2)
 				.getResulttype()
 				.getClassid());

@ -178,10 +178,10 @@ class OafMapperUtilsTest {

 		assertEquals(
 			ModelConstants.DATASET_RESULTTYPE_CLASSID,
-			((Result) MergeUtils
-				.merge(p2, d1))
-					.getResulttype()
-					.getClassid());
+			OafMapperUtils
+				.mergeResults(p2, d1)
+				.getResulttype()
+				.getClassid());
 	}

 	@Test
@ -192,7 +192,7 @@ class OafMapperUtilsTest {
 		assertEquals(1, d2.getCollectedfrom().size());
 		assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));

-		Result res = MergeUtils.mergeResult(d1, d2);
+		Result res = OafMapperUtils.mergeResults(d1, d2);

 		assertEquals(d2, res);

--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorEnricherTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorEnricherTest.java
@ -1,9 +1,10 @@

-package eu.dnetlib.dhp.enrich.orcid;
+package eu.dnetlib.oa.merge;
+
+import static org.junit.jupiter.api.Assertions.*;

 import java.io.BufferedReader;
 import java.io.InputStreamReader;
-import java.util.Collections;
 import java.util.List;
 import java.util.Objects;

@ -13,9 +14,10 @@ import org.junit.platform.commons.util.StringUtils;
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;

+import eu.dnetlib.dhp.oa.merge.AuthorMerger;
 import eu.dnetlib.dhp.schema.oaf.Author;

-public class ORCIDAuthorEnricherTest {
+public class AuthorMergerTest {

 	@Test
 	public void testEnrcichAuthor() throws Exception {
@ -24,13 +26,12 @@ public class ORCIDAuthorEnricherTest {
 		BufferedReader pr = new BufferedReader(new InputStreamReader(
 			Objects
 				.requireNonNull(
-					ORCIDAuthorEnricherTest.class
-						.getResourceAsStream("/eu/dnetlib/dhp/enrich/orcid/authors_publication_sample.json"))));
+					AuthorMergerTest.class
+						.getResourceAsStream("/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json"))));
 		BufferedReader or = new BufferedReader(new InputStreamReader(
 			Objects
 				.requireNonNull(
-					ORCIDAuthorEnricherTest.class
-						.getResourceAsStream("/eu/dnetlib/dhp/enrich/orcid/authors_orcid_sample.json"))));
+					AuthorMergerTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json"))));

 		TypeReference<List<Author>> aclass = new TypeReference<List<Author>>() {
 		};
@ -66,8 +67,7 @@ public class ORCIDAuthorEnricherTest {
 				long start = System.currentTimeMillis();

 //                final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
-				final List<Author> enrichedList = Collections.emptyList(); // SparkEnrichGraphWithOrcidAuthors.enrichOrcid(publicationAuthors,
-																			// orcidAuthors);
+				final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);

 				long enrichedAuthorWithPid = enrichedList
 					.stream()
@ -91,4 +91,24 @@ public class ORCIDAuthorEnricherTest {
 		}
 	}

+	/**
+	 * Two authors whose name and surname are swapped ("Anand, Rachna" vs "Rachna, Anand") must be recognised
+	 * as similar by AuthorMerger.checkORCIDSimilarity.
+	 */
+	@Test
+	public void checkSimilarityTest() {
+		final Author left = new Author();
+		left.setFullname("Anand, Rachna");
+		left.setName("Anand");
+		left.setSurname("Rachna");
+
+		// same person, with name and surname exchanged
+		final Author right = new Author();
+		right.setFullname("Rachna, Anand");
+		right.setName("Rachna");
+		right.setSurname("Anand");
+
+		System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));
+
+		assertTrue(AuthorMerger.checkORCIDSimilarity(left, right));
+
+	}
+
 }
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/enrich/orcid/authors_orcid_sample.json
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/enrich/orcid/authors_orcid_sample.json
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/enrich/orcid/authors_publication_sample.json
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/enrich/orcid/authors_publication_sample.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/enrichment.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/enrichment.json
@ -1,12 +0,0 @@
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0000/ra.v2i3.114::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.65008652949e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0000/ra.v2i3.114"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/(aj).v3i6.458::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/(aj).v3i6.458"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/1587::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.39172290649e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/1587"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/462::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.33235333753e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.36"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.00285265116e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/462"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/731::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/731"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/ijllis.v9i4.2066.g2482::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"8.48190886761e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/ijllis.v9i4.2066.g2482"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0118/alfahim.v3i1.140::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.88840807598e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0118/alfahim.v3i1.140"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0166/fk2.stagefigshare.6442896.v3::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0166/fk2.stagefigshare.6442896.v3"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0301/jttb.v2i1.64::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0301/jttb.v2i1.64"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v1i1.567::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2.62959564033e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v1i1.567"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v2i1.765::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.40178571921e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0559872"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"3.67659957614e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v2i1.765"}]}]}
-{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0901/jkip.v7i3.485::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.26204125721e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0901/jkip.v7i3.485"}]}]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/orp-rohub.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/orp-rohub.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json
@ -1 +0,0 @@
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json
@ -1 +0,0 @@
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json
@ -1 +0,0 @@
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_apc.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_apc.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_apc2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_apc2.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json
@ -1,33 +0,0 @@
-{
-  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
-  "instance": [
-    {
-      "collectedfrom": {
-        "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
-        "value": "Crossref"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "doi"},
-          "value": "10.1016/j.cmet.2010.03.013"
-        }
-      ]
-    },
-    {
-      "pid": [
-        {
-          "qualifier": {"classid": "urn"},
-          "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
-        },
-        {
-          "qualifier": {"classid": "scp-number"},
-          "value": "79953761260"
-        },
-        {
-          "qualifier": {"classid": "pmc"},
-          "value": "21459329"
-        }
-      ]
-    }
-  ]
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json
@ -1,37 +0,0 @@
-{
-  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
-  "instance": [
-    {
-      "collectedfrom": {
-        "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
-        "value": "Crossref"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "doi"},
-          "value": "10.1016/j.cmet.2010.03.013"
-        }
-      ]
-    },
-    {
-      "collectedfrom": {
-        "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
-        "value": "Europe PubMed Central"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "urn"},
-          "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
-        },
-        {
-          "qualifier": {"classid": "scp-number"},
-          "value": "79953761260"
-        },
-        {
-          "qualifier": {"classid": "pmc"},
-          "value": "21459329"
-        }
-      ]
-    }
-  ]
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json
@ -1,37 +0,0 @@
-{
-  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
-  "instance": [
-    {
-      "collectedfrom": {
-        "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
-        "value": "Zenodo"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "doi"},
-          "value": "10.1016/j.cmet.2010.03.013"
-        }
-      ]
-    },
-    {
-      "collectedfrom": {
-        "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
-        "value": "Europe PubMed Central"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "urn"},
-          "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
-        },
-        {
-          "qualifier": {"classid": "scp-number"},
-          "value": "79953761260"
-        },
-        {
-          "qualifier": {"classid": "pmc"},
-          "value": "21459329"
-        }
-      ]
-    }
-  ]
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json
@ -1,37 +0,0 @@
-{
-  "id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66",
-  "instance": [
-    {
-      "collectedfrom": {
-        "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
-        "value": "Zenodo"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "doi"},
-          "value": "10.1016/j.cmet.2010.03.013"
-        },
-        {
-          "qualifier": {"classid": "handle"},
-          "value": "11012/83840"
-        }
-      ]
-    },
-    {
-      "collectedfrom": {
-        "key": "10|opendoar____::2852",
-        "value": "Digital library of Brno University of Technology"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "pmc"},
-          "value": "21459329"
-        },
-        {
-          "qualifier": {"classid": "handle"},
-          "value": "11012/83840"
-        }
-      ]
-    }
-  ]
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi5.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi5.json
@ -1,37 +0,0 @@
-{
-  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
-  "instance": [
-    {
-      "collectedfrom": {
-        "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
-        "value": "Zenodo"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "doi"},
-          "value": "10.5281/zenodo.5121485"
-        }
-      ]
-    },
-    {
-      "collectedfrom": {
-        "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
-        "value": "Europe PubMed Central"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "urn"},
-          "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
-        },
-        {
-          "qualifier": {"classid": "scp-number"},
-          "value": "79953761260"
-        },
-        {
-          "qualifier": {"classid": "pmc"},
-          "value": "21459329"
-        }
-      ]
-    }
-  ]
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_irish_tender_1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_irish_tender_1.json
@ -1,3 +0,0 @@
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ], "isGreen": null, "openAccessColor":  "gold", "isInDiamondJournal": null, "publiclyFunded": null}
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "publication" }, "isGreen": true, "openAccessColor":  "gold", "isInDiamondJournal": true, "publiclyFunded": false }
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1h", "resulttype" : { "classid" : "publication" }, "isGreen": false, "openAccessColor":  null, "isInDiamondJournal": true, "publiclyFunded": false }
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_irish_tender_2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_irish_tender_2.json
@ -1,3 +0,0 @@
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ], "isGreen": null, "openAccessColor":  "gold", "isInDiamondJournal": null, "publiclyFunded": null}
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "publication" }, "isGreen": true, "openAccessColor":  "bronze", "isInDiamondJournal": true, "publiclyFunded": false }
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1h", "resulttype" : { "classid" : "publication" }, "isGreen": false, "openAccessColor":  null, "isInDiamondJournal": true, "publiclyFunded": false }
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_openapc.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_openapc.json
@ -1,31 +0,0 @@
-{
-  "id": "50|openapc_____::000023f9cb6e3a247c764daec4273cbc",
-  "resuttype": {
-    "classid": "publication"
-  },
-  "instance": [
-    {
-      "collectedfrom": {
-        "key": "10|apc_________::e2b1600b229fc30663c8a1f662debddf",
-        "value": "OpenAPC Global Initiative"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "doi"},
-          "value": "10.1016/j.cmet.2010.03.013"
-        },
-        {
-          "qualifier": {"classid": "pmc"},
-          "value": "21459329"
-        },
-        {
-          "qualifier": {"classid": "pmid"},
-          "value": "25811027"
-        }
-      ],
-      "url":["https://doi.org/10.1155/2015/439379"]
-    }
-  ]
-}
-
-
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json
@ -1,17 +0,0 @@
-{
-  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
-  "pid": [
-    {
-      "qualifier": {"classid": "urn"},
-      "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
-    },
-    {
-      "qualifier": {"classid": "scp-number"},
-      "value": "79953761260"
-    },
-    {
-      "qualifier": {"classid": "pmc"},
-      "value": "21459329"
-    }
-  ]
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json
@ -1,21 +0,0 @@
-{
-  "id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
-  "instance": [
-    {
-      "collectedfrom": {
-        "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
-        "value": "Europe PubMed Central"
-      },
-      "pid": [
-        {
-          "qualifier": {"classid": "doi"},
-          "value": "10.1016/j.cmet.2010.03.013"
-        },
-        {
-          "qualifier":{"classid":"pmc"},
-          "value":"21459329"
-        }
-      ]
-    }
-  ]
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_test.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_test.json
@ -1,428 +0,0 @@
-{
-  "author": [
-    {
-      "affiliation": null,
-      "fullname": "Deymier, Ghislaine",
-      "name": "Ghislaine",
-      "pid": [],
-      "rank": 1,
-      "surname": "Deymier"
-    },
-    {
-      "affiliation": null,
-      "fullname": "Gaschet, Frédéric",
-      "name": "Frédéric",
-      "pid": [],
-      "rank": 2,
-      "surname": "Gaschet"
-    },
-    {
-      "affiliation": null,
-      "fullname": "Pouyanne, Guillaume",
-      "name": "Guillaume",
-      "pid": [],
-      "rank": 3,
-      "surname": "Pouyanne"
-    }
-  ],
-  "bestaccessright": {
-    "classid": "OPEN",
-    "classname": "Open Access",
-    "schemeid": "dnet:access_modes",
-    "schemename": "dnet:access_modes"
-  },
-  "collectedfrom": [
-    {
-      "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
-      "value" : "Crossref"
-    }
-  ],
-  "context": [],
-  "contributor": [],
-  "country": [],
-  "coverage": [],
-  "dataInfo": {
-    "deletedbyinference": false,
-    "inferred": false,
-    "invisible": false,
-    "provenanceaction": {
-      "classid": "sysimport:crosswalk:repository",
-      "classname": "Harvested",
-      "schemeid": "dnet:provenanceActions",
-      "schemename": "dnet:provenanceActions"
-    },
-    "trust": "0.9"
-  },
-  "dateofacceptance": {
-    "dataInfo": {
-      "deletedbyinference": false,
-      "inferred": false,
-      "invisible": false,
-      "provenanceaction": {
-        "classid": "sysimport:crosswalk:repository",
-        "classname": "Harvested",
-        "schemeid": "dnet:provenanceActions",
-        "schemename": "dnet:provenanceActions"
-      },
-      "trust": "0.9"
-    },
-    "value": "2013-11-30"
-  },
-  "dateofcollection": "2024-02-28T00:22:13+0000",
-  "dateoftransformation": "2024-03-06T08:43:13.253Z",
-  "description": [
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "value": "For analyzing the reciprocal interaction between urban sprawl and car use, research has first focused on the link between urban density and mobility. By looking for a reduction in energy consumption, cities have favoured a compact planning development. Then reflection has broadened from the simple density to the wider, multi-dimensional concept of urban form. This controversy has led to a renewal of analysis in term of the costs of urban growth, notably by comparing the costs of \"compact\" and \"sprawled\" development. The idea is to compare the mobility costs of different urban forms. However, most often because of a lack of data, such studies are scarce. This paper suggests an innovative method to compute mobility costs at an infra-urban scale : The Spatialized Travel Account (STA). It is based on the CERTU's travel account methodology at a metropolitan scale. It puts forward an accurate estimate of the mobility costs for each transport mode (individual and public) and for each type of payer (households, firms, local authorities...). In order to test the relationships between mobility costs and urban form, we link the computed costs to morphological characteristics of infra-urban zones, taking in account sociodemographic characteristics of households."
-    },
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "value": "L'interaction réciproque entre étalement urbain et usage de l'automobile a conduit la recherche à se focaliser sur le lien entre les densités urbaines et la mobilité. En cherchant à réduire leur consommation d'énergie pour les transports, et donc leurs émissions de Gaz à Effet de Serre, les villes ont alors cherché à planifier la \" ville compacte \", privilégiant notamment la reconstruction de la ville sur elle-même et la densification. Par la suite, la réflexion s'est élargie de la simple densité à la notion de forme urbaine et à toutes ses dimensions. Cette controverse devait conduire à un renouveau des analyses en termes de coûts de la croissance urbaine : le débat reste vif, encore aujourd'hui, sur les coûts comparés de la ville étalée et de la ville compacte. Plus largement, il s'agit d'explorer les coûts des différentes formes urbaines en termes de mobilité. Malgré cela, généralement pour des raisons de disponibilité de données, les études sur le sujet restent extrêmement rares. Cet article propose un outil novateur pour mesurer les coûts de la mobilité à l'échelle intraurbaine : le Compte Déplacements Territorialisé (CDT). Il s'inspire de la méthode développée par le CERTU pour l'établissement des Comptes Déplacements Voyageurs à l'échelle métropolitaine. Le CDT propose, pour chacune des zones de l'agglomération, une estimation précise de l'ensemble des coûts liés aux déplacements de personnes, ventilés par mode de transport (individuels et collectifs) et par type de financeurs (ménages, entreprises, collectivités territoriales, etc.). Nous proposons une application de cette méthode à la controverse sur le lien entre forme urbaine et coûts de la mobilité. Les coûts sont reliés aux caractéristiques morphologiques des zones (en termes de densité et de diversité, notamment), en prenant soin de contrôler les facteurs socio-économiques qui influent traditionnellement sur les comportements de mobilité (taille du ménage, revenu, etc.)."
-    }
-  ],
-  "eoscifguidelines": [],
-  "externalReference": [],
-  "extraInfo": [],
-  "format": [
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "value": "application/pdf"
-    }
-  ],
-  "fulltext": [],
-  "id": "50|06cdd3ff4700::4826ac62a11a957fe332e2c291dcfcca",
-  "instance": [
-    {
-      "accessright": {
-        "classid": "OPEN",
-        "classname": "Open Access",
-        "schemeid": "dnet:access_modes",
-        "schemename": "dnet:access_modes"
-      },
-      "alternateIdentifier": [
-        {
-          "dataInfo": {
-            "deletedbyinference": false,
-            "inferred": false,
-            "invisible": false,
-            "provenanceaction": {
-              "classid": "sysimport:crosswalk:repository",
-              "classname": "Harvested",
-              "schemeid": "dnet:provenanceActions",
-              "schemename": "dnet:provenanceActions"
-            },
-            "trust": "0.9"
-          },
-          "qualifier": {
-            "classid": "doi",
-            "classname": "Digital Object Identifier",
-            "schemeid": "dnet:pid_types",
-            "schemename": "dnet:pid_types"
-          },
-          "value": "10.46298/cst.12132"
-        }
-      ],
-      "collectedfrom": {
-        "key": "10|openaire____::6824b298c96ba906a3e6a70593affbf5",
-        "value": "Episciences"
-      },
-      "dateofacceptance": {
-        "dataInfo": {
-          "deletedbyinference": false,
-          "inferred": false,
-          "invisible": false,
-          "provenanceaction": {
-            "classid": "sysimport:crosswalk:repository",
-            "classname": "Harvested",
-            "schemeid": "dnet:provenanceActions",
-            "schemename": "dnet:provenanceActions"
-          },
-          "trust": "0.9"
-        },
-        "value": "2013-11-30"
-      },
-      "hostedby": {
-        "key": "10|openaire____::6824b298c96ba906a3e6a70593affbf5",
-        "value": "Episciences"
-      },
-      "instanceTypeMapping": [
-        {
-          "originalType": "http://purl.org/coar/resource_type/c_6501",
-          "typeCode": "http://purl.org/coar/resource_type/c_6501",
-          "typeLabel": "journal article",
-          "vocabularyName": "openaire::coar_resource_types_3_1"
-        },
-        {
-          "originalType": "http://purl.org/coar/resource_type/c_6501",
-          "typeCode": "Article",
-          "typeLabel": "Article",
-          "vocabularyName": "openaire::user_resource_types"
-        }
-      ],
-      "instancetype": {
-        "classid": "0001",
-        "classname": "Article",
-        "schemeid": "dnet:publication_resource",
-        "schemename": "dnet:publication_resource"
-      },
-      "license": {
-        "dataInfo": {
-          "deletedbyinference": false,
-          "inferred": false,
-          "invisible": false,
-          "provenanceaction": {
-            "classid": "sysimport:crosswalk:repository",
-            "classname": "Harvested",
-            "schemeid": "dnet:provenanceActions",
-            "schemename": "dnet:provenanceActions"
-          },
-          "trust": "0.9"
-        },
-        "value": "CC BY NC SA"
-      },
-      "pid": [],
-      "refereed": {
-        "classid": "0002",
-        "classname": "nonPeerReviewed",
-        "schemeid": "dnet:review_levels",
-        "schemename": "dnet:review_levels"
-      },
-      "url": [
-        "https://doi.org/10.46298/cst.12132",
-        "https://cst.episciences.org/12132"
-      ]
-    }
-  ],
-  "language": {
-    "classid": "fra/fre",
-    "classname": "French",
-    "schemeid": "dnet:languages",
-    "schemename": "dnet:languages"
-  },
-  "lastupdatetimestamp": 1710636106633,
-  "metaResourceType": {
-    "classid": "Research Literature",
-    "classname": "Research Literature",
-    "schemeid": "openaire::meta_resource_types",
-    "schemename": "openaire::meta_resource_types"
-  },
-  "originalId": [
-    "oai:episciences.org:cst:12132",
-    "50|06cdd3ff4700::4826ac62a11a957fe332e2c291dcfcca"
-  ],
-  "pid": [],
-  "publisher": {
-    "dataInfo": {
-      "deletedbyinference": false,
-      "inferred": false,
-      "invisible": false,
-      "provenanceaction": {
-        "classid": "sysimport:crosswalk:repository",
-        "classname": "Harvested",
-        "schemeid": "dnet:provenanceActions",
-        "schemename": "dnet:provenanceActions"
-      },
-      "trust": "0.9"
-    },
-    "value": "episciences.org"
-  },
-  "relevantdate": [
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "qualifier": {
-        "classid": "Accepted",
-        "classname": "Accepted",
-        "schemeid": "dnet:dataCite_date",
-        "schemename": "dnet:dataCite_date"
-      },
-      "value": "2024-02-11"
-    },
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "qualifier": {
-        "classid": "issued",
-        "classname": "issued",
-        "schemeid": "dnet:dataCite_date",
-        "schemename": "dnet:dataCite_date"
-      },
-      "value": "2013-11-30"
-    },
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "qualifier": {
-        "classid": "available",
-        "classname": "available",
-        "schemeid": "dnet:dataCite_date",
-        "schemename": "dnet:dataCite_date"
-      },
-      "value": "2013-11-30"
-    }
-  ],
-  "resourcetype": {
-    "classid": "journal article",
-    "classname": "journal article",
-    "schemeid": "dnet:dataCite_resource",
-    "schemename": "dnet:dataCite_resource"
-  },
-  "resulttype": {
-    "classid": "publication",
-    "classname": "publication",
-    "schemeid": "dnet:result_typologies",
-    "schemename": "dnet:result_typologies"
-  },
-  "source": [],
-  "subject": [
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "qualifier": {
-        "classid": "keyword",
-        "classname": "keyword",
-        "schemeid": "dnet:subject_classification_typologies",
-        "schemename": "dnet:subject_classification_typologies"
-      },
-      "value": "JEL: H - Public Economics/H.H7 - State and Local Government • Intergovernmental Relations/H.H7.H72 - State and Local Budget and Expenditures"
-    },
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "qualifier": {
-        "classid": "keyword",
-        "classname": "keyword",
-        "schemeid": "dnet:subject_classification_typologies",
-        "schemename": "dnet:subject_classification_typologies"
-      },
-      "value": "Local public finance"
-    },
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "qualifier": {
-        "classid": "keyword",
-        "classname": "keyword",
-        "schemeid": "dnet:subject_classification_typologies",
-        "schemename": "dnet:subject_classification_typologies"
-      },
-      "value": "JEL: R - Urban, Rural, Regional, Real Estate, and Transportation Economics/R.R5 - Regional Government Analysis/R.R5.R51 - Finance in Urban and Rural Economies"
-    }
-  ],
-  "title": [
-    {
-      "dataInfo": {
-        "deletedbyinference": false,
-        "inferred": false,
-        "invisible": false,
-        "provenanceaction": {
-          "classid": "sysimport:crosswalk:repository",
-          "classname": "Harvested",
-          "schemeid": "dnet:provenanceActions",
-          "schemename": "dnet:provenanceActions"
-        },
-        "trust": "0.9"
-      },
-      "qualifier": {
-        "classid": "main title",
-        "classname": "main title",
-        "schemeid": "dnet:dataCite_title",
-        "schemename": "dnet:dataCite_title"
-      },
-      "value": "Urban form and the costs of daily mobility. The spatialized travel account tool and its application to the Bordeaux metropolitan area"
-    }
-  ]
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json
@ -1,23 +0,0 @@
-{
-  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
-  "pid": [
-    {
-      "qualifier": {
-        "classid": "urn"
-      },
-      "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
-    },
-    {
-      "qualifier": {
-        "classid": "scp-number"
-      },
-      "value": "79953761260"
-    },
-    {
-      "qualifier": {
-        "classid": "pmcid"
-      },
-      "value": "21459329"
-    }
-  ]
-}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publications.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publications.json
--- a/dhp-pace-core/pom.xml
+++ b/dhp-pace-core/pom.xml
@ -49,12 +49,6 @@
 	</build>

 	<dependencies>
-		<dependency>
-			<groupId>eu.dnetlib.dhp</groupId>
-			<artifactId>dhp-common</artifactId>
-			<version>${project.version}</version>
-		</dependency>
-
 		<dependency>
 			<groupId>edu.cmu</groupId>
 			<artifactId>secondstring</artifactId>
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java
@ -20,7 +20,7 @@ public class WordsStatsSuffixPrefixChain extends AbstractClusteringFunction {
 		return suffixPrefixChain(s, param("mod"));
 	}

-	static Collection<String> suffixPrefixChain(String s, int mod) {
+	private Collection<String> suffixPrefixChain(String s, int mod) {

 		// create the list of words from the string (remove short words)
 		List<String> wordsList = Arrays
@ -38,7 +38,7 @@ public class WordsStatsSuffixPrefixChain extends AbstractClusteringFunction {

 	}

-	static private Collection<String> doSuffixPrefixChain(List<String> wordsList, String prefix) {
+	private Collection<String> doSuffixPrefixChain(List<String> wordsList, String prefix) {

 		Set<String> set = Sets.newLinkedHashSet();
 		switch (wordsList.size()) {
@ -80,16 +80,12 @@ public class WordsStatsSuffixPrefixChain extends AbstractClusteringFunction {

 	}

-	private static String suffix(String s, int len) {
+	private String suffix(String s, int len) {
 		return s.substring(s.length() - len);
 	}

-	private static String prefix(String s, int len) {
+	private String prefix(String s, int len) {
 		return s.substring(0, len);
 	}

-	static public void main(String[] args) {
-		String title = "MY LIFE AS A BOSON: THE STORY OF \"THE HIGGS\"".toLowerCase();
-		System.out.println(suffixPrefixChain(title, 10));
-	}
 }
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
@ -1,26 +1,32 @@

 package eu.dnetlib.pace.common;

-import com.google.common.base.Joiner;
-import com.google.common.collect.Sets;
-import com.ibm.icu.text.Transliterator;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-
 import java.io.IOException;
 import java.io.StringWriter;
 import java.nio.charset.StandardCharsets;
+import java.text.Normalizer;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;

+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Sets;
+import com.ibm.icu.text.Transliterator;
+
+import eu.dnetlib.pace.clustering.NGramUtils;
+
 /**
 * Set of common functions for the framework
 *
 * @author claudio
 */
-public class AbstractPaceFunctions extends PaceCommonUtils {
+public class AbstractPaceFunctions {

 	// city map to be used when translating the city names into codes
 	private static Map<String, String> cityMap = AbstractPaceFunctions
@ -35,6 +41,9 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 	protected static Set<String> stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt");
 	protected static Set<String> stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt");

+	// transliterator
+	protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");
+
 	// blacklist of ngrams: to avoid generic keys
 	protected static Set<String> ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt");

@ -42,6 +51,8 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 	public static final Pattern HTML_REGEX = Pattern.compile("<[^>]*>");

 	private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
+	private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
+	private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";

 	// doi prefix for normalization
 	public static final Pattern DOI_PREFIX = Pattern.compile("(https?:\\/\\/dx\\.doi\\.org\\/)|(doi:)");
@ -118,6 +129,25 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 		return numberPattern.matcher(strNum).matches();
 	}

+	protected static String fixAliases(final String s) {
+		final StringBuilder sb = new StringBuilder();
+
+		s.chars().forEach(ch -> {
+			final int i = StringUtils.indexOf(aliases_from, ch);
+			sb.append(i >= 0 ? aliases_to.charAt(i) : (char) ch);
+		});
+
+		return sb.toString();
+	}
+
+	protected static String transliterate(final String s) {
+		try {
+			return transliterator.transliterate(s);
+		} catch (Exception e) {
+			return s;
+		}
+	}
+
 	protected static String removeSymbols(final String s) {
 		final StringBuilder sb = new StringBuilder();

@ -132,6 +162,23 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 		return s != null;
 	}

+	public static String normalize(final String s) {
+		return fixAliases(transliterate(nfd(unicodeNormalization(s))))
+			.toLowerCase()
+			// do not compact the regexes in a single expression, would cause StackOverflowError in case of large input
+			// strings
+			.replaceAll("[^ \\w]+", "")
+			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
+			.replaceAll("(\\p{Punct})+", " ")
+			.replaceAll("(\\d)+", " ")
+			.replaceAll("(\\n)+", " ")
+			.trim();
+	}
+
+	public static String nfd(final String s) {
+		return Normalizer.normalize(s, Normalizer.Form.NFD);
+	}
+
 	public static String utf8(final String s) {
 		byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
 		return new String(bytes, StandardCharsets.UTF_8);
@ -186,6 +233,22 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 		return newset;
 	}

+	public static Set<String> loadFromClasspath(final String classpath) {
+
+		Transliterator transliterator = Transliterator.getInstance("Any-Eng");
+
+		final Set<String> h = Sets.newHashSet();
+		try {
+			for (final String s : IOUtils
+				.readLines(NGramUtils.class.getResourceAsStream(classpath), StandardCharsets.UTF_8)) {
+				h.add(fixAliases(transliterator.transliterate(s))); // transliteration of the stopwords
+			}
+		} catch (final Throwable e) {
+			return Sets.newHashSet();
+		}
+		return h;
+	}
+
 	public static Map<String, String> loadMapFromClasspath(final String classpath) {

 		Transliterator transliterator = Transliterator.getInstance("Any-Eng");
@ -240,6 +303,10 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 		return StringUtils.substring(s, 0, 1).toLowerCase();
 	}

+	protected static Iterable<String> tokens(final String s, final int maxTokens) {
+		return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens);
+	}
+
 	public static String normalizePid(String pid) {
 		return DOI_PREFIX.matcher(pid.toLowerCase()).replaceAll("");
 	}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/Person.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/Person.java
@ -12,7 +12,7 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.hash.Hashing;

-import eu.dnetlib.pace.common.PaceCommonUtils;
+import eu.dnetlib.pace.common.AbstractPaceFunctions;
 import eu.dnetlib.pace.util.Capitalise;
 import eu.dnetlib.pace.util.DotAbbreviations;

@ -86,7 +86,7 @@ public class Person {

 	private List<String> splitTerms(final String s) {
 		if (particles == null) {
-			particles = PaceCommonUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt");
+			particles = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt");
 		}

 		final List<String> list = Lists.newArrayList();
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java
@ -1,10 +1,8 @@

 package eu.dnetlib.pace.tree;

-import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
-import java.util.function.BiFunction;
 import java.util.stream.Collectors;

 import com.wcohen.ss.AbstractStringDistance;
@ -13,7 +11,6 @@ import eu.dnetlib.pace.config.Config;
 import eu.dnetlib.pace.model.Person;
 import eu.dnetlib.pace.tree.support.AbstractListComparator;
 import eu.dnetlib.pace.tree.support.ComparatorClass;
-import eu.dnetlib.pace.util.AuthorMatchers;

@ComparatorClass("authorsMatch")
 public class AuthorsMatch extends AbstractListComparator {
@ -44,36 +41,24 @@ public class AuthorsMatch extends AbstractListComparator {
 	}

 	@Override
-	public double compare(final List<String> left, final List<String> right, final Config conf) {
-		if (left.isEmpty() || right.isEmpty())
+	public double compare(final List<String> a, final List<String> b, final Config conf) {
+		if (a.isEmpty() || b.isEmpty())
 			return -1;

-		if (left.size() > SIZE_THRESHOLD || right.size() > SIZE_THRESHOLD)
+		if (a.size() > SIZE_THRESHOLD || b.size() > SIZE_THRESHOLD)
 			return 1.0;

-		Double threshold = getDoubleParam("threshold");
 		int maxMiss = Integer.MAX_VALUE;
+		List<Person> bList = b.stream().map(author -> new Person(author, false)).collect(Collectors.toList());

-		if (threshold != null && threshold >= 0.0 && threshold <= 1.0 && left.size() == right.size()) {
-			maxMiss = (int) Math.floor((1 - threshold) * Math.max(left.size(), right.size()));
+		Double threshold = getDoubleParam("threshold");
+
+		if (threshold != null && threshold >= 0.0 && threshold <= 1.0 && a.size() == b.size()) {
+			maxMiss = (int) Math.floor((1 - threshold) * Math.max(a.size(), b.size()));
 		}

 		int common = 0;
-
-		List<String> a = new ArrayList<>(left);
-		List<String> b = new ArrayList<>(right);
-
-		common += AuthorMatchers
-			.removeMatches(a, b, (BiFunction<String, String, Object>) AuthorMatchers::matchEqualsIgnoreCase)
-			.size() / 2;
-		common += AuthorMatchers
-			.removeMatches(a, b, (BiFunction<String, String, Object>) AuthorMatchers::matchOrderedTokenAndAbbreviations)
-			.size() / 2;
-
-		List<Person> bList = b.stream().map(author -> new Person(author, false)).collect(Collectors.toList());
-
 		// compare each element of List1 with each element of List2
-		int alreadyMatched = common;
 		for (int i = 0; i < a.size(); i++) {
 			Person p1 = new Person(a.get(i), false);

@ -138,13 +123,13 @@ public class AuthorsMatch extends AbstractListComparator {
 				}
 			}

-			if (i - common - alreadyMatched > maxMiss) {
+			if (i - common > maxMiss) {
 				return 0.0;
 			}
 		}

 		// normalization factor to compute the score
-		int normFactor = left.size() == right.size() ? left.size() : (left.size() + right.size() - common);
+		int normFactor = a.size() == b.size() ? a.size() : (a.size() + b.size() - common);

 		if (TYPE.equals("percentage")) {
 			return (double) common / normFactor;
@ -175,4 +160,5 @@ public class AuthorsMatch extends AbstractListComparator {
 	public String normalization(String s) {
 		return normalize(utf8(cleanup(s)));
 	}
+
 }
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java
@ -23,18 +23,15 @@ public class InstanceTypeMatch extends AbstractListComparator {

 		// jolly types
 		translationMap.put("Conference object", "*");
-		translationMap.put("Research", "*");
 		translationMap.put("Other literature type", "*");
 		translationMap.put("Unknown", "*");
 		translationMap.put("UNKNOWN", "*");

 		// article types
 		translationMap.put("Article", "Article");
-		translationMap.put("Journal", "Article");
 		translationMap.put("Data Paper", "Article");
 		translationMap.put("Software Paper", "Article");
 		translationMap.put("Preprint", "Article");
-		translationMap.put("Part of book or chapter of book", "Article");

 		// thesis types
 		translationMap.put("Thesis", "Thesis");
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/AuthorMatchers.scala
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/AuthorMatchers.scala
@ -1,112 +0,0 @@
-package eu.dnetlib.pace.util
-
-import java.util.Locale
-import java.util.regex.Pattern
-import scala.util.control.Breaks.{break, breakable}
-
-object AuthorMatchers {
-  val SPLIT_REGEX = Pattern.compile("[\\s,\\.]+")
-
-  val WORD_DIFF = 2
-
-  def matchEqualsIgnoreCase(a1: String, a2: String): Boolean = {
-    if (a1 == null || a2 == null)
-      false
-    else
-      a1 == a2 || a1.toLowerCase(Locale.ROOT).equals(a2.toLowerCase(Locale.ROOT))
-  }
-
-  def matchOtherNames(fullName: String, otherNames: Seq[String]): Boolean = {
-    if (otherNames != null) {
-      otherNames.exists(matchEqualsIgnoreCase(fullName, _))
-    } else {
-      false
-    }
-  }
-
-  def matchOrderedTokenAndAbbreviations(a1: String, a2: String): Boolean = {
-    val p1: Array[String] = SPLIT_REGEX.split(a1.trim.toLowerCase(Locale.ROOT)).filter(_.nonEmpty).sorted
-    val p2: Array[String] = SPLIT_REGEX.split(a2.trim.toLowerCase(Locale.ROOT)).filter(_.nonEmpty).sorted
-
-    if (p1.length < 2 || p2.length < 2) return false
-    if (Math.abs(p1.length - p2.length) > WORD_DIFF) return false // use alternative comparison algo
-
-    var p1Idx: Int = 0
-    var p2Idx: Int = 0
-    var shortMatches: Int = 0
-    var longMatches: Int = 0
-    while (p1Idx < p1.length && p2Idx < p2.length) {
-      val e1: String = p1(p1Idx)
-      val c1: Char = e1.charAt(0)
-      val e2: String = p2(p2Idx)
-      val c2: Char = e2.charAt(0)
-      if (c1 < c2) p1Idx += 1
-      else if (c1 > c2) p2Idx += 1
-      else {
-        var res: Boolean = false
-        if (e1.length != 1 && e2.length != 1) {
-          res = e1 == e2
-          if (res)
-            longMatches += 1
-        } else {
-          res = true
-          shortMatches += 1
-        }
-        if (res) {
-          p1Idx += 1
-          p2Idx += 1
-        } else {
-          val diff: Int = e1.compareTo(e2)
-          if (diff < 0) p1Idx += 1
-          else if (diff > 0) p2Idx += 1
-        }
-      }
-    }
-    longMatches > 0 && (shortMatches + longMatches) == Math.min(p1.length, p2.length)
-  }
-
-  def removeMatches(
-                     graph_authors: java.util.List[String],
-                     orcid_authors: java.util.List[String],
-                     matchingFunc: java.util.function.BiFunction[String,String,Boolean]
-                   ) : java.util.List[String] = {
-    removeMatches(graph_authors, orcid_authors, (a, b) => matchingFunc(a,b))
-  }
-
-
-  def removeMatches(
-                                       graph_authors: java.util.List[String],
-                                       orcid_authors: java.util.List[String],
-                                       matchingFunc: (String, String) => Boolean
-                                     ) : java.util.List[String]  = {
-    val matched = new java.util.ArrayList[String]()
-
-    if (graph_authors != null && !graph_authors.isEmpty) {
-      val ait = graph_authors.iterator
-
-      while (ait.hasNext) {
-        val author = ait.next()
-        val oit = orcid_authors.iterator
-
-        breakable {
-          while (oit.hasNext) {
-            val orcid = oit.next()
-
-            if (matchingFunc(author, orcid)) {
-              ait.remove()
-              oit.remove()
-
-              matched.add(author)
-              matched.add(orcid)
-
-              break()
-            }
-          }
-        }
-      }
-    }
-
-    matched
-  }
-
-}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java
@ -15,4 +15,4 @@ public class Capitalise implements Function<String, String> {
 	public String apply(final String s) {
 		return WordUtils.capitalize(s.toLowerCase(), DELIM);
 	}
-}
+};
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DotAbbreviations.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DotAbbreviations.java
@ -8,4 +8,4 @@ public class DotAbbreviations implements Function<String, String> {
 	public String apply(String s) {
 		return s.length() == 1 ? s + "." : s;
 	}
-}
+};
--- a/dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/name_particles.txt
+++ b/dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/name_particles.txt
--- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
+++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
@ -7,10 +7,10 @@ import java.util.HashMap;
 import java.util.Map;

 import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;

 import eu.dnetlib.pace.model.Person;
+import jdk.nashorn.internal.ir.annotations.Ignore;

 public class UtilTest {

@ -22,7 +22,7 @@ public class UtilTest {
 	}

 	@Test
-	@Disabled
+	@Ignore
 	public void paceResolverTest() {
 		PaceResolver paceResolver = new PaceResolver();
 		paceResolver.getComparator("keywordMatch", params);
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java
@ -7,7 +7,8 @@ import java.util.function.BiFunction;

 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.schema.oaf.Oaf;
-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Relation;

 /** OAF model merging support. */
 public class MergeAndGet {
@ -45,7 +46,20 @@ public class MergeAndGet {
 	}

 	private static <G extends Oaf, A extends Oaf> G mergeFromAndGet(G x, A y) {
-		return (G) MergeUtils.merge(x, y);
+		if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) {
+			((Relation) x).mergeFrom((Relation) y);
+			return x;
+		} else if (isSubClass(x, OafEntity.class)
+			&& isSubClass(y, OafEntity.class)
+			&& isSubClass(x, y)) {
+			((OafEntity) x).mergeFrom((OafEntity) y);
+			return x;
+		}
+		throw new RuntimeException(
+			String
+				.format(
+					"MERGE_FROM_AND_GET incompatible types: %s, %s",
+					x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
 	}

 	@SuppressWarnings("unchecked")
--- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java
+++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java
@ -8,7 +8,6 @@ import static org.mockito.Mockito.*;

 import java.util.function.BiFunction;

-import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;

@ -86,7 +85,6 @@ public class MergeAndGetTest {
 		}

 		@Test
-		@Disabled
 		void shouldBehaveProperlyForRelationAndRelation() {
 			// given
 			Relation a = mock(Relation.class);
@ -98,9 +96,7 @@ public class MergeAndGetTest {
 			// then
 			Oaf x = fn.get().apply(a, b);
 			assertTrue(Relation.class.isAssignableFrom(x.getClass()));
-
-			// TODO should be reimplemented
-			// verify(a).mergeFrom(b);
+			verify(a).mergeFrom(b);
 			assertEquals(a, x);
 		}

@ -149,7 +145,6 @@ public class MergeAndGetTest {
 		}

 		@Test
-		@Disabled
 		void shouldBehaveProperlyForOafEntityAndOafEntity() {
 			// given
 			OafEntity a = mock(OafEntity.class);
@ -161,9 +156,7 @@ public class MergeAndGetTest {
 			// then
 			Oaf x = fn.get().apply(a, b);
 			assertTrue(OafEntity.class.isAssignableFrom(x.getClass()));
-
-			// TODO should be reimplemented
-			// verify(a).mergeFrom(b);
+			verify(a).mergeFrom(b);
 			assertEquals(a, x);
 		}
 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
@ -64,12 +64,6 @@ public class PrepareAffiliationRelations implements Serializable {
 		final String pubmedInputPath = parser.get("pubmedInputPath");
 		log.info("pubmedInputPath: {}", pubmedInputPath);

-		final String openapcInputPath = parser.get("openapcInputPath");
-		log.info("openapcInputPath: {}", openapcInputPath);
-
-		final String dataciteInputPath = parser.get("dataciteInputPath");
-		log.info("dataciteInputPath: {}", dataciteInputPath);
-
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

@ -91,20 +85,8 @@ public class PrepareAffiliationRelations implements Serializable {
 				JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
 					spark, pubmedInputPath, collectedFromPubmed);

-				List<KeyValue> collectedFromOpenAPC = OafMapperUtils
-					.listKeyValues(ModelConstants.OPEN_APC_ID, "OpenAPC");
-				JavaPairRDD<Text, Text> openAPCRelations = prepareAffiliationRelations(
-					spark, openapcInputPath, collectedFromOpenAPC);
-
-				List<KeyValue> collectedFromDatacite = OafMapperUtils
-					.listKeyValues(ModelConstants.DATACITE_ID, "Datacite");
-				JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations(
-					spark, dataciteInputPath, collectedFromDatacite);
-
 				crossrefRelations
 					.union(pubmedRelations)
-					.union(openAPCRelations)
-					.union(dataciteRelations)
 					.saveAsHadoopFile(
 						outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/score/deserializers/BipProjectModel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/score/deserializers/BipProjectModel.java
@ -34,10 +34,6 @@ public class BipProjectModel {

 	String totalCitationCount;

-	public String getProjectId() {
-		return projectId;
-	}
-
 	// each project bip measure has exactly one value, hence one key-value pair
 	private Measure createMeasure(String measureId, String measureValue) {

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java
@ -75,7 +75,6 @@ public class GetFOSSparkJob implements Serializable {
 		fosData.map((MapFunction<Row, FOSDataModel>) r -> {
 			FOSDataModel fosDataModel = new FOSDataModel();
 			fosDataModel.setDoi(r.getString(0).toLowerCase());
-			fosDataModel.setOaid(r.getString(1).toLowerCase());
 			fosDataModel.setLevel1(r.getString(2));
 			fosDataModel.setLevel2(r.getString(3));
 			fosDataModel.setLevel3(r.getString(4));
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java
@ -16,14 +16,12 @@ import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
-import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
@ -54,92 +52,62 @@ public class PrepareFOSSparkJob implements Serializable {
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

-		final Boolean distributeDOI = Optional
-			.ofNullable(parser.get("distributeDoi"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				if (distributeDOI)
-					distributeFOSdois(
-						spark,
-						sourcePath,
+				distributeFOSdois(
+					spark,
+					sourcePath,

-						outputPath);
-				else
-					distributeFOSoaid(spark, sourcePath, outputPath);
+					outputPath);
 			});
 	}

-	private static void distributeFOSoaid(SparkSession spark, String sourcePath, String outputPath) {
-		Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);
-
-		fosDataset
-			.groupByKey((MapFunction<FOSDataModel, String>) v -> v.getOaid().toLowerCase(), Encoders.STRING())
-			.mapGroups(
-				(MapGroupsFunction<String, FOSDataModel, Result>) (k,
-					it) -> getResult(
-						ModelSupport.entityIdPrefix.get(Result.class.getSimpleName().toLowerCase()) + "|" + k, it),
-				Encoders.bean(Result.class))
-			.write()
-			.mode(SaveMode.Overwrite)
-			.option("compression", "gzip")
-			.json(outputPath + "/fos");
-	}
-
-	@NotNull
-	private static Result getResult(String k, Iterator<FOSDataModel> it) {
-		Result r = new Result();
-		FOSDataModel first = it.next();
-		r.setId(k);
-
-		HashSet<String> level1 = new HashSet<>();
-		HashSet<String> level2 = new HashSet<>();
-		HashSet<String> level3 = new HashSet<>();
-		HashSet<String> level4 = new HashSet<>();
-		addLevels(level1, level2, level3, level4, first);
-		it.forEachRemaining(v -> addLevels(level1, level2, level3, level4, v));
-		List<Subject> sbjs = new ArrayList<>();
-		level1
-			.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
-		level2
-			.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
-		level3
-			.forEach(
-				l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
-		level4
-			.forEach(
-				l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
-		r.setSubject(sbjs);
-		r
-			.setDataInfo(
-				OafMapperUtils
-					.dataInfo(
-						false, null, true,
-						false,
-						OafMapperUtils
-							.qualifier(
-								ModelConstants.PROVENANCE_ENRICH,
-								null,
-								ModelConstants.DNET_PROVENANCE_ACTIONS,
-								ModelConstants.DNET_PROVENANCE_ACTIONS),
-						null));
-		return r;
-	}
-
 	private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) {
 		Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);

 		fosDataset
 			.groupByKey((MapFunction<FOSDataModel, String>) v -> v.getDoi().toLowerCase(), Encoders.STRING())
-			.mapGroups(
-				(MapGroupsFunction<String, FOSDataModel, Result>) (k,
-					it) -> getResult(DHPUtils.generateUnresolvedIdentifier(k, DOI), it),
-				Encoders.bean(Result.class))
+			.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>) (k, it) -> {
+				Result r = new Result();
+				FOSDataModel first = it.next();
+				r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
+
+				HashSet<String> level1 = new HashSet<>();
+				HashSet<String> level2 = new HashSet<>();
+				HashSet<String> level3 = new HashSet<>();
+				HashSet<String> level4 = new HashSet<>();
+				addLevels(level1, level2, level3, level4, first);
+				it.forEachRemaining(v -> addLevels(level1, level2, level3, level4, v));
+				List<Subject> sbjs = new ArrayList<>();
+				level1
+					.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
+				level2
+					.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
+				level3
+					.forEach(
+						l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
+				level4
+					.forEach(
+						l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
+				r.setSubject(sbjs);
+				r
+					.setDataInfo(
+						OafMapperUtils
+							.dataInfo(
+								false, null, true,
+								false,
+								OafMapperUtils
+									.qualifier(
+										ModelConstants.PROVENANCE_ENRICH,
+										null,
+										ModelConstants.DNET_PROVENANCE_ACTIONS,
+										ModelConstants.DNET_PROVENANCE_ACTIONS),
+								null));
+				return r;
+			}, Encoders.bean(Result.class))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/fosnodoi/CreateActionSetSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/fosnodoi/CreateActionSetSparkJob.java
@ -1,92 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.fosnodoi;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.*;
-
-import org.apache.commons.cli.ParseException;
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.GzipCodec;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.function.FilterFunction;
-import org.apache.spark.api.java.function.FlatMapFunction;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SparkSession;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.schema.action.AtomicAction;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.schema.oaf.Result;
-import eu.dnetlib.dhp.schema.oaf.utils.*;
-import scala.Tuple2;
-
-public class CreateActionSetSparkJob implements Serializable {
-
-	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	public static void main(final String[] args) throws IOException, ParseException {
-
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					Objects
-						.requireNonNull(
-							CreateActionSetSparkJob.class
-								.getResourceAsStream(
-									"/eu/dnetlib/dhp/actionmanager/fosnodoi/as_parameters.json"))));
-
-		parser.parseArgument(args);
-
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-
-		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
-		final String inputPath = parser.get("sourcePath");
-		log.info("inputPath {}", inputPath);
-
-		final String outputPath = parser.get("outputPath");
-		log.info("outputPath {}", outputPath);
-
-		SparkConf conf = new SparkConf();
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> createActionSet(spark, inputPath, outputPath));
-
-	}
-
-	private static void createActionSet(SparkSession spark, String inputPath, String outputPath) {
-		spark
-			.read()
-			.textFile(inputPath)
-			.map(
-				(MapFunction<String, Result>) value -> OBJECT_MAPPER.readValue(value, Result.class),
-				Encoders.bean(Result.class))
-			.toJavaRDD()
-			.map(p -> new AtomicAction(p.getClass(), p))
-			.mapToPair(
-				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
-					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
-			.saveAsHadoopFile(
-				outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
-	}
-
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java
@ -22,14 +22,12 @@ import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.*;
 import eu.dnetlib.dhp.utils.DHPUtils;
@ -39,12 +37,16 @@ public class CreateActionSetSparkJob implements Serializable {
 	public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations";
 	public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations";

+	// DOI-to-DOI citations
+	public static final String COCI = "COCI";
+
+	// PMID-to-PMID citations
+	public static final String POCI = "POCI";
+
 	private static final String DOI_PREFIX = "50|doi_________::";

 	private static final String PMID_PREFIX = "50|pmid________::";
-	private static final String ARXIV_PREFIX = "50|arXiv_______::";

-	private static final String PMCID_PREFIX = "50|pmcid_______::";
 	private static final String TRUST = "0.91";

 	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
@ -77,30 +79,38 @@ public class CreateActionSetSparkJob implements Serializable {
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath {}", outputPath);

+		final boolean shouldDuplicateRels = Optional
+			.ofNullable(parser.get("shouldDuplicateRels"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.FALSE);
+
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
-			spark -> extractContent(spark, inputPath, outputPath));
+			spark -> extractContent(spark, inputPath, outputPath, shouldDuplicateRels));

 	}

-	private static void extractContent(SparkSession spark, String inputPath, String outputPath) {
+	private static void extractContent(SparkSession spark, String inputPath, String outputPath,
+		boolean shouldDuplicateRels) {

-		getTextTextJavaPairRDD(spark, inputPath)
+		getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, COCI)
+			.union(getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, POCI))
 			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
 	}

-	private static JavaPairRDD<Text, Text> getTextTextJavaPairRDD(SparkSession spark, String inputPath) {
+	private static JavaPairRDD<Text, Text> getTextTextJavaPairRDD(SparkSession spark, String inputPath,
+		boolean shouldDuplicateRels, String prefix) {
 		return spark
 			.read()
-			.textFile(inputPath)
+			.textFile(inputPath + "/" + prefix + "/" + prefix + "_JSON/*")
 			.map(
 				(MapFunction<String, COCI>) value -> OBJECT_MAPPER.readValue(value, COCI.class),
 				Encoders.bean(COCI.class))
 			.flatMap(
 				(FlatMapFunction<COCI, Relation>) value -> createRelation(
-					value)
+					value, shouldDuplicateRels, prefix)
 						.iterator(),
 				Encoders.bean(Relation.class))
 			.filter((FilterFunction<Relation>) Objects::nonNull)
@ -111,68 +121,34 @@ public class CreateActionSetSparkJob implements Serializable {
 					new Text(OBJECT_MAPPER.writeValueAsString(aa))));
 	}

-	private static List<Relation> createRelation(COCI value) throws JsonProcessingException {
+	private static List<Relation> createRelation(COCI value, boolean duplicate, String p) {

 		List<Relation> relationList = new ArrayList<>();
-
+		String prefix;
 		String citing;
 		String cited;

-		switch (value.getCiting_pid()) {
-			case "doi":
-				citing = DOI_PREFIX
+		switch (p) {
+			case COCI:
+				prefix = DOI_PREFIX;
+				citing = prefix
 					+ IdentifierFactory
 						.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getCiting()));
-				break;
-			case "pmid":
-				citing = PMID_PREFIX
-					+ IdentifierFactory
-						.md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCiting()));
-				break;
-			case "arxiv":
-				citing = ARXIV_PREFIX
-					+ IdentifierFactory
-						.md5(PidCleaner.normalizePidValue(PidType.arXiv.toString(), value.getCiting()));
-				break;
-			case "pmcid":
-				citing = PMCID_PREFIX
-					+ IdentifierFactory
-						.md5(PidCleaner.normalizePidValue(PidType.pmc.toString(), value.getCiting()));
-				break;
-			case "isbn":
-			case "issn":
-				return relationList;
-
-			default:
-				throw new IllegalStateException("Invalid prefix: " + new ObjectMapper().writeValueAsString(value));
-		}
-
-		switch (value.getCited_pid()) {
-			case "doi":
-				cited = DOI_PREFIX
+				cited = prefix
 					+ IdentifierFactory
 						.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getCited()));
 				break;
-			case "pmid":
-				cited = PMID_PREFIX
+			case POCI:
+				prefix = PMID_PREFIX;
+				citing = prefix
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCiting()));
+				cited = prefix
 					+ IdentifierFactory
 						.md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCited()));
 				break;
-			case "arxiv":
-				cited = ARXIV_PREFIX
-					+ IdentifierFactory
-						.md5(PidCleaner.normalizePidValue(PidType.arXiv.toString(), value.getCited()));
-				break;
-			case "pmcid":
-				cited = PMCID_PREFIX
-					+ IdentifierFactory
-						.md5(PidCleaner.normalizePidValue(PidType.pmc.toString(), value.getCited()));
-				break;
-			case "isbn":
-			case "issn":
-				return relationList;
 			default:
-				throw new IllegalStateException("Invalid prefix: " + new ObjectMapper().writeValueAsString(value));
+				throw new IllegalStateException("Invalid prefix: " + p);
 		}

 		if (!citing.equals(cited)) {
@ -181,6 +157,15 @@ public class CreateActionSetSparkJob implements Serializable {
 					getRelation(
 						citing,
 						cited, ModelConstants.CITES));
+
+			if (duplicate && value.getCiting().endsWith(".refs")) {
+				citing = prefix + IdentifierFactory
+					.md5(
+						CleaningFunctions
+							.normalizePidValue(
+								"doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
+				relationList.add(getRelation(citing, cited, ModelConstants.CITES));
+			}
 		}

 		return relationList;
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
@ -12,7 +12,10 @@ import java.util.zip.ZipInputStream;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@ -34,17 +37,17 @@ public class GetOpenCitationsRefs implements Serializable {

 		parser.parseArgument(args);

-//		final String[] inputFile = parser.get("inputFile").split(";");
-//		log.info("inputFile {}", Arrays.asList(inputFile));
+		final String[] inputFile = parser.get("inputFile").split(";");
+		log.info("inputFile {}", Arrays.asList(inputFile));

-		final String inputPath = parser.get("inputPath");
-		log.info("inputPath {}", inputPath);
+		final String workingPath = parser.get("workingPath");
+		log.info("workingPath {}", workingPath);

 		final String hdfsNameNode = parser.get("hdfsNameNode");
 		log.info("hdfsNameNode {}", hdfsNameNode);

-		final String outputPath = parser.get("outputPath");
-		log.info("outputPath {}", outputPath);
+		final String prefix = parser.get("prefix");
+		log.info("prefix {}", prefix);

 		Configuration conf = new Configuration();
 		conf.set("fs.defaultFS", hdfsNameNode);
@ -53,42 +56,41 @@ public class GetOpenCitationsRefs implements Serializable {

 		GetOpenCitationsRefs ocr = new GetOpenCitationsRefs();

-		ocr.doExtract(inputPath, outputPath, fileSystem);
+		for (String file : inputFile) {
+			ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem, prefix);
+		}

 	}

-	private void doExtract(String inputPath, String outputPath, FileSystem fileSystem)
+	private void doExtract(String inputFile, String workingPath, FileSystem fileSystem, String prefix)
 		throws IOException {

-		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
-			.listFiles(
-				new Path(inputPath), true);
-		while (fileStatusListIterator.hasNext()) {
-			LocatedFileStatus fileStatus = fileStatusListIterator.next();
-			// do stuff with the file like ...
-			FSDataInputStream oc_zip = fileSystem.open(fileStatus.getPath());
-			try (ZipInputStream zis = new ZipInputStream(oc_zip)) {
-				ZipEntry entry = null;
-				while ((entry = zis.getNextEntry()) != null) {
+		final Path path = new Path(inputFile);

-					if (!entry.isDirectory()) {
-						String fileName = entry.getName();
-						// fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count;
-						fileName = fileName.substring(0, fileName.lastIndexOf("."));
-						// count++;
-						try (
-							FSDataOutputStream out = fileSystem
-								.create(new Path(outputPath + "/" + fileName + ".gz"));
-							GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
+		FSDataInputStream oc_zip = fileSystem.open(path);

-							IOUtils.copy(zis, gzipOs);
+		// int count = 1;
+		try (ZipInputStream zis = new ZipInputStream(oc_zip)) {
+			ZipEntry entry = null;
+			while ((entry = zis.getNextEntry()) != null) {
+
+				if (!entry.isDirectory()) {
+					String fileName = entry.getName();
+					// fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count;
+					fileName = fileName.substring(0, fileName.lastIndexOf("."));
+					// count++;
+					try (
+						FSDataOutputStream out = fileSystem
+							.create(new Path(workingPath + "/" + prefix + "/" + fileName + ".gz"));
+						GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
+
+						IOUtils.copy(zis, gzipOs);

-						}
 					}
-
 				}

 			}
+
 		}

 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/MapOCIdsInPids.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/MapOCIdsInPids.java
@ -1,171 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.opencitations;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Objects;
-import java.util.Optional;
-import java.util.stream.Collectors;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipInputStream;
-
-import org.apache.commons.cli.ParseException;
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.FlatMapFunction;
-import org.apache.spark.api.java.function.ForeachFunction;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import scala.Tuple2;
-
-/**
- * @author miriam.baglioni
- * @Date 29/02/24
- */
-public class MapOCIdsInPids implements Serializable {
-
-	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
-	private static final String DELIMITER = ",";
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	public static void main(final String[] args) throws IOException, ParseException {
-
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					Objects
-						.requireNonNull(
-							MapOCIdsInPids.class
-								.getResourceAsStream(
-									"/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json"))));
-
-		parser.parseArgument(args);
-
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-
-		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
-		final String inputPath = parser.get("inputPath");
-		log.info("inputPath {}", inputPath);
-
-		final String outputPath = parser.get("outputPath");
-		log.info("outputPath {}", outputPath);
-
-		final String nameNode = parser.get("nameNode");
-		log.info("nameNode {}", nameNode);
-
-		unzipCorrespondenceFile(inputPath, nameNode);
-		SparkConf conf = new SparkConf();
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> mapIdentifiers(spark, inputPath, outputPath));
-
-	}
-
-	private static void unzipCorrespondenceFile(String inputPath, String hdfsNameNode) throws IOException {
-		Configuration conf = new Configuration();
-		conf.set("fs.defaultFS", hdfsNameNode);
-
-		final Path path = new Path(inputPath + "/correspondence/omid.zip");
-		FileSystem fileSystem = FileSystem.get(conf);
-
-		FSDataInputStream project_zip = fileSystem.open(path);
-
-		try (ZipInputStream zis = new ZipInputStream(project_zip)) {
-			ZipEntry entry = null;
-			while ((entry = zis.getNextEntry()) != null) {
-
-				if (!entry.isDirectory()) {
-					String fileName = entry.getName();
-					byte buffer[] = new byte[1024];
-					int count;
-
-					try (
-						FSDataOutputStream out = fileSystem
-							.create(new Path(inputPath + "/correspondence/omid.csv"))) {
-
-						while ((count = zis.read(buffer, 0, buffer.length)) != -1)
-							out.write(buffer, 0, count);
-
-					}
-
-				}
-
-			}
-
-		}
-
-	}
-
-	private static void mapIdentifiers(SparkSession spark, String inputPath, String outputPath) {
-		Dataset<COCI> coci = spark
-			.read()
-			.textFile(inputPath + "/JSON")
-			.map(
-				(MapFunction<String, COCI>) value -> OBJECT_MAPPER.readValue(value, COCI.class),
-				Encoders.bean(COCI.class));
-
-		Dataset<Tuple2<String, String>> correspondenceData = spark
-			.read()
-			.format("csv")
-			.option("sep", DELIMITER)
-			.option("inferSchema", "true")
-			.option("header", "true")
-			.option("quotes", "\"")
-			.load(inputPath + "/correspondence/omid.csv")
-			.repartition(5000)
-			.flatMap((FlatMapFunction<Row, Tuple2<String, String>>) r -> {
-				String ocIdentifier = r.getAs("omid");
-				String[] correspondentIdentifiers = ((String) r.getAs("id")).split(" ");
-				return Arrays
-					.stream(correspondentIdentifiers)
-					.map(ci -> new Tuple2<String, String>(ocIdentifier, ci))
-					.collect(Collectors.toList())
-					.iterator();
-			}, Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
-
-		Dataset<COCI> mappedCitingDataset = coci
-			.joinWith(correspondenceData, coci.col("citing").equalTo(correspondenceData.col("_1")))
-			.map((MapFunction<Tuple2<COCI, Tuple2<String, String>>, COCI>) t2 -> {
-				String correspondent = t2._2()._2();
-				t2._1().setCiting_pid(correspondent.substring(0, correspondent.indexOf(":")));
-				t2._1().setCiting(correspondent.substring(correspondent.indexOf(":") + 1));
-				return t2._1();
-			}, Encoders.bean(COCI.class));
-
-		mappedCitingDataset
-			.joinWith(correspondenceData, mappedCitingDataset.col("cited").equalTo(correspondenceData.col("_1")))
-			.map((MapFunction<Tuple2<COCI, Tuple2<String, String>>, COCI>) t2 -> {
-				String correspondent = t2._2()._2();
-				t2._1().setCited_pid(correspondent.substring(0, correspondent.indexOf(":")));
-				t2._1().setCited(correspondent.substring(correspondent.indexOf(":") + 1));
-				return t2._1();
-			}, Encoders.bean(COCI.class))
-			.write()
-			.mode(SaveMode.Append)
-			.option("compression", "gzip")
-			.json(outputPath);
-
-	}
-
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
@ -12,9 +12,11 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.*;
 import org.slf4j.Logger;
@ -40,21 +42,19 @@ public class ReadCOCI implements Serializable {
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

-		final String hdfsNameNode = parser.get("hdfsNameNode");
-		log.info("hdfsNameNode {}", hdfsNameNode);
-
+		final String[] inputFile = parser.get("inputFile").split(";");
+		log.info("inputFile {}", Arrays.asList(inputFile));
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("inputPath");
+		final String workingPath = parser.get("workingPath");
 		log.info("workingPath {}", workingPath);

+		final String format = parser.get("format");
+		log.info("format {}", format);
+
 		SparkConf sconf = new SparkConf();

-		Configuration conf = new Configuration();
-		conf.set("fs.defaultFS", hdfsNameNode);
-
-		FileSystem fileSystem = FileSystem.get(conf);
 		final String delimiter = Optional
 			.ofNullable(parser.get("delimiter"))
 			.orElse(DEFAULT_DELIMITER);
@ -66,21 +66,20 @@ public class ReadCOCI implements Serializable {
 				doRead(
 					spark,
 					workingPath,
-					fileSystem,
+					inputFile,
 					outputPath,
-					delimiter);
+					delimiter,
+					format);
 			});
 	}

-	private static void doRead(SparkSession spark, String workingPath, FileSystem fileSystem,
+	private static void doRead(SparkSession spark, String workingPath, String[] inputFiles,
 		String outputPath,
-		String delimiter) throws IOException {
-		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
-			.listFiles(
-				new Path(workingPath), true);
-		while (fileStatusListIterator.hasNext()) {
-			LocatedFileStatus fileStatus = fileStatusListIterator.next();
-			log.info("extracting file {}", fileStatus.getPath().toString());
+		String delimiter, String format) {
+
+		for (String inputFile : inputFiles) {
+			String pString = workingPath + "/" + inputFile + ".gz";
+
 			Dataset<Row> cociData = spark
 				.read()
 				.format("csv")
@ -88,26 +87,26 @@ public class ReadCOCI implements Serializable {
 				.option("inferSchema", "true")
 				.option("header", "true")
 				.option("quotes", "\"")
-				.load(fileStatus.getPath().toString())
+				.load(pString)
 				.repartition(100);

 			cociData.map((MapFunction<Row, COCI>) row -> {
-
 				COCI coci = new COCI();
-
-				coci.setCiting(row.getString(1));
-				coci.setCited(row.getString(2));
-
+				if (format.equals("COCI")) {
+					coci.setCiting(row.getString(1));
+					coci.setCited(row.getString(2));
+				} else {
+					coci.setCiting(String.valueOf(row.getInt(1)));
+					coci.setCited(String.valueOf(row.getInt(2)));
+				}
 				coci.setOci(row.getString(0));

 				return coci;
 			}, Encoders.bean(COCI.class))
-				.filter((FilterFunction<COCI>) c -> c != null)
 				.write()
-				.mode(SaveMode.Append)
+				.mode(SaveMode.Overwrite)
 				.option("compression", "gzip")
-				.json(outputPath);
-			fileSystem.rename(fileStatus.getPath(), new Path("/tmp/miriam/OC/DONE"));
+				.json(outputPath + inputFile);
 		}

 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java
@ -9,10 +9,8 @@ public class COCI implements Serializable {
 	private String oci;

 	private String citing;
-	private String citing_pid;

 	private String cited;
-	private String cited_pid;

 	public String getOci() {
 		return oci;
@ -27,8 +25,6 @@ public class COCI implements Serializable {
 	}

 	public void setCiting(String citing) {
-		if (citing != null && citing.startsWith("omid:"))
-			citing = citing.substring(5);
 		this.citing = citing;
 	}

@ -37,24 +33,7 @@ public class COCI implements Serializable {
 	}

 	public void setCited(String cited) {
-		if (cited != null && cited.startsWith("omid:"))
-			cited = cited.substring(5);
 		this.cited = cited;
 	}

-	public String getCiting_pid() {
-		return citing_pid;
-	}
-
-	public void setCiting_pid(String citing_pid) {
-		this.citing_pid = citing_pid;
-	}
-
-	public String getCited_pid() {
-		return cited_pid;
-	}
-
-	public void setCited_pid(String cited_pid) {
-		this.cited_pid = cited_pid;
-	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
 import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
+import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
 import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
@ -32,7 +33,6 @@ import eu.dnetlib.dhp.schema.oaf.H2020Classification;
 import eu.dnetlib.dhp.schema.oaf.H2020Programme;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.schema.oaf.Project;
-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;

@ -160,11 +160,9 @@ public class SparkAtomicActionJob {
 				(MapFunction<Project, String>) OafEntity::getId,
 				Encoders.STRING())
 			.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
-				Project merge = it.next();
-				while (it.hasNext()) {
-					merge = MergeUtils.mergeProject(merge, it.next());
-				}
-				return merge;
+				Project first = it.next();
+				it.forEachRemaining(first::mergeFrom);
+				return first;
 			}, Encoders.bean(Project.class))
 			.toJavaRDD()
 			.map(p -> new AtomicAction(Project.class, p))
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateActionSetSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateActionSetSparkJob.java
@ -1,196 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.transformativeagreement;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.*;
-
-import org.apache.commons.cli.ParseException;
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.GzipCodec;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.function.FilterFunction;
-import org.apache.spark.api.java.function.FlatMapFunction;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SparkSession;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.actionmanager.transformativeagreement.model.TransformativeAgreementModel;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.schema.action.AtomicAction;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.Country;
-import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.schema.oaf.Result;
-import eu.dnetlib.dhp.schema.oaf.utils.*;
-import scala.Tuple2;
-
-public class CreateActionSetSparkJob implements Serializable {
-
-	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	private static final String IREL_PROJECT = "40|100018998___::1e5e62235d094afd01cd56e65112fc63";
-	private static final String TRANSFORMATIVE_AGREEMENT = "openapc::transformativeagreement";
-
-	public static void main(final String[] args) throws IOException, ParseException {
-
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					Objects
-						.requireNonNull(
-							CreateActionSetSparkJob.class
-								.getResourceAsStream(
-									"/eu/dnetlib/dhp/actionmanager/transformativeagreement/as_parameters.json"))));
-
-		parser.parseArgument(args);
-
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-
-		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
-		final String inputPath = parser.get("inputPath");
-		log.info("inputPath {}", inputPath);
-
-		final String outputPath = parser.get("outputPath");
-		log.info("outputPath {}", outputPath);
-
-		SparkConf conf = new SparkConf();
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> createActionSet(spark, inputPath, outputPath));
-
-	}
-
-	private static void createActionSet(SparkSession spark, String inputPath, String outputPath) {
-		JavaRDD<AtomicAction> relations = spark
-			.read()
-			.textFile(inputPath)
-			.map(
-				(MapFunction<String, TransformativeAgreementModel>) value -> OBJECT_MAPPER
-					.readValue(value, TransformativeAgreementModel.class),
-				Encoders.bean(TransformativeAgreementModel.class))
-			.flatMap(
-				(FlatMapFunction<TransformativeAgreementModel, Relation>) value -> createRelation(
-					value)
-						.iterator(),
-				Encoders.bean(Relation.class))
-			.filter((FilterFunction<Relation>) Objects::nonNull)
-			.toJavaRDD()
-			.map(p -> new AtomicAction(p.getClass(), p));
-
-		spark
-			.read()
-			.textFile(inputPath)
-			.map(
-				(MapFunction<String, TransformativeAgreementModel>) value -> OBJECT_MAPPER
-					.readValue(value, TransformativeAgreementModel.class),
-				Encoders.bean(TransformativeAgreementModel.class))
-			.map(
-				(MapFunction<TransformativeAgreementModel, Result>) value -> createResult(
-					value),
-				Encoders.bean(Result.class))
-			.filter((FilterFunction<Result>) r -> r != null)
-			.toJavaRDD()
-			.map(p -> new AtomicAction(p.getClass(), p))
-			.union(relations)
-			.mapToPair(
-				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
-					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
-			.saveAsHadoopFile(
-				outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
-
-	}
-
-	private static Result createResult(TransformativeAgreementModel value) {
-		Result r = new Result();
-		r
-			.setId(
-				"50|doi_________::"
-					+ IdentifierFactory
-						.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getDoi())));
-		r.setTransformativeAgreement(value.getAgreement());
-		Country country = new Country();
-		country.setClassid(value.getCountry());
-		country.setClassname(value.getCountry());
-		country
-			.setDataInfo(
-				OafMapperUtils
-					.dataInfo(
-						false, ModelConstants.SYSIMPORT_ACTIONSET, false, false,
-						OafMapperUtils
-							.qualifier(
-								"openapc::transformativeagreement",
-								"Harvested from Trnasformative Agreement file from OpenAPC",
-								ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
-						"0.9"));
-		country.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
-		country.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
-		r.setCountry(Arrays.asList(country));
-		return r;
-	}
-
-	private static List<Relation> createRelation(TransformativeAgreementModel value) {
-
-		List<Relation> relationList = new ArrayList<>();
-
-		if (value.getAgreement().startsWith("IReL")) {
-			String paper;
-
-			paper = "50|doi_________::"
-				+ IdentifierFactory
-					.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getDoi()));
-
-			relationList
-				.add(
-					getRelation(
-						paper,
-						IREL_PROJECT, ModelConstants.IS_PRODUCED_BY));
-
-			relationList.add(getRelation(IREL_PROJECT, paper, ModelConstants.PRODUCES));
-		}
-		return relationList;
-	}
-
-	public static Relation getRelation(
-		String source,
-		String target,
-		String relClass) {
-
-		return OafMapperUtils
-			.getRelation(
-				source,
-				target,
-				ModelConstants.RESULT_PROJECT,
-				ModelConstants.OUTCOME,
-				relClass,
-				Arrays
-					.asList(
-						OafMapperUtils.keyValue(ModelConstants.OPEN_APC_ID, ModelConstants.OPEN_APC_NAME)),
-				OafMapperUtils
-					.dataInfo(
-						false, null, false, false,
-						OafMapperUtils
-							.qualifier(
-								TRANSFORMATIVE_AGREEMENT, "Transformative Agreement",
-								ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
-						"0.9"),
-				null);
-	}
-
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/model/TransformativeAgreementModel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/model/TransformativeAgreementModel.java
@ -1,51 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.transformativeagreement.model;
-
-import java.io.Serializable;
-
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-
-/**
- * @author miriam.baglioni
- * @Date 18/12/23
- */
-@JsonIgnoreProperties(ignoreUnknown = true)
-
-public class TransformativeAgreementModel implements Serializable {
-	private String institution;
-	private String doi;
-	private String agreement;
-	private String country;
-
-	public String getCountry() {
-		return country;
-	}
-
-	public void setCountry(String country) {
-		this.country = country;
-	}
-
-	public String getInstitution() {
-		return institution;
-	}
-
-	public void setInstitution(String institution) {
-		this.institution = institution;
-	}
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public String getAgreement() {
-		return agreement;
-	}
-
-	public void setAgreement(String agreement) {
-		this.agreement = agreement;
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java
@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.actionmanager.Constants.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;

 import java.io.Serializable;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Optional;
@ -14,9 +13,7 @@ import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.api.java.function.MapGroupsFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
@ -71,59 +68,18 @@ public class SparkAtomicActionUsageJob implements Serializable {

 		final String workingPath = parser.get("workingPath");

-		final String datasourcePath = parser.get("datasourcePath");
-
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				removeOutputDir(spark, outputPath);
-				prepareResultData(
-					dbname, spark, workingPath + "/usageDb",
-					"usage_stats",
-					"result_id",
-					"repository_id",
-					datasourcePath);
+				prepareData(dbname, spark, workingPath + "/usageDb", "usage_stats", "result_id");
 				prepareData(dbname, spark, workingPath + "/projectDb", "project_stats", "id");
 				prepareData(dbname, spark, workingPath + "/datasourceDb", "datasource_stats", "repository_id");
 				writeActionSet(spark, workingPath, outputPath);
 			});
 	}

-	private static void prepareResultData(String dbname, SparkSession spark, String workingPath, String tableName,
-		String resultAttributeName, String datasourceAttributeName,
-		String datasourcePath) {
-		Dataset<UsageStatsResultModel> resultModel = spark
-			.sql(
-				String
-					.format(
-						"select %s as id, %s as datasourceId, sum(downloads) as downloads, sum(views) as views " +
-							"from %s.%s group by %s, %s",
-						resultAttributeName, datasourceAttributeName, dbname, tableName, resultAttributeName,
-						datasourceAttributeName))
-			.as(Encoders.bean(UsageStatsResultModel.class));
-		Dataset<Datasource> datasource = readPath(spark, datasourcePath, Datasource.class)
-			.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
-			.map((MapFunction<Datasource, Datasource>) d -> {
-				d.setId(d.getId().substring(3));
-				return d;
-			}, Encoders.bean(Datasource.class));
-		resultModel
-			.joinWith(datasource, resultModel.col("datasourceId").equalTo(datasource.col("id")), "left")
-			.map((MapFunction<Tuple2<UsageStatsResultModel, Datasource>, UsageStatsResultModel>) t2 -> {
-				UsageStatsResultModel usrm = t2._1();
-				if (Optional.ofNullable(t2._2()).isPresent())
-					usrm.setDatasourceId(usrm.getDatasourceId() + "||" + t2._2().getOfficialname().getValue());
-				else
-					usrm.setDatasourceId(usrm.getDatasourceId() + "||NO_MATCH_FOUND");
-				return usrm;
-			}, Encoders.bean(UsageStatsResultModel.class))
-			.write()
-			.mode(SaveMode.Overwrite)
-			.option("compression", "gzip")
-			.json(workingPath);
-	}
-
 	private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName,
 		String attribute_name) {
 		spark
@ -159,62 +115,15 @@ public class SparkAtomicActionUsageJob implements Serializable {

 	}

-	public static Measure newMeasureInstance(String id) {
-		Measure m = new Measure();
-		m.setId(id);
-		m.setUnit(new ArrayList<>());
-		return m;
-	}
-
 	private static Dataset<Result> getFinalIndicatorsResult(SparkSession spark, String inputPath) {

-		return readPath(spark, inputPath, UsageStatsResultModel.class)
-			.groupByKey((MapFunction<UsageStatsResultModel, String>) usm -> usm.getId(), Encoders.STRING())
-			.mapGroups((MapGroupsFunction<String, UsageStatsResultModel, Result>) (k, it) -> {
+		return readPath(spark, inputPath, UsageStatsModel.class)
+			.map((MapFunction<UsageStatsModel, Result>) usm -> {
 				Result r = new Result();
-				r.setId("50|" + k);
-				// id = download or view and unit = list of key value pairs
-				Measure download = newMeasureInstance("downloads");
-				Measure view = newMeasureInstance("views");
-				UsageStatsResultModel first = it.next();
-				addCountForDatasource(download, first, view);
-				it.forEachRemaining(usm -> {
-					addCountForDatasource(download, usm, view);
-				});
-				r.setMeasures(Arrays.asList(download, view));
+				r.setId("50|" + usm.getId());
+				r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
 				return r;
-			}, Encoders.bean(Result.class))
-//			.map((MapFunction<UsageStatsResultModel, Result>) usm -> {
-//				Result r = new Result();
-//				r.setId("50|" + usm.getId());
-//				r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
-//				return r;
-//			}, Encoders.bean(Result.class));
-		;
-	}
-
-	private static void addCountForDatasource(Measure download, UsageStatsResultModel usm, Measure view) {
-		DataInfo dataInfo = OafMapperUtils
-			.dataInfo(
-				false,
-				UPDATE_DATA_INFO_TYPE,
-				true,
-				false,
-				OafMapperUtils
-					.qualifier(
-						UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID,
-						UPDATE_CLASS_NAME,
-						ModelConstants.DNET_PROVENANCE_ACTIONS,
-						ModelConstants.DNET_PROVENANCE_ACTIONS),
-				"");
-		download
-			.getUnit()
-			.add(
-				OafMapperUtils
-					.newKeyValueInstance(usm.getDatasourceId(), String.valueOf(usm.getDownloads()), dataInfo));
-		view
-			.getUnit()
-			.add(OafMapperUtils.newKeyValueInstance(usm.getDatasourceId(), String.valueOf(usm.getViews()), dataInfo));
+			}, Encoders.bean(Result.class));
 	}

 	private static Dataset<Project> getFinalIndicatorsProject(SparkSession spark, String inputPath) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsResultModel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsResultModel.java
@ -1,18 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.usagestats;
-
-/**
- * @author miriam.baglioni
- * @Date 30/06/23
- */
-public class UsageStatsResultModel extends UsageStatsModel {
-	private String datasourceId;
-
-	public String getDatasourceId() {
-		return datasourceId;
-	}
-
-	public void setDatasourceId(String datasourceId) {
-		this.datasourceId = datasourceId;
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java
@ -1,232 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.webcrawl;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.io.Serializable;
-import java.util.*;
-import java.util.stream.Collectors;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.GzipCodec;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.FlatMapFunction;
-import org.apache.spark.sql.*;
-import org.apache.spark.sql.types.StructType;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.schema.action.AtomicAction;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
-import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
-import scala.Tuple2;
-
-/**
- * @author miriam.baglioni
- * @Date 18/04/24
- */
-public class CreateActionSetFromWebEntries implements Serializable {
-	private static final Logger log = LoggerFactory.getLogger(CreateActionSetFromWebEntries.class);
-	private static final String DOI_PREFIX = "50|doi_________::";
-
-	private static final String ROR_PREFIX = "20|ror_________::";
-
-	private static final String PMID_PREFIX = "50|pmid________::";
-
-	private static final String PMCID_PREFIX = "50|pmc_________::";
-	private static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
-	private static final String WEB_CRAWL_NAME = "Web Crawl";
-	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	public static void main(String[] args) throws Exception {
-		String jsonConfiguration = IOUtils
-			.toString(
-				CreateActionSetFromWebEntries.class
-					.getResourceAsStream(
-						"/eu/dnetlib/dhp/actionmanager/webcrawl/as_parameters.json"));
-
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
-		parser.parseArgument(args);
-
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-
-		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
-		final String inputPath = parser.get("sourcePath");
-		log.info("inputPath: {}", inputPath);
-
-		final String outputPath = parser.get("outputPath");
-		log.info("outputPath: {}", outputPath);
-
-		SparkConf conf = new SparkConf();
-
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> {
-
-				createActionSet(spark, inputPath, outputPath);
-
-			});
-	}
-
-	public static void createActionSet(SparkSession spark, String inputPath,
-		String outputPath) {
-
-		final Dataset<Row> dataset = readWebCrawl(spark, inputPath)
-			.filter("publication_year <= 2020 or country_code=='IE'")
-			.drop("publication_year");
-
-		dataset.flatMap((FlatMapFunction<Row, Relation>) row -> {
-			List<Relation> ret = new ArrayList<>();
-			final String ror = ROR_PREFIX
-				+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
-			ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
-			ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
-			ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));
-
-			return ret
-				.iterator();
-		}, Encoders.bean(Relation.class))
-			.toJavaRDD()
-			.map(p -> new AtomicAction(p.getClass(), p))
-			.mapToPair(
-				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
-					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
-			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
-
-	}
-
-	private static Dataset<Row> readWebCrawl(SparkSession spark, String inputPath) {
-		StructType webInfo = StructType
-			.fromDDL(
-				"`id` STRING , `doi` STRING, `ids` STRUCT<`pmid` :STRING, `pmcid`: STRING >, `publication_year` STRING, "
-					+
-					"`authorships` ARRAY<STRUCT <`institutions`: ARRAY <STRUCT <`ror`: STRING, `country_code` :STRING>>>>");
-
-		return spark
-			.read()
-			.schema(webInfo)
-			.json(inputPath)
-			.withColumn(
-				"authors", functions
-					.explode(
-						functions.col("authorships")))
-			.selectExpr("id", "doi", "ids", "publication_year", "authors.institutions as institutions")
-			.withColumn(
-				"institution", functions
-					.explode(
-						functions.col("institutions")))
-			.selectExpr(
-				"id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror",
-				"institution.country_code as country_code", "publication_year")
-			.distinct();
-
-	}
-
-	private static List<Relation> createAffiliationRelationPairPMCID(String pmcid, String ror) {
-		if (pmcid == null)
-			return new ArrayList<>();
-
-		return createAffiliatioRelationPair(
-			PMCID_PREFIX
-				+ IdentifierFactory
-					.md5(PidCleaner.normalizePidValue(PidType.pmc.toString(), removeResolver("PMC", pmcid))),
-			ror);
-	}
-
-	private static List<Relation> createAffiliationRelationPairPMID(String pmid, String ror) {
-		if (pmid == null)
-			return new ArrayList<>();
-
-		return createAffiliatioRelationPair(
-			PMID_PREFIX
-				+ IdentifierFactory
-					.md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), removeResolver("PMID", pmid))),
-			ror);
-	}
-
-	private static String removeResolver(String pidType, String pid) {
-		switch (pidType) {
-			case "PMID":
-				return pid.substring(33);
-			case "PMC":
-				return "PMC" + pid.substring(43);
-			case "DOI":
-				return pid.substring(16);
-		}
-
-		throw new RuntimeException();
-
-	}
-
-	private static List<Relation> createAffiliationRelationPairDOI(String doi, String ror) {
-		if (doi == null)
-			return new ArrayList<>();
-
-		return createAffiliatioRelationPair(
-			DOI_PREFIX
-				+ IdentifierFactory
-					.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), removeResolver("DOI", doi))),
-			ror);
-
-	}
-
-	private static List<Relation> createAffiliatioRelationPair(String resultId, String orgId) {
-		ArrayList<Relation> newRelations = new ArrayList();
-
-		newRelations
-			.add(
-				OafMapperUtils
-					.getRelation(
-						orgId, resultId, ModelConstants.RESULT_ORGANIZATION, ModelConstants.AFFILIATION,
-						ModelConstants.IS_AUTHOR_INSTITUTION_OF,
-						Arrays
-							.asList(
-								OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)),
-						OafMapperUtils
-							.dataInfo(
-								false, null, false, false,
-								OafMapperUtils
-									.qualifier(
-										"sysimport:crasswalk:webcrawl", "Imported from Webcrawl",
-										ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
-								"0.9"),
-						null));
-
-		newRelations
-			.add(
-				OafMapperUtils
-					.getRelation(
-						resultId, orgId, ModelConstants.RESULT_ORGANIZATION, ModelConstants.AFFILIATION,
-						ModelConstants.HAS_AUTHOR_INSTITUTION,
-						Arrays
-							.asList(
-								OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)),
-						OafMapperUtils
-							.dataInfo(
-								false, null, false, false,
-								OafMapperUtils
-									.qualifier(
-										"sysimport:crasswalk:webcrawl", "Imported from Webcrawl",
-										ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
-								"0.9"),
-						null));
-
-		return newRelations;
-
-	}
-
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDWorker.java
@ -1,244 +0,0 @@
-
-package eu.dnetlib.dhp.collection.orcid;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.concurrent.BlockingQueue;
-
-import javax.swing.*;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.http.HttpHeaders;
-import org.jetbrains.annotations.NotNull;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.common.collection.HttpClientParams;
-
-public class ORCIDWorker extends Thread {
-
-	final static Logger log = LoggerFactory.getLogger(ORCIDWorker.class);
-
-	public static String JOB_COMPLETE = "JOB_COMPLETE";
-
-	private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
-
-	private final BlockingQueue<String> queue;
-
-	private boolean hasComplete = false;
-
-	private final SequenceFile.Writer employments;
-
-	private final SequenceFile.Writer summary;
-	private final SequenceFile.Writer works;
-
-	private final String token;
-
-	private final String id;
-
-	public static ORCIDWorkerBuilder builder() {
-		return new ORCIDWorkerBuilder();
-	}
-
-	public ORCIDWorker(String id, BlockingQueue<String> myqueue, SequenceFile.Writer employments,
-		SequenceFile.Writer summary, SequenceFile.Writer works, String token) {
-		this.id = id;
-		this.queue = myqueue;
-		this.employments = employments;
-		this.summary = summary;
-		this.works = works;
-		this.token = token;
-	}
-
-	public static String retrieveURL(final String id, final String apiUrl, String token) {
-		try {
-			final HttpURLConnection urlConn = getHttpURLConnection(apiUrl, token);
-			if (urlConn.getResponseCode() > 199 && urlConn.getResponseCode() < 300) {
-				InputStream input = urlConn.getInputStream();
-				return IOUtils.toString(input);
-			} else {
-				log
-					.error(
-						"Thread {} UNABLE TO DOWNLOAD FROM THIS URL {} , status code {}", id, apiUrl,
-						urlConn.getResponseCode());
-			}
-		} catch (Exception e) {
-			log.error("Thread {}  Error on retrieving URL {} {}", id, apiUrl, e);
-		}
-		return null;
-	}
-
-	@NotNull
-	private static HttpURLConnection getHttpURLConnection(String apiUrl, String token) throws IOException {
-		final HttpURLConnection urlConn = (HttpURLConnection) new URL(apiUrl).openConnection();
-		final HttpClientParams clientParams = new HttpClientParams();
-		urlConn.setInstanceFollowRedirects(false);
-		urlConn.setReadTimeout(clientParams.getReadTimeOut() * 1000);
-		urlConn.setConnectTimeout(clientParams.getConnectTimeOut() * 1000);
-		urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
-		urlConn.addRequestProperty(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", token));
-		return urlConn;
-	}
-
-	private static String generateSummaryURL(final String orcidId) {
-		return "https://api.orcid.org/v3.0/" + orcidId + "/record";
-	}
-
-	private static String generateWorksURL(final String orcidId) {
-		return "https://api.orcid.org/v3.0/" + orcidId + "/works";
-	}
-
-	private static String generateEmploymentsURL(final String orcidId) {
-		return "https://api.orcid.org/v3.0/" + orcidId + "/employments";
-	}
-
-	private static void writeResultToSequenceFile(String id, String url, String token, String orcidId,
-		SequenceFile.Writer file) throws IOException {
-		final String response = retrieveURL(id, url, token);
-		if (response != null) {
-			if (orcidId == null) {
-				log.error("Thread {}   {}   {}", id, orcidId, response);
-				throw new RuntimeException("null items ");
-			}
-
-			if (file == null) {
-				log.error("Thread {}   file is null for {}  URL:{}", id, url, orcidId);
-			} else {
-				file.append(new Text(orcidId), new Text(response));
-				file.hflush();
-			}
-
-		} else
-			log.error("Thread {} response is null for {}  URL:{}", id, url, orcidId);
-
-	}
-
-	@Override
-	public void run() {
-		final Text key = new Text();
-		final Text value = new Text();
-		long start;
-		long total_time;
-		String orcidId = "";
-		int requests = 0;
-		if (summary == null || employments == null || works == null)
-			throw new RuntimeException("Null files");
-
-		while (!hasComplete) {
-			try {
-
-				orcidId = queue.take();
-
-				if (orcidId.equalsIgnoreCase(JOB_COMPLETE)) {
-					hasComplete = true;
-				} else {
-					start = System.currentTimeMillis();
-					writeResultToSequenceFile(id, generateSummaryURL(orcidId), token, orcidId, summary);
-					total_time = System.currentTimeMillis() - start;
-					requests++;
-					if (total_time < 1000) {
-						// I know making a sleep on a thread is bad, but we need to stay to 24 requests per seconds,
-						// hence
-						// the time between two http request in a thread must be 1 second
-						Thread.sleep(1000L - total_time);
-					}
-					start = System.currentTimeMillis();
-					writeResultToSequenceFile(id, generateWorksURL(orcidId), token, orcidId, works);
-					total_time = System.currentTimeMillis() - start;
-					requests++;
-					if (total_time < 1000) {
-						// I know making a sleep on a thread is bad, but we need to stay to 24 requests per seconds,
-						// hence
-						// the time between two http request in a thread must be 1 second
-						Thread.sleep(1000L - total_time);
-					}
-					start = System.currentTimeMillis();
-					writeResultToSequenceFile(id, generateEmploymentsURL(orcidId), token, orcidId, employments);
-					total_time = System.currentTimeMillis() - start;
-					requests++;
-					if (total_time < 1000) {
-						// I know making a sleep on a thread is bad, but we need to stay to 24 requests per seconds,
-						// hence
-						// the time between two http request in a thread must be 1 second
-						Thread.sleep(1000L - total_time);
-					}
-					if (requests % 30 == 0) {
-						log.info("Thread {}   Downloaded {}", id, requests);
-					}
-				}
-
-			} catch (Throwable e) {
-
-				log.error("Thread {}  Unable to save ORICD: {} item error", id, orcidId, e);
-
-			}
-
-		}
-		try {
-			works.close();
-			summary.close();
-			employments.close();
-		} catch (Throwable e) {
-			throw new RuntimeException(e);
-		}
-
-		log.info("Thread {}  COMPLETE ", id);
-		log.info("Thread {}   Downloaded {}", id, requests);
-
-	}
-
-	public static class ORCIDWorkerBuilder {
-
-		private String id;
-		private SequenceFile.Writer employments;
-		private SequenceFile.Writer summary;
-		private SequenceFile.Writer works;
-		private BlockingQueue<String> queue;
-
-		private String token;
-
-		public ORCIDWorkerBuilder withId(final String id) {
-			this.id = id;
-			return this;
-		}
-
-		public ORCIDWorkerBuilder withEmployments(final SequenceFile.Writer sequenceFile) {
-			this.employments = sequenceFile;
-			return this;
-		}
-
-		public ORCIDWorkerBuilder withSummary(final SequenceFile.Writer sequenceFile) {
-			this.summary = sequenceFile;
-			return this;
-		}
-
-		public ORCIDWorkerBuilder withWorks(final SequenceFile.Writer sequenceFile) {
-			this.works = sequenceFile;
-			return this;
-		}
-
-		public ORCIDWorkerBuilder withAccessToken(final String accessToken) {
-			this.token = accessToken;
-			return this;
-		}
-
-		public ORCIDWorkerBuilder withBlockingQueue(final BlockingQueue<String> queue) {
-			this.queue = queue;
-			return this;
-		}
-
-		public ORCIDWorker build() {
-			if (this.summary == null || this.works == null || this.employments == null || StringUtils.isEmpty(token)
-				|| queue == null)
-				throw new RuntimeException("Unable to build missing required params");
-			return new ORCIDWorker(id, queue, employments, summary, works, token);
-		}
-
-	}
-
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidGetUpdatesFile.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidGetUpdatesFile.java
@ -1,171 +0,0 @@
-
-package eu.dnetlib.dhp.collection.orcid;
-
-import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
-
-import java.io.*;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Objects;
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.BlockingQueue;
-
-import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
-import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.spark.sql.SparkSession;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.common.collection.HttpClientParams;
-
-public class OrcidGetUpdatesFile {
-
-	private static Logger log = LoggerFactory.getLogger(OrcidGetUpdatesFile.class);
-
-	public static void main(String[] args) throws Exception {
-
-		ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					Objects
-						.requireNonNull(
-							OrcidGetUpdatesFile.class
-								.getResourceAsStream(
-									"/eu/dnetlib/dhp/collection/orcid/download_orcid_update_parameter.json")))
-
-		);
-		parser.parseArgument(args);
-
-		final String namenode = parser.get("namenode");
-		log.info("got variable namenode: {}", namenode);
-
-		final String master = parser.get("master");
-		log.info("got variable master: {}", master);
-
-		final String targetPath = parser.get("targetPath");
-		log.info("got variable targetPath: {}", targetPath);
-
-		final String apiURL = parser.get("apiURL");
-		log.info("got variable apiURL: {}", apiURL);
-
-		final String accessToken = parser.get("accessToken");
-		log.info("got variable accessToken: {}", accessToken);
-
-		final String graphPath = parser.get("graphPath");
-		log.info("got variable graphPath: {}", graphPath);
-
-		final SparkSession spark = SparkSession
-			.builder()
-			.appName(OrcidGetUpdatesFile.class.getName())
-			.master(master)
-			.getOrCreate();
-
-		final String latestDate = spark
-			.read()
-			.load(graphPath + "/Authors")
-			.selectExpr("max(lastModifiedDate)")
-			.first()
-			.getString(0);
-
-		log.info("latest date is {}", latestDate);
-
-		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(namenode));
-
-		new OrcidGetUpdatesFile().readTar(fileSystem, accessToken, apiURL, targetPath, latestDate);
-
-	}
-
-	private SequenceFile.Writer createFile(Path aPath, FileSystem fileSystem) throws IOException {
-		return SequenceFile
-			.createWriter(
-				fileSystem.getConf(),
-				SequenceFile.Writer.file(aPath),
-				SequenceFile.Writer.keyClass(Text.class),
-				SequenceFile.Writer.valueClass(Text.class));
-	}
-
-	private ORCIDWorker createWorker(final String id, final String targetPath, final BlockingQueue<String> queue,
-		final String accessToken, FileSystem fileSystem) throws Exception {
-		return ORCIDWorker
-			.builder()
-			.withId(id)
-			.withEmployments(createFile(new Path(String.format("%s/employments_%s", targetPath, id)), fileSystem))
-			.withSummary(createFile(new Path(String.format("%s/summary_%s", targetPath, id)), fileSystem))
-			.withWorks(createFile(new Path(String.format("%s/works_%s", targetPath, id)), fileSystem))
-			.withAccessToken(accessToken)
-			.withBlockingQueue(queue)
-			.build();
-	}
-
-	public void readTar(FileSystem fileSystem, final String accessToken, final String apiURL, final String targetPath,
-		final String startDate) throws Exception {
-		final HttpURLConnection urlConn = (HttpURLConnection) new URL(apiURL).openConnection();
-		final HttpClientParams clientParams = new HttpClientParams();
-		urlConn.setInstanceFollowRedirects(false);
-		urlConn.setReadTimeout(clientParams.getReadTimeOut() * 1000);
-		urlConn.setConnectTimeout(clientParams.getConnectTimeOut() * 1000);
-		if (urlConn.getResponseCode() > 199 && urlConn.getResponseCode() < 300) {
-			InputStream input = urlConn.getInputStream();
-
-			Path hdfsWritePath = new Path("/tmp/orcid_updates.tar.gz");
-			final FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true);
-			IOUtils.copy(input, fsDataOutputStream);
-			fsDataOutputStream.flush();
-			fsDataOutputStream.close();
-			FSDataInputStream updateFile = fileSystem.open(hdfsWritePath);
-			TarArchiveInputStream tais = new TarArchiveInputStream(new GzipCompressorInputStream(
-				new BufferedInputStream(
-					updateFile.getWrappedStream())));
-			TarArchiveEntry entry;
-
-			BlockingQueue<String> queue = new ArrayBlockingQueue<String>(3000);
-			final List<ORCIDWorker> workers = new ArrayList<>();
-			for (int i = 0; i < 22; i++) {
-				workers.add(createWorker("" + i, targetPath, queue, accessToken, fileSystem));
-			}
-			workers.forEach(Thread::start);
-
-			while ((entry = tais.getNextTarEntry()) != null) {
-
-				if (entry.isFile()) {
-
-					BufferedReader br = new BufferedReader(new InputStreamReader(tais));
-					System.out.println(br.readLine());
-					br
-						.lines()
-						.map(l -> l.split(","))
-						.filter(s -> StringUtils.compare(s[3].substring(0, 10), startDate) > 0)
-						.map(s -> s[0])
-						.forEach(s -> {
-							try {
-								queue.put(s);
-							} catch (InterruptedException e) {
-								throw new RuntimeException(e);
-							}
-						});
-
-				}
-			}
-
-			for (int i = 0; i < 22; i++) {
-				queue.put(ORCIDWorker.JOB_COMPLETE);
-			}
-			for (ORCIDWorker worker : workers) {
-				worker.join();
-			}
-		}
-
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidParser.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidParser.java
@ -1,15 +1,11 @@

 package eu.dnetlib.dhp.collection.orcid;

-import java.util.*;
-import java.util.stream.Collectors;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;

 import org.apache.commons.lang3.StringUtils;
-import org.dom4j.Document;
-import org.dom4j.DocumentFactory;
-import org.dom4j.DocumentHelper;
-import org.dom4j.Node;
-import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@ -44,8 +40,8 @@ public class OrcidParser {
 	private static final String NS_ERROR = "error";
 	private static final String NS_HISTORY = "history";
 	private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
-	private static final String NS_EMPLOYMENT = "employment";
-	private static final String NS_EMPLOYMENT_URL = "http://www.orcid.org/ns/employment";
+	private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
+	private static final String NS_BULK = "bulk";
 	private static final String NS_EXTERNAL = "external-identifier";
 	private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";

@ -65,7 +61,6 @@ public class OrcidParser {
 		ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
 		ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
 		ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
-		ap.declareXPathNameSpace(NS_EMPLOYMENT, NS_EMPLOYMENT_URL);
 	}

 	public Author parseSummary(final String xml) {
@ -75,15 +70,13 @@ public class OrcidParser {
 			generateParsedDocument(xml);
 			List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
 				.getTextValuesWithAttributes(
-					ap, vn, "//record:record", Collections.singletonList("path"));
+					ap, vn, "//record:record", Arrays.asList("path"));
 			if (!recordNodes.isEmpty()) {
 				final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
 				author.setOrcid(oid);
 			} else {
 				return null;
 			}
-			final String ltm = VtdUtilityParser.getSingleValue(ap, vn, "//common:last-modified-date");
-			author.setLastModifiedDate(ltm);
 			List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
 				.getTextValuesWithAttributes(
 					ap, vn, "//person:name", Arrays.asList("visibility"));
@ -136,64 +129,6 @@ public class OrcidParser {
 		}
 	}

-	public List<Work> parseWorks(final String xml) {
-
-		try {
-			String oid;
-
-			generateParsedDocument(xml);
-			List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
-				.getTextValuesWithAttributes(ap, vn, "//activities:works", Arrays.asList("path", "visibility"));
-			if (!workNodes.isEmpty()) {
-				oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
-
-			} else {
-				return null;
-			}
-			final List<Work> works = new ArrayList<>();
-			ap.selectXPath("//work:work-summary");
-
-			while (ap.evalXPath() != -1) {
-				final Work work = new Work();
-				work.setOrcid(oid);
-				final AutoPilot ap1 = new AutoPilot(ap.getNav());
-				ap1.selectXPath("./work:title/common:title");
-				while (ap1.evalXPath() != -1) {
-					int it = vn.getText();
-					work.setTitle(vn.toNormalizedString(it));
-				}
-				ap1.selectXPath(".//common:external-id");
-				while (ap1.evalXPath() != -1) {
-					final Pid pid = new Pid();
-
-					final AutoPilot ap2 = new AutoPilot(ap1.getNav());
-
-					ap2.selectXPath("./common:external-id-type");
-					while (ap2.evalXPath() != -1) {
-						int it = vn.getText();
-						pid.setSchema(vn.toNormalizedString(it));
-					}
-					ap2.selectXPath("./common:external-id-value");
-					while (ap2.evalXPath() != -1) {
-						int it = vn.getText();
-						pid.setValue(vn.toNormalizedString(it));
-					}
-
-					work.addPid(pid);
-				}
-
-				works.add(work);
-			}
-			return works;
-
-		} catch (Throwable e) {
-			log.error("Error on parsing {}", xml);
-			log.error(e.getMessage());
-			return null;
-		}
-
-	}
-
 	public Work parseWork(final String xml) {

 		try {
@ -241,15 +176,11 @@ public class OrcidParser {
 	}

 	private String extractEmploymentDate(final String xpath) throws Exception {
-		return extractEmploymentDate(xpath, ap);
-	}

-	private String extractEmploymentDate(final String xpath, AutoPilot pp) throws Exception {
-
-		pp.selectXPath(xpath);
+		ap.selectXPath(xpath);
 		StringBuilder sb = new StringBuilder();
-		while (pp.evalXPath() != -1) {
-			final AutoPilot ap1 = new AutoPilot(pp.getNav());
+		while (ap.evalXPath() != -1) {
+			final AutoPilot ap1 = new AutoPilot(ap.getNav());
 			ap1.selectXPath("./common:year");
 			while (ap1.evalXPath() != -1) {
 				int it = vn.getText();
@ -272,104 +203,6 @@ public class OrcidParser {

 	}

-	public List<Employment> parseEmployments(final String xml) {
-		try {
-			String oid;
-			Map<String, String> nsContext = getNameSpaceMap();
-			DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
-			Document doc = DocumentHelper.parseText(xml);
-			oid = doc.valueOf("//activities:employments/@path");
-			if (oid == null || StringUtils.isEmpty(oid))
-				return null;
-			final String orcid = oid.split("/")[1];
-
-			List<Node> nodes = doc.selectNodes("//employment:employment-summary");
-			return nodes.stream().map(n -> {
-				final Employment e = new Employment();
-				e.setOrcid(orcid);
-
-				final String depName = n.valueOf(".//common:department-name");
-				if (StringUtils.isNotBlank(depName))
-					e.setDepartmentName(depName);
-				final String roleTitle = n.valueOf(".//common:role-title");
-				e.setRoleTitle(roleTitle);
-				final String organizationName = n.valueOf(".//common:organization/common:name");
-				if (StringUtils.isEmpty(e.getDepartmentName()))
-					e.setDepartmentName(organizationName);
-				final Pid p = new Pid();
-				final String pid = n
-					.valueOf(
-						"./common:organization/common:disambiguated-organization/common:disambiguated-organization-identifier");
-				p.setValue(pid);
-				final String pidType = n
-					.valueOf("./common:organization/common:disambiguated-organization/common:disambiguation-source");
-				p.setSchema(pidType);
-				e.setAffiliationId(p);
-
-				final StringBuilder aDate = new StringBuilder();
-				final String sy = n.valueOf("./common:start-date/common:year");
-				if (StringUtils.isNotBlank(sy)) {
-					aDate.append(sy);
-					final String sm = n.valueOf("./common:start-date/common:month");
-					final String sd = n.valueOf("./common:start-date/common:day");
-					aDate.append("-");
-					if (StringUtils.isNotBlank(sm))
-						aDate.append(sm);
-					else
-						aDate.append("01");
-					aDate.append("-");
-					if (StringUtils.isNotBlank(sd))
-						aDate.append(sd);
-					else
-						aDate.append("01");
-					e.setEndDate(aDate.toString());
-				}
-
-				final String ey = n.valueOf("./common:end-date/common:year");
-				if (StringUtils.isNotBlank(ey)) {
-					aDate.append(ey);
-					final String em = n.valueOf("./common:end-date/common:month");
-					final String ed = n.valueOf("./common:end-date/common:day");
-					aDate.append("-");
-					if (StringUtils.isNotBlank(em))
-						aDate.append(em);
-					else
-						aDate.append("01");
-					aDate.append("-");
-					if (StringUtils.isNotBlank(ed))
-						aDate.append(ed);
-					else
-						aDate.append("01");
-					e.setEndDate(aDate.toString());
-				}
-
-				return e;
-
-			}).collect(Collectors.toList());
-		} catch (Throwable e) {
-			log.error("Error on parsing {}", xml);
-			log.error(e.getMessage());
-			return null;
-		}
-	}
-
-	@NotNull
-	private static Map<String, String> getNameSpaceMap() {
-		Map<String, String> nsContext = new HashMap<>();
-		nsContext.put(NS_COMMON, NS_COMMON_URL);
-		nsContext.put(NS_PERSON, NS_PERSON_URL);
-		nsContext.put(NS_DETAILS, NS_DETAILS_URL);
-		nsContext.put(NS_OTHER, NS_OTHER_URL);
-		nsContext.put(NS_RECORD, NS_RECORD_URL);
-		nsContext.put(NS_ERROR, NS_ERROR_URL);
-		nsContext.put(NS_HISTORY, NS_HISTORY_URL);
-		nsContext.put(NS_WORK, NS_WORK_URL);
-		nsContext.put(NS_EXTERNAL, NS_EXTERNAL_URL);
-		nsContext.put(NS_ACTIVITIES, NS_ACTIVITIES_URL);
-		nsContext.put(NS_EMPLOYMENT, NS_EMPLOYMENT_URL);
-		return nsContext;
-	}
-
 	public Employment parseEmployment(final String xml) {
 		try {
 			final Employment employment = new Employment();
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java
@ -18,8 +18,6 @@ public class Author extends ORCIDItem {

 	private String biography;

-	private String lastModifiedDate;
-
 	public String getBiography() {
 		return biography;
 	}
@ -76,14 +74,6 @@ public class Author extends ORCIDItem {
 		this.otherPids = otherPids;
 	}

-	public String getLastModifiedDate() {
-		return lastModifiedDate;
-	}
-
-	public void setLastModifiedDate(String lastModifiedDate) {
-		this.lastModifiedDate = lastModifiedDate;
-	}
-
 	public void addOtherPid(final Pid pid) {

 		if (otherPids == null)
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
@ -18,11 +18,7 @@ import javax.xml.transform.TransformerConfigurationException;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpression;
-import javax.xml.xpath.XPathExpressionException;
-import javax.xml.xpath.XPathFactory;
+import javax.xml.xpath.*;

 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
@ -39,7 +35,7 @@ import eu.dnetlib.dhp.common.collection.CollectorException;
 import eu.dnetlib.dhp.common.collection.HttpClientParams;

 /**
- * log.info(...) equal to log.trace(...) in the application-logs
+ * log.info(...) equal to  log.trace(...) in the application-logs
 * <p>
 * known bug: at resumptionType 'discover' if the (resultTotal % resultSizeValue) == 0 the collecting fails -> change the resultSizeValue
 *
@ -51,12 +47,13 @@ public class RestIterator implements Iterator<String> {

 	private static final Logger log = LoggerFactory.getLogger(RestIterator.class);
 	public static final String UTF_8 = "UTF-8";
-	private static final int MAX_ATTEMPTS = 5;

 	private final HttpClientParams clientParams;

 	private final String BASIC = "basic";

+	private final JsonUtils jsonUtils;
+
 	private final String baseUrl;
 	private final String resumptionType;
 	private final String resumptionParam;
@ -65,9 +62,8 @@ public class RestIterator implements Iterator<String> {
 	private final int resultSizeValue;
 	private int resumptionInt = 0; // integer resumption token (first record to harvest)
 	private int resultTotal = -1;
-	private String resumptionStr = Integer.toString(this.resumptionInt); // string resumption token (first record to
-																			// harvest
-	// or token scanned from results)
+	private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest
+																	// or token scanned from results)
 	private InputStream resultStream;
 	private Transformer transformer;
 	private XPath xpath;
@ -79,7 +75,7 @@ public class RestIterator implements Iterator<String> {
 	private final String querySize;
 	private final String authMethod;
 	private final String authToken;
-	private final Queue<String> recordQueue = new PriorityBlockingQueue<>();
+	private final Queue<String> recordQueue = new PriorityBlockingQueue<String>();
 	private int discoverResultSize = 0;
 	private int pagination = 1;
 	/*
@ -89,8 +85,8 @@ public class RestIterator implements Iterator<String> {
 	 */
 	private final String resultOutputFormat;

-	/**
-	 * RestIterator class compatible to version 1.3.33
+	/** RestIterator class
+	 *  compatible to version 1.3.33
 	 */
 	public RestIterator(
 		final HttpClientParams clientParams,
@ -110,46 +106,44 @@ public class RestIterator implements Iterator<String> {
 		final String resultOutputFormat) {

 		this.clientParams = clientParams;
+		this.jsonUtils = new JsonUtils();
 		this.baseUrl = baseUrl;
 		this.resumptionType = resumptionType;
 		this.resumptionParam = resumptionParam;
 		this.resultFormatValue = resultFormatValue;
-		this.resultSizeValue = Integer.parseInt(resultSizeValueStr);
+		this.resultSizeValue = Integer.valueOf(resultSizeValueStr);
 		this.queryParams = queryParams;
 		this.authMethod = authMethod;
 		this.authToken = authToken;
 		this.resultOutputFormat = resultOutputFormat;

-		this.queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue
-			: "";
-		this.querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr
+		queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue
 			: "";
+		querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";

 		try {
 			initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
-		} catch (final Exception e) {
+		} catch (Exception e) {
 			throw new IllegalStateException("xml transformation init failed: " + e.getMessage());
 		}
-
 		initQueue();
 	}

-	private void initXmlTransformation(final String resultTotalXpath, final String resumptionXpath,
-		final String entityXpath)
+	private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
 		throws TransformerConfigurationException, XPathExpressionException {
 		final TransformerFactory factory = TransformerFactory.newInstance();
-		this.transformer = factory.newTransformer();
-		this.transformer.setOutputProperty(OutputKeys.INDENT, "yes");
-		this.transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
-		this.xpath = XPathFactory.newInstance().newXPath();
-		this.xprResultTotalPath = this.xpath.compile(resultTotalXpath);
-		this.xprResumptionPath = this.xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
-		this.xprEntity = this.xpath.compile(entityXpath);
+		transformer = factory.newTransformer();
+		transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+		transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
+		xpath = XPathFactory.newInstance().newXPath();
+		xprResultTotalPath = xpath.compile(resultTotalXpath);
+		xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
+		xprEntity = xpath.compile(entityXpath);
 	}

 	private void initQueue() {
-		this.query = this.baseUrl + "?" + this.queryParams + this.querySize + this.queryFormat;
-		log.info("REST calls starting with {}", this.query);
+		query = baseUrl + "?" + queryParams + querySize + queryFormat;
+		log.info("REST calls starting with {}", query);
 	}

 	private void disconnect() {
@ -162,11 +156,12 @@ public class RestIterator implements Iterator<String> {
 	 */
 	@Override
 	public boolean hasNext() {
-		if (this.recordQueue.isEmpty() && this.query.isEmpty()) {
+		if (recordQueue.isEmpty() && query.isEmpty()) {
 			disconnect();
 			return false;
+		} else {
+			return true;
 		}
-		return true;
 	}

 	/*
@ -175,241 +170,214 @@ public class RestIterator implements Iterator<String> {
 	 */
 	@Override
 	public String next() {
-		synchronized (this.recordQueue) {
-			while (this.recordQueue.isEmpty() && !this.query.isEmpty()) {
+		synchronized (recordQueue) {
+			while (recordQueue.isEmpty() && !query.isEmpty()) {
 				try {
-					this.query = downloadPage(this.query, 0);
-				} catch (final CollectorException e) {
+					query = downloadPage(query);
+				} catch (CollectorException e) {
 					log.debug("CollectorPlugin.next()-Exception: {}", e);
 					throw new RuntimeException(e);
 				}
 			}
-			return this.recordQueue.poll();
+			return recordQueue.poll();
 		}
 	}

 	/*
-	 * download page and return nextQuery (with number of attempt)
+	 * download page and return nextQuery
 	 */
-	private String downloadPage(String query, final int attempt) throws CollectorException {
+	private String downloadPage(String query) throws CollectorException {
+		String resultJson;
+		String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
+		String nextQuery = "";
+		String emptyXml = resultXml + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";
+		Node resultNode = null;
+		NodeList nodeList = null;
+		String qUrlArgument = "";
+		int urlOldResumptionSize = 0;
+		InputStream theHttpInputStream;

-		if (attempt > MAX_ATTEMPTS) {
-			throw new CollectorException("Max Number of attempts reached, query:" + query);
-		}
-
-		if (attempt > 0) {
-			final int delay = (attempt * 5000);
-			log.debug("Attempt {} with delay {}", attempt, delay);
-			try {
-				Thread.sleep(delay);
-			} catch (final InterruptedException e) {
-				new CollectorException(e);
+		// for deep-cursor resumption: append the initial cursor=* if the query has no cursor parameter yet
+		if (resumptionType.equalsIgnoreCase("deep-cursor")) {
+			log.debug("check resumptionType deep-cursor and check cursor=*?{}", query);
+			if (!query.contains("&cursor=")) {
+				query += "&cursor=*";
 			}
 		}

 		try {
-			String resultJson;
-			String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
-			String nextQuery = "";
-			final String emptyXml = resultXml + "<" + JsonUtils.XML_WRAP_TAG + "></" + JsonUtils.XML_WRAP_TAG + ">";
-			Node resultNode = null;
-			NodeList nodeList = null;
-			String qUrlArgument = "";
-			int urlOldResumptionSize = 0;
-			InputStream theHttpInputStream;
+			log.info("requestig URL [{}]", query);

-			// check if cursor=* is initial set otherwise add it to the queryParam URL
-			if ("deep-cursor".equalsIgnoreCase(this.resumptionType)) {
-				log.debug("check resumptionType deep-cursor and check cursor=*?{}", query);
-				if (!query.contains("&cursor=")) {
-					query += "&cursor=*";
-				}
+			URL qUrl = new URL(query);
+			log.debug("authMethod: {}", authMethod);
+			if ("bearer".equalsIgnoreCase(this.authMethod)) {
+				log.trace("authMethod before inputStream: {}", resultXml);
+				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
+				conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken);
+				conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
+				conn.setRequestMethod("GET");
+				theHttpInputStream = conn.getInputStream();
+			} else if (BASIC.equalsIgnoreCase(this.authMethod)) {
+				log.trace("authMethod before inputStream: {}", resultXml);
+				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
+				conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken);
+				conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
+				conn.setRequestMethod("GET");
+				theHttpInputStream = conn.getInputStream();
+			} else {
+				theHttpInputStream = qUrl.openStream();
 			}

-			try {
-				log.info("requesting URL [{}]", query);
+			resultStream = theHttpInputStream;
+			if ("json".equals(resultOutputFormat)) {
+				resultJson = IOUtils.toString(resultStream, StandardCharsets.UTF_8);
+				resultXml = jsonUtils.convertToXML(resultJson);
+				resultStream = IOUtils.toInputStream(resultXml, UTF_8);
+			}

-				final URL qUrl = new URL(query);
-				log.debug("authMethod: {}", this.authMethod);
-				if ("bearer".equalsIgnoreCase(this.authMethod)) {
-					log.trace("authMethod before inputStream: {}", resultXml);
-					final HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
-					conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + this.authToken);
-					conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
-					conn.setRequestMethod("GET");
-					theHttpInputStream = conn.getInputStream();
-				} else if (this.BASIC.equalsIgnoreCase(this.authMethod)) {
-					log.trace("authMethod before inputStream: {}", resultXml);
-					final HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
-					conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + this.authToken);
-					conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
-					conn.setRequestMethod("GET");
-					theHttpInputStream = conn.getInputStream();
-				} else {
-					theHttpInputStream = qUrl.openStream();
+			if (!(emptyXml).equalsIgnoreCase(resultXml)) {
+				resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
+				nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
+				log.debug("nodeList.length: {}", nodeList.getLength());
+				for (int i = 0; i < nodeList.getLength(); i++) {
+					StringWriter sw = new StringWriter();
+					transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
+					String toEnqueue = sw.toString();
+					if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) {
+						log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml);
+					} else {
+						recordQueue.add(sw.toString());
+					}
 				}
+			} else {
+				log.warn("resultXml is equal with emptyXml");
+			}

-				this.resultStream = theHttpInputStream;
-				if ("json".equals(this.resultOutputFormat)) {
-					resultJson = IOUtils.toString(this.resultStream, StandardCharsets.UTF_8);
-					resultXml = JsonUtils.convertToXML(resultJson);
-					this.resultStream = IOUtils.toInputStream(resultXml, UTF_8);
-				}
+			resumptionInt += resultSizeValue;

-				if (!(emptyXml).equalsIgnoreCase(resultXml)) {
-					resultNode = (Node) this.xpath
-						.evaluate("/", new InputSource(this.resultStream), XPathConstants.NODE);
-					nodeList = (NodeList) this.xprEntity.evaluate(resultNode, XPathConstants.NODESET);
-					log.debug("nodeList.length: {}", nodeList.getLength());
-					for (int i = 0; i < nodeList.getLength(); i++) {
-						final StringWriter sw = new StringWriter();
-						this.transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
-						final String toEnqueue = sw.toString();
-						if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue)
-							|| emptyXml.equalsIgnoreCase(toEnqueue)) {
-							log
-								.warn(
-									"The following record resulted in empty item for the feeding queue: {}", resultXml);
-						} else {
-							this.recordQueue.add(sw.toString());
+			switch (resumptionType.toLowerCase()) {
+				case "scan": // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items
+					resumptionStr = xprResumptionPath.evaluate(resultNode);
+					break;
+
+				case "count": // begin at one step for all records, iterate over items
+					resumptionStr = Integer.toString(resumptionInt);
+					break;
+
+				case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808)
+					if (resultSizeValue < 2) {
+						throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2");
+					}
+					qUrlArgument = qUrl.getQuery();
+					String[] arrayQUrlArgument = qUrlArgument.split("&");
+					for (String arrayUrlArgStr : arrayQUrlArgument) {
+						if (arrayUrlArgStr.startsWith(resumptionParam)) {
+							String[] resumptionKeyValue = arrayUrlArgStr.split("=");
+							if (isInteger(resumptionKeyValue[1])) {
+								urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
+								log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize);
+							} else {
+								log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]);
+							}
 						}
 					}
-				} else {
-					log.warn("resultXml is equal with emptyXml");
-				}

-				this.resumptionInt += this.resultSizeValue;
-
-				switch (this.resumptionType.toLowerCase()) {
-					case "scan": // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items
-						this.resumptionStr = this.xprResumptionPath.evaluate(resultNode);
-						break;
-
-					case "count": // begin at one step for all records, iterate over items
-						this.resumptionStr = Integer.toString(this.resumptionInt);
-						break;
-
-					case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808)
-						if (this.resultSizeValue < 2) {
-							throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2");
-						}
-						qUrlArgument = qUrl.getQuery();
-						final String[] arrayQUrlArgument = qUrlArgument.split("&");
-						for (final String arrayUrlArgStr : arrayQUrlArgument) {
-							if (arrayUrlArgStr.startsWith(this.resumptionParam)) {
-								final String[] resumptionKeyValue = arrayUrlArgStr.split("=");
-								if (isInteger(resumptionKeyValue[1])) {
-									urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
-									log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize);
-								} else {
-									log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]);
-								}
-							}
-						}
-
-						if (((emptyXml).equalsIgnoreCase(resultXml))
-							|| ((nodeList != null) && (nodeList.getLength() < this.resultSizeValue))) {
-							// resumptionStr = "";
-							if (nodeList != null) {
-								this.discoverResultSize += nodeList.getLength();
-							}
-							this.resultTotal = this.discoverResultSize;
-						} else {
-							this.resumptionStr = Integer.toString(this.resumptionInt);
-							this.resultTotal = this.resumptionInt + 1;
-							if (nodeList != null) {
-								this.discoverResultSize += nodeList.getLength();
-							}
-						}
-						log.info("discoverResultSize: {}", this.discoverResultSize);
-						break;
-
-					case "pagination":
-					case "page": // pagination, iterate over page numbers
-						this.pagination += 1;
+					if (((emptyXml).equalsIgnoreCase(resultXml))
+						|| ((nodeList != null) && (nodeList.getLength() < resultSizeValue))) {
+						// resumptionStr = "";
 						if (nodeList != null) {
-							this.discoverResultSize += nodeList.getLength();
-						} else {
-							this.resultTotal = this.discoverResultSize;
-							this.pagination = this.discoverResultSize;
+							discoverResultSize += nodeList.getLength();
 						}
-						this.resumptionInt = this.pagination;
-						this.resumptionStr = Integer.toString(this.resumptionInt);
-						break;
-
-					case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor
-										// in
-										// solr)
-						// isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode:
-						// deep-cursor, Param 'resultSizeValue' is less than 2");}
-
-						this.resumptionStr = encodeValue(this.xprResumptionPath.evaluate(resultNode));
-						this.queryParams = this.queryParams.replace("&cursor=*", "");
-
-						// terminating if length of nodeList is 0
-						if ((nodeList != null) && (nodeList.getLength() < this.discoverResultSize)) {
-							this.resumptionInt += ((nodeList.getLength() + 1) - this.resultSizeValue);
-						} else {
-							this.resumptionInt += (nodeList.getLength() - this.resultSizeValue); // subtract the
-																									// resultSizeValue
-							// because the iteration is over
-							// real length and the
-							// resultSizeValue is added before
-							// the switch()
+						resultTotal = discoverResultSize;
+					} else {
+						resumptionStr = Integer.toString(resumptionInt);
+						resultTotal = resumptionInt + 1;
+						if (nodeList != null) {
+							discoverResultSize += nodeList.getLength();
 						}
+					}
+					log.info("discoverResultSize: {}", discoverResultSize);
+					break;

-						this.discoverResultSize = nodeList.getLength();
+				case "pagination":
+				case "page": // pagination, iterate over page numbers
+					pagination += 1;
+					if (nodeList != null) {
+						discoverResultSize += nodeList.getLength();
+					} else {
+						resultTotal = discoverResultSize;
+						pagination = discoverResultSize;
+					}
+					resumptionInt = pagination;
+					resumptionStr = Integer.toString(resumptionInt);
+					break;

-						log
-							.debug(
-								"downloadPage().deep-cursor: resumptionStr=" + this.resumptionStr + " ; queryParams="
-									+ this.queryParams + " resumptionLengthIncreased: " + this.resumptionInt);
+				case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor in
+									// solr)
+					// isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode:
+					// deep-cursor, Param 'resultSizeValue' is less than 2");}

-						break;
+					resumptionStr = encodeValue(xprResumptionPath.evaluate(resultNode));
+					queryParams = queryParams.replace("&cursor=*", "");

-					default: // otherwise: abort
-						// resultTotal = resumptionInt;
-						break;
-				}
+					// terminating if length of nodeList is 0
+					if ((nodeList != null) && (nodeList.getLength() < discoverResultSize)) {
+						resumptionInt += (nodeList.getLength() + 1 - resultSizeValue);
+					} else {
+						resumptionInt += (nodeList.getLength() - resultSizeValue); // subtract the resultSizeValue
+																					// because the iteration is over
+																					// real length and the
+																					// resultSizeValue is added before
+																					// the switch()
+					}

-			} catch (final Exception e) {
-				log.error(e.getMessage(), e);
-				throw new IllegalStateException("collection failed: " + e.getMessage());
+					discoverResultSize = nodeList.getLength();
+
+					log
+						.debug(
+							"downloadPage().deep-cursor: resumptionStr=" + resumptionStr + " ; queryParams="
+								+ queryParams + " resumptionLengthIncreased: " + resumptionInt);
+
+					break;
+
+				default: // otherwise: abort
+					// resultTotal = resumptionInt;
+					break;
 			}

-			try {
-				if (this.resultTotal == -1) {
-					this.resultTotal = Integer.parseInt(this.xprResultTotalPath.evaluate(resultNode));
-					if ("page".equalsIgnoreCase(this.resumptionType) && !this.BASIC.equalsIgnoreCase(this.authMethod)) {
-						this.resultTotal += 1;
-					} // to correct the upper bound
-					log.info("resultTotal was -1 is now: " + this.resultTotal);
-				}
-			} catch (final Exception e) {
-				log.error(e.getMessage(), e);
-				throw new IllegalStateException("downloadPage resultTotal couldn't parse: " + e.getMessage());
-			}
-			log.debug("resultTotal: " + this.resultTotal);
-			log.debug("resInt: " + this.resumptionInt);
-			if (this.resumptionInt <= this.resultTotal) {
-				nextQuery = this.baseUrl + "?" + this.queryParams + this.querySize + "&" + this.resumptionParam + "="
-					+ this.resumptionStr
-					+ this.queryFormat;
-			} else {
-				nextQuery = "";
-				// if (resumptionType.toLowerCase().equals("deep-cursor")) { resumptionInt -= 1; } // correct the
-				// resumptionInt and prevent a NullPointer Exception at mdStore
-			}
-			log.debug("nextQueryUrl: " + nextQuery);
-			return nextQuery;
-		} catch (final Throwable e) {
-			log.warn(e.getMessage(), e);
-			return downloadPage(query, attempt + 1);
+		} catch (Exception e) {
+			log.error(e.getMessage(), e);
+			throw new IllegalStateException("collection failed: " + e.getMessage());
 		}

+		try {
+			if (resultTotal == -1) {
+				resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
+				if (resumptionType.equalsIgnoreCase("page") && !BASIC.equalsIgnoreCase(authMethod)) {
+					resultTotal += 1;
+				} // to correct the upper bound
+				log.info("resultTotal was -1 is now: " + resultTotal);
+			}
+		} catch (Exception e) {
+			log.error(e.getMessage(), e);
+			throw new IllegalStateException("downloadPage resultTotal couldn't parse: " + e.getMessage());
+		}
+		log.debug("resultTotal: " + resultTotal);
+		log.debug("resInt: " + resumptionInt);
+		if (resumptionInt <= resultTotal) {
+			nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr
+				+ queryFormat;
+		} else {
+			nextQuery = "";
+			// if (resumptionType.toLowerCase().equals("deep-cursor")) { resumptionInt -= 1; } // correct the
+			// resumptionInt and prevent a NullPointer Exception at mdStore
+		}
+		log.debug("nextQueryUrl: " + nextQuery);
+		return nextQuery;
+
 	}

-	private boolean isInteger(final String s) {
+	private boolean isInteger(String s) {
 		boolean isValidInteger = false;
 		try {
 			Integer.parseInt(s);
@ -417,7 +385,7 @@ public class RestIterator implements Iterator<String> {
 			// s is a valid integer

 			isValidInteger = true;
-		} catch (final NumberFormatException ex) {
+		} catch (NumberFormatException ex) {
 			// s is not an integer
 		}

@ -425,20 +393,20 @@ public class RestIterator implements Iterator<String> {
 	}

 	// Method to encode a string value using `UTF-8` encoding scheme
-	private String encodeValue(final String value) {
+	private String encodeValue(String value) {
 		try {
 			return URLEncoder.encode(value, StandardCharsets.UTF_8.toString());
-		} catch (final UnsupportedEncodingException ex) {
+		} catch (UnsupportedEncodingException ex) {
 			throw new RuntimeException(ex.getCause());
 		}
 	}

 	public String getResultFormatValue() {
-		return this.resultFormatValue;
+		return resultFormatValue;
 	}

 	public String getResultOutputFormat() {
-		return this.resultOutputFormat;
+		return resultOutputFormat;
 	}

 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/JsonUtils.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/JsonUtils.java
@ -3,142 +3,82 @@ package eu.dnetlib.dhp.collection.plugin.utils;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.json.JSONArray;
-import org.json.JSONObject;

 public class JsonUtils {
-	public static final String XML_WRAP_TAG = "recordWrap";
-	private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
-	private static final String INVALID_XMLTAG_CHARS = "!\"#$%&'()*+,/;<=>?@[\\]^`{|}~,";

 	private static final Log log = LogFactory.getLog(JsonUtils.class);

+	public static final String wrapName = "recordWrap";
+
 	/**
-	 * cleanup in JSON-KeyName
+	 * convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to ''
 	 * check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names
 	 * and work-around for the JSON to XML converting of org.json.XML-package.
 	 *
-	 * @param input
-	 * @return converted json object
+	 * known bugs:     doesn't prevent     "key name":" ["sexy name",": penari","erotic dance"],
+	 *
+	 * @param jsonInput
+	 * @return convertedJsonKeynameOutput
 	 */
-	public static JSONObject cleanJsonObject(final JSONObject input) {
-		if (null == input) {
-			return null;
+	public String syntaxConvertJsonKeyNames(String jsonInput) {
+
+		log.trace("before convertJsonKeyNames: " + jsonInput);
+		// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
+		// replace whitespace in JSON names with '_'
+		while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
 		}

-		JSONObject result = new JSONObject();
-
-		for (String key : input.keySet()) {
-			Object value = input.opt(key);
-			if (value != null) {
-				result.put(cleanKey(key), cleanValue(value));
-			}
+		// replace forward-slash (sign '/' ) in JSON Names with '_'
+		while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":");
 		}

-		return result;
+		// replace '(' in JSON Names with ''
+		while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":");
+		}
+
+		// replace ')' in JSON Names with ''
+		while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":");
+		}
+
+		// add prefix of startNumbers in JSON Keynames with 'n_'
+		while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":");
+		}
+		// add prefix of only numbers in JSON Keynames with 'm_'
+		while (jsonInput.matches(".*\"([0-9]+)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":");
+		}
+
+		// remove ':' between digits (e.g. in '2018-08-28T11:05:00Z') in JSON key names
+		while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":");
+		}
+
+		// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames.
+		// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) {
+		// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":");
+		// }
+
+		// replace '=' in JSON Keynames with '-'
+		while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) {
+			jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":");
+		}
+
+		log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
+		return jsonInput;
 	}

-	private static Object cleanValue(Object object) {
-		if (object instanceof JSONObject) {
-			return cleanJsonObject((JSONObject) object);
-		} else if (object instanceof JSONArray) {
-			JSONArray array = (JSONArray) object;
-			JSONArray res = new JSONArray();
-
-			for (int i = array.length() - 1; i >= 0; i--) {
-				res.put(i, cleanValue(array.opt(i)));
-			}
-			return res;
-		} else if (object instanceof String) {
-			String value = (String) object;
-
-			// XML 1.0 Allowed characters
-			// Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
-
-			return value
-				.codePoints()
-				.filter(
-					cp -> cp == 0x9 || cp == 0xA || cp == 0xD || (cp >= 0x20 && cp <= 0xD7FF)
-						|| (cp >= 0xE000 && cp <= 0xFFFD)
-						|| (cp >= 0x10000 && cp <= 0x10FFFF))
-				.collect(
-					StringBuilder::new,
-					StringBuilder::appendCodePoint,
-					StringBuilder::append)
-				.toString();
-		}
-
-		return object;
-	}
-
-	private static String cleanKey(String key) {
-		if (key == null || key.isEmpty()) {
-			return key;
-		}
-
-		// xml tag cannot begin with "-", ".", or a numeric digit.
-		switch (key.charAt(0)) {
-			case '-':
-			case '.':
-				key = "_" + key.substring(1);
-				break;
-		}
-
-		if (Character.isDigit(key.charAt(0))) {
-			if (key.matches("^[0-9]+$")) {
-				// add prefix of only numbers in JSON Keynames with 'm_'
-				key = "m_" + key;
-			} else {
-				// add prefix of startNumbers in JSON Keynames with 'n_'
-				key = "n_" + key;
-			}
-		}
-
-		StringBuilder res = new StringBuilder(key.length());
-		for (int i = 0; i < key.length(); i++) {
-			char c = key.charAt(i);
-
-			// sequence of whitespaces are rendered as a single '_'
-			if (Character.isWhitespace(c)) {
-				while (i + 1 < key.length() && Character.isWhitespace(key.charAt(i + 1))) {
-					i++;
-				}
-				res.append('_');
-			}
-			// remove invalid chars for xml tags with the expception of '=' and '/'
-			else if (INVALID_XMLTAG_CHARS.indexOf(c) >= 0) {
-				switch (c) {
-					case '=':
-						res.append('-');
-						break;
-					case '/':
-						res.append('_');
-						break;
-					default:
-						break;
-				}
-				// nothing
-			}
-			// all other chars are kept
-			else {
-				res.append(c);
-			}
-		}
-
-		return res.toString();
-	}
-
-	static public String convertToXML(final String jsonRecord) {
-		if (log.isTraceEnabled()) {
-			log.trace("input json: " + jsonRecord);
-		}
-
-		JSONObject jsonObject = cleanJsonObject(new org.json.JSONObject(jsonRecord));
-		String res = XML_HEADER + org.json.XML.toString(jsonObject, XML_WRAP_TAG); // wrap xml in single root element
-
-		if (log.isTraceEnabled()) {
-			log.trace("outout xml: " + res);
-		}
-		return res;
+	public String convertToXML(final String jsonRecord) {
+		String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
+		org.json.JSONObject jsonObject = new org.json.JSONObject(syntaxConvertJsonKeyNames(jsonRecord));
+		resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
+		log.trace("before inputStream: " + resultXml);
+		resultXml = XmlCleaner.cleanAllEntities(resultXml);
+		log.trace("after cleaning: " + resultXml);
+		return resultXml;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
@ -48,30 +48,23 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
 	@Override
 	public MetadataRecord call(MetadataRecord value) {
 		aggregationCounter.getTotalItems().add(1);
-
-		Processor processor = new Processor(false);
-
-		processor.registerExtensionFunction(cleanFunction);
-		processor.registerExtensionFunction(new DateCleaner());
-		processor.registerExtensionFunction(new PersonCleaner());
-
-		final XsltCompiler comp = processor.newXsltCompiler();
-		QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
-		comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
-		QName datasourceNameParam = new QName(DATASOURCE_NAME_PARAM);
-		comp.setParameter(datasourceNameParam, new XdmAtomicValue(value.getProvenance().getDatasourceName()));
-		XsltExecutable xslt;
-		XdmNode source;
 		try {
-			xslt = comp
+			Processor processor = new Processor(false);
+
+			processor.registerExtensionFunction(cleanFunction);
+			processor.registerExtensionFunction(new DateCleaner());
+			processor.registerExtensionFunction(new PersonCleaner());
+
+			final XsltCompiler comp = processor.newXsltCompiler();
+			QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
+			comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
+			QName datasourceNameParam = new QName(DATASOURCE_NAME_PARAM);
+			comp.setParameter(datasourceNameParam, new XdmAtomicValue(value.getProvenance().getDatasourceName()));
+			XsltExecutable xslt = comp
 				.compile(new StreamSource(IOUtils.toInputStream(transformationRule, StandardCharsets.UTF_8)));
-			source = processor
+			XdmNode source = processor
 				.newDocumentBuilder()
 				.build(new StreamSource(IOUtils.toInputStream(value.getBody(), StandardCharsets.UTF_8)));
-		} catch (Throwable e) {
-			throw new RuntimeException("Error on parsing xslt", e);
-		}
-		try {
 			XsltTransformer trans = xslt.load();
 			trans.setInitialContextNode(source);
 			final StringWriter output = new StringWriter();
--- a/Show More
+++ b/Show More
				`@ -1 +0,0 @@`
				`{"id":"50\|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}`