Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop

2023-12-18 11:47:17 +01:00 · 2023-12-18 11:47:17 +01:00 · 1fbd4325f5
parent 1f1a6a5f5f c4ec35b6cd
commit 1fbd4325f5
302 changed files with 12684 additions and 4060 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
@ -51,6 +51,7 @@ public class Constants {
 	public static final String RETRY_DELAY = "retryDelay";
 	public static final String CONNECT_TIMEOUT = "connectTimeOut";
 	public static final String READ_TIMEOUT = "readTimeOut";
+	public static final String REQUEST_METHOD = "requestMethod";
 	public static final String FROM_DATE_OVERRIDE = "fromDateOverride";
 	public static final String UNTIL_DATE_OVERRIDE = "untilDateOverride";

--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
@ -1,53 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import okhttp3.MediaType;
-import okhttp3.RequestBody;
-import okhttp3.internal.Util;
-import okio.BufferedSink;
-import okio.Okio;
-import okio.Source;
-
-public class InputStreamRequestBody extends RequestBody {
-
-	private final InputStream inputStream;
-	private final MediaType mediaType;
-	private final long lenght;
-
-	public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
-
-		return new InputStreamRequestBody(inputStream, mediaType, len);
-	}
-
-	private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
-		this.inputStream = inputStream;
-		this.mediaType = mediaType;
-		this.lenght = len;
-	}
-
-	@Override
-	public MediaType contentType() {
-		return mediaType;
-	}
-
-	@Override
-	public long contentLength() {
-
-		return lenght;
-
-	}
-
-	@Override
-	public void writeTo(BufferedSink sink) throws IOException {
-		Source source = null;
-		try {
-			source = Okio.source(inputStream);
-			sink.writeAll(source);
-		} finally {
-			Util.closeQuietly(source);
-		}
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
@ -1,8 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-public class MissingConceptDoiException extends Throwable {
-	public MissingConceptDoiException(String message) {
-		super(message);
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
@ -1,365 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.*;
-import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.http.HttpHeaders;
-import org.apache.http.entity.ContentType;
-import org.jetbrains.annotations.NotNull;
-
-import com.google.gson.Gson;
-
-import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
-import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
-import okhttp3.*;
-
-public class ZenodoAPIClient implements Serializable {
-
-	String urlString;
-	String bucket;
-
-	String deposition_id;
-	String access_token;
-
-	public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
-
-	private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
-
-	public String getUrlString() {
-		return urlString;
-	}
-
-	public void setUrlString(String urlString) {
-		this.urlString = urlString;
-	}
-
-	public String getBucket() {
-		return bucket;
-	}
-
-	public void setBucket(String bucket) {
-		this.bucket = bucket;
-	}
-
-	public void setDeposition_id(String deposition_id) {
-		this.deposition_id = deposition_id;
-	}
-
-	public ZenodoAPIClient(String urlString, String access_token) {
-
-		this.urlString = urlString;
-		this.access_token = access_token;
-	}
-
-	/**
-	 * Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
-	 *
-	 * @return response code
-	 * @throws IOException
-	 */
-	public int newDeposition() throws IOException {
-		String json = "{}";
-
-		URL url = new URL(urlString);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setRequestMethod("POST");
-		conn.setDoOutput(true);
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-		conn.disconnect();
-
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
-		this.bucket = newSubmission.getLinks().getBucket();
-		this.deposition_id = newSubmission.getId();
-
-		return responseCode;
-	}
-
-	/**
-	 * Upload files in Zenodo.
-	 *
-	 * @param is the inputStream for the file to upload
-	 * @param file_name the name of the file as it will appear on Zenodo
-	 * @return the response code
-	 */
-	public int uploadIS(InputStream is, String file_name) throws IOException {
-
-		URL url = new URL(bucket + "/" + file_name);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("PUT");
-
-		byte[] buf = new byte[8192];
-		int length;
-		try (OutputStream os = conn.getOutputStream()) {
-			while ((length = is.read(buf)) != -1) {
-				os.write(buf, 0, length);
-			}
-
-		}
-		int responseCode = conn.getResponseCode();
-		if (!checkOKStatus(responseCode)) {
-			throw new IOException("Unexpected code " + responseCode + getBody(conn));
-		}
-
-		return responseCode;
-	}
-
-	@NotNull
-	private String getBody(HttpURLConnection conn) throws IOException {
-		String body = "{}";
-		try (BufferedReader br = new BufferedReader(
-			new InputStreamReader(conn.getInputStream(), "utf-8"))) {
-			StringBuilder response = new StringBuilder();
-			String responseLine = null;
-			while ((responseLine = br.readLine()) != null) {
-				response.append(responseLine.trim());
-			}
-
-			body = response.toString();
-
-		}
-		return body;
-	}
-
-	/**
-	 * Associates metadata information to the current deposition
-	 *
-	 * @param metadata the metadata
-	 * @return response code
-	 * @throws IOException
-	 */
-	public int sendMretadata(String metadata) throws IOException {
-
-		URL url = new URL(urlString + "/" + deposition_id);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("PUT");
-
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = metadata.getBytes("utf-8");
-			os.write(input, 0, input.length);
-
-		}
-
-		final int responseCode = conn.getResponseCode();
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + getBody(conn));
-
-		return responseCode;
-
-	}
-
-	private boolean checkOKStatus(int responseCode) {
-
-		if (HttpURLConnection.HTTP_OK != responseCode ||
-			HttpURLConnection.HTTP_CREATED != responseCode)
-			return true;
-		return false;
-	}
-
-	/**
-	 * To publish the current deposition. It works for both new deposition or new version of an old deposition
-	 *
-	 * @return response code
-	 * @throws IOException
-	 */
-	@Deprecated
-	public int publish() throws IOException {
-
-		String json = "{}";
-
-		OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
-
-		RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
-
-		Request request = new Request.Builder()
-			.url(urlString + "/" + deposition_id + "/actions/publish")
-			.addHeader("Authorization", "Bearer " + access_token)
-			.post(body)
-			.build();
-
-		try (Response response = httpClient.newCall(request).execute()) {
-
-			if (!response.isSuccessful())
-				throw new IOException("Unexpected code " + response + response.body().string());
-
-			return response.code();
-
-		}
-	}
-
-	/**
-	 * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
-	 * for the new version.
-	 *
-	 * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
-	 *            part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
-	 *            concept_rec_id = 656930
-	 * @return response code
-	 * @throws IOException
-	 * @throws MissingConceptDoiException
-	 */
-	public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
-		setDepositionId(concept_rec_id, 1);
-		String json = "{}";
-
-		URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("POST");
-
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-		String latest_draft = zenodoModel.getLinks().getLatest_draft();
-		deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
-		bucket = getBucket(latest_draft);
-
-		return responseCode;
-
-	}
-
-	/**
-	 * To finish uploading a version or new deposition not published
-	 * It sets the deposition_id and the bucket to be used
-	 *
-	 *
-	 * @param deposition_id the deposition id of the not yet published upload
-	 *            concept_rec_id = 656930
-	 * @return response code
-	 * @throws IOException
-	 * @throws MissingConceptDoiException
-	 */
-	public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
-
-		this.deposition_id = deposition_id;
-
-		String json = "{}";
-
-		URL url = new URL(urlString + "/" + deposition_id);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setRequestMethod("POST");
-		conn.setDoOutput(true);
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-		conn.disconnect();
-
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-		bucket = zenodoModel.getLinks().getBucket();
-
-		return responseCode;
-
-	}
-
-	private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
-
-		ZenodoModelList zenodoModelList = new Gson()
-			.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
-
-		for (ZenodoModel zm : zenodoModelList) {
-			if (zm.getConceptrecid().equals(concept_rec_id)) {
-				deposition_id = zm.getId();
-				return;
-			}
-		}
-		if (zenodoModelList.size() == 0)
-			throw new MissingConceptDoiException(
-				"The concept record id specified was missing in the list of depositions");
-		setDepositionId(concept_rec_id, page + 1);
-
-	}
-
-	private String getPrevDepositions(String page) throws IOException {
-
-		HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
-		urlBuilder.addQueryParameter("page", page);
-
-		URL url = new URL(urlBuilder.build().toString());
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("GET");
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		return body;
-
-	}
-
-	private String getBucket(String inputUurl) throws IOException {
-
-		URL url = new URL(inputUurl);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("GET");
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-
-		return zenodoModel.getLinks().getBucket();
-
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
@ -1,14 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Community {
-	private String identifier;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
@ -1,47 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Creator {
-	private String affiliation;
-	private String name;
-	private String orcid;
-
-	public String getAffiliation() {
-		return affiliation;
-	}
-
-	public void setAffiliation(String affiliation) {
-		this.affiliation = affiliation;
-	}
-
-	public String getName() {
-		return name;
-	}
-
-	public void setName(String name) {
-		this.name = name;
-	}
-
-	public String getOrcid() {
-		return orcid;
-	}
-
-	public void setOrcid(String orcid) {
-		this.orcid = orcid;
-	}
-
-	public static Creator newInstance(String name, String affiliation, String orcid) {
-		Creator c = new Creator();
-		if (name != null) {
-			c.name = name;
-		}
-		if (affiliation != null) {
-			c.affiliation = affiliation;
-		}
-		if (orcid != null) {
-			c.orcid = orcid;
-		}
-
-		return c;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
@ -1,44 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class File implements Serializable {
-	private String checksum;
-	private String filename;
-	private long filesize;
-	private String id;
-
-	public String getChecksum() {
-		return checksum;
-	}
-
-	public void setChecksum(String checksum) {
-		this.checksum = checksum;
-	}
-
-	public String getFilename() {
-		return filename;
-	}
-
-	public void setFilename(String filename) {
-		this.filename = filename;
-	}
-
-	public long getFilesize() {
-		return filesize;
-	}
-
-	public void setFilesize(long filesize) {
-		this.filesize = filesize;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
@ -1,23 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Grant implements Serializable {
-	private String id;
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public static Grant newInstance(String id) {
-		Grant g = new Grant();
-		g.id = id;
-
-		return g;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
@ -1,92 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Links implements Serializable {
-
-	private String bucket;
-
-	private String discard;
-
-	private String edit;
-	private String files;
-	private String html;
-	private String latest_draft;
-	private String latest_draft_html;
-	private String publish;
-
-	private String self;
-
-	public String getBucket() {
-		return bucket;
-	}
-
-	public void setBucket(String bucket) {
-		this.bucket = bucket;
-	}
-
-	public String getDiscard() {
-		return discard;
-	}
-
-	public void setDiscard(String discard) {
-		this.discard = discard;
-	}
-
-	public String getEdit() {
-		return edit;
-	}
-
-	public void setEdit(String edit) {
-		this.edit = edit;
-	}
-
-	public String getFiles() {
-		return files;
-	}
-
-	public void setFiles(String files) {
-		this.files = files;
-	}
-
-	public String getHtml() {
-		return html;
-	}
-
-	public void setHtml(String html) {
-		this.html = html;
-	}
-
-	public String getLatest_draft() {
-		return latest_draft;
-	}
-
-	public void setLatest_draft(String latest_draft) {
-		this.latest_draft = latest_draft;
-	}
-
-	public String getLatest_draft_html() {
-		return latest_draft_html;
-	}
-
-	public void setLatest_draft_html(String latest_draft_html) {
-		this.latest_draft_html = latest_draft_html;
-	}
-
-	public String getPublish() {
-		return publish;
-	}
-
-	public void setPublish(String publish) {
-		this.publish = publish;
-	}
-
-	public String getSelf() {
-		return self;
-	}
-
-	public void setSelf(String self) {
-		this.self = self;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
@ -1,153 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class Metadata implements Serializable {
-
-	private String access_right;
-	private List<Community> communities;
-	private List<Creator> creators;
-	private String description;
-	private String doi;
-	private List<Grant> grants;
-	private List<String> keywords;
-	private String language;
-	private String license;
-	private PrereserveDoi prereserve_doi;
-	private String publication_date;
-	private List<String> references;
-	private List<RelatedIdentifier> related_identifiers;
-	private String title;
-	private String upload_type;
-	private String version;
-
-	public String getUpload_type() {
-		return upload_type;
-	}
-
-	public void setUpload_type(String upload_type) {
-		this.upload_type = upload_type;
-	}
-
-	public String getVersion() {
-		return version;
-	}
-
-	public void setVersion(String version) {
-		this.version = version;
-	}
-
-	public String getAccess_right() {
-		return access_right;
-	}
-
-	public void setAccess_right(String access_right) {
-		this.access_right = access_right;
-	}
-
-	public List<Community> getCommunities() {
-		return communities;
-	}
-
-	public void setCommunities(List<Community> communities) {
-		this.communities = communities;
-	}
-
-	public List<Creator> getCreators() {
-		return creators;
-	}
-
-	public void setCreators(List<Creator> creators) {
-		this.creators = creators;
-	}
-
-	public String getDescription() {
-		return description;
-	}
-
-	public void setDescription(String description) {
-		this.description = description;
-	}
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public List<Grant> getGrants() {
-		return grants;
-	}
-
-	public void setGrants(List<Grant> grants) {
-		this.grants = grants;
-	}
-
-	public List<String> getKeywords() {
-		return keywords;
-	}
-
-	public void setKeywords(List<String> keywords) {
-		this.keywords = keywords;
-	}
-
-	public String getLanguage() {
-		return language;
-	}
-
-	public void setLanguage(String language) {
-		this.language = language;
-	}
-
-	public String getLicense() {
-		return license;
-	}
-
-	public void setLicense(String license) {
-		this.license = license;
-	}
-
-	public PrereserveDoi getPrereserve_doi() {
-		return prereserve_doi;
-	}
-
-	public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
-		this.prereserve_doi = prereserve_doi;
-	}
-
-	public String getPublication_date() {
-		return publication_date;
-	}
-
-	public void setPublication_date(String publication_date) {
-		this.publication_date = publication_date;
-	}
-
-	public List<String> getReferences() {
-		return references;
-	}
-
-	public void setReferences(List<String> references) {
-		this.references = references;
-	}
-
-	public List<RelatedIdentifier> getRelated_identifiers() {
-		return related_identifiers;
-	}
-
-	public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
-		this.related_identifiers = related_identifiers;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
@ -1,25 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class PrereserveDoi implements Serializable {
-	private String doi;
-	private String recid;
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public String getRecid() {
-		return recid;
-	}
-
-	public void setRecid(String recid) {
-		this.recid = recid;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
@ -1,43 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class RelatedIdentifier implements Serializable {
-	private String identifier;
-	private String relation;
-	private String resource_type;
-	private String scheme;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-
-	public String getRelation() {
-		return relation;
-	}
-
-	public void setRelation(String relation) {
-		this.relation = relation;
-	}
-
-	public String getResource_type() {
-		return resource_type;
-	}
-
-	public void setResource_type(String resource_type) {
-		this.resource_type = resource_type;
-	}
-
-	public String getScheme() {
-		return scheme;
-	}
-
-	public void setScheme(String scheme) {
-		this.scheme = scheme;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
@ -1,118 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class ZenodoModel implements Serializable {
-
-	private String conceptrecid;
-	private String created;
-
-	private List<File> files;
-	private String id;
-	private Links links;
-	private Metadata metadata;
-	private String modified;
-	private String owner;
-	private String record_id;
-	private String state;
-	private boolean submitted;
-	private String title;
-
-	public String getConceptrecid() {
-		return conceptrecid;
-	}
-
-	public void setConceptrecid(String conceptrecid) {
-		this.conceptrecid = conceptrecid;
-	}
-
-	public String getCreated() {
-		return created;
-	}
-
-	public void setCreated(String created) {
-		this.created = created;
-	}
-
-	public List<File> getFiles() {
-		return files;
-	}
-
-	public void setFiles(List<File> files) {
-		this.files = files;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public Links getLinks() {
-		return links;
-	}
-
-	public void setLinks(Links links) {
-		this.links = links;
-	}
-
-	public Metadata getMetadata() {
-		return metadata;
-	}
-
-	public void setMetadata(Metadata metadata) {
-		this.metadata = metadata;
-	}
-
-	public String getModified() {
-		return modified;
-	}
-
-	public void setModified(String modified) {
-		this.modified = modified;
-	}
-
-	public String getOwner() {
-		return owner;
-	}
-
-	public void setOwner(String owner) {
-		this.owner = owner;
-	}
-
-	public String getRecord_id() {
-		return record_id;
-	}
-
-	public void setRecord_id(String record_id) {
-		this.record_id = record_id;
-	}
-
-	public String getState() {
-		return state;
-	}
-
-	public void setState(String state) {
-		this.state = state;
-	}
-
-	public boolean isSubmitted() {
-		return submitted;
-	}
-
-	public void setSubmitted(boolean submitted) {
-		this.submitted = submitted;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
@ -1,7 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.util.ArrayList;
-
-public class ZenodoModelList extends ArrayList<ZenodoModel> {
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java
@ -1,6 +1,9 @@

 package eu.dnetlib.dhp.common.collection;

+import java.util.HashMap;
+import java.util.Map;
+
 /**
 * Bundles the http connection parameters driving the client behaviour.
 */
@ -13,6 +16,8 @@ public class HttpClientParams {
 	public static int _connectTimeOut = 10; // seconds
 	public static int _readTimeOut = 30; // seconds

+	public static String _requestMethod = "GET";
+
 	/**
 	 * Maximum number of allowed retires before failing
 	 */
@ -38,17 +43,30 @@ public class HttpClientParams {
 	 */
 	private int readTimeOut;

+	/**
+	 * Custom http headers
+	 */
+	private Map<String, String> headers;
+
+	/**
+	 * Request method (i.e., GET, POST etc)
+	 */
+	private String requestMethod;
+
 	public HttpClientParams() {
-		this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut);
+		this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut, new HashMap<>(),
+			_requestMethod);
 	}

 	public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
-		int readTimeOut) {
+		int readTimeOut, Map<String, String> headers, String requestMethod) {
 		this.maxNumberOfRetry = maxNumberOfRetry;
 		this.requestDelay = requestDelay;
 		this.retryDelay = retryDelay;
 		this.connectTimeOut = connectTimeOut;
 		this.readTimeOut = readTimeOut;
+		this.headers = headers != null ? headers : new HashMap<>(); // null means "no custom headers"
+		this.requestMethod = requestMethod;
 	}

 	public int getMaxNumberOfRetry() {
@ -91,4 +109,19 @@ public class HttpClientParams {
 		this.readTimeOut = readTimeOut;
 	}

+	public Map<String, String> getHeaders() {
+		return headers;
+	}
+
+	public void setHeaders(Map<String, String> headers) {
+		this.headers = headers != null ? headers : new HashMap<>(); // null means "no custom headers"
+	}
+
+	public String getRequestMethod() {
+		return requestMethod;
+	}
+
+	public void setRequestMethod(String requestMethod) {
+		this.requestMethod = requestMethod;
+	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
@ -107,7 +107,14 @@ public class HttpConnector2 {
 			urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
 			urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000);
 			urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
+			urlConn.setRequestMethod(getClientParams().getRequestMethod());

+			// if provided, add custom headers
+			if (!getClientParams().getHeaders().isEmpty()) {
+				for (Map.Entry<String, String> headerEntry : getClientParams().getHeaders().entrySet()) {
+					urlConn.addRequestProperty(headerEntry.getKey(), headerEntry.getValue());
+				}
+			}
 			if (log.isDebugEnabled()) {
 				logHeaderFields(urlConn);
 			}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java
@ -63,7 +63,10 @@ public class Vocabulary implements Serializable {
 	}

 	public VocabularyTerm getTermBySynonym(final String syn) {
-		return getTerm(synonyms.get(syn.toLowerCase()));
+		return Optional
+			.ofNullable(syn)
+			.map(s -> getTerm(synonyms.get(s.toLowerCase())))
+			.orElse(null);
 	}

 	public Qualifier getTermAsQualifier(final String termId) {
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java
@ -135,6 +135,24 @@ public class VocabularyGroup implements Serializable {
 		return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
 	}

+	/**
+	 * Tries getTerm(syn) and, lazily on a miss, getTermBySynonym(syn); returns a Qualifier or null.
+	 */
+	public Qualifier lookupTermBySynonym(final String vocId, final String syn) {
+		return find(vocId)
+			.map(
+				vocabulary -> Optional
+					.ofNullable(
+						Optional
+							.ofNullable(vocabulary.getTerm(syn))
+							.orElseGet(() -> vocabulary.getTermBySynonym(syn)))
+					.map(
+						term -> OafMapperUtils
+							.qualifier(term.getId(), term.getName(), vocabulary.getId(), vocabulary.getName()))
+					.orElse(null))
+			.orElse(null);
+	}
+
 	/**
 	 * getSynonymAsQualifierCaseSensitive
 	 *
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@ -119,6 +119,131 @@ public class AuthorMerger {
 				});
 	}

+	/**
+	 * Produces a normalized form of a full name: the value returned by nfd() is lower-cased, then runs of
+	 * non-word characters, combining diacritical marks, punctuation, digits and newlines are each replaced
+	 * by a single space, and the result is trimmed.
+	 *
+	 * @param fullname the full name to normalize
+	 * @return the normalized full name
+	 */
+	public static String normalizeFullName(final String fullname) {
+		// the substitutions are deliberately applied one at a time: compacting the regexes in a single
+		// expression would cause StackOverflowError in case of large input strings
+		String cleaned = nfd(fullname).toLowerCase();
+		cleaned = cleaned.replaceAll("(\\W)+", " ");
+		cleaned = cleaned.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ");
+		cleaned = cleaned.replaceAll("(\\p{Punct})+", " ");
+		cleaned = cleaned.replaceAll("(\\d)+", " ");
+		cleaned = cleaned.replaceAll("(\\n)+", " ");
+		return cleaned.trim();
+	}
+
+	/**
+	 * Picks the author field to use in comparisons: the surname when it is not blank, otherwise the
+	 * fullname when it is not blank.
+	 *
+	 * @param author the author to inspect
+	 * @return the surname, the fullname, or null when both are blank
+	 */
+	private static String authorFieldToBeCompared(Author author) {
+		final String surname = author.getSurname();
+		if (StringUtils.isNotBlank(surname)) {
+			return surname;
+		}
+		final String fullname = author.getFullname();
+		return StringUtils.isNotBlank(fullname) ? fullname : null;
+	}
+
+	/**
+	 * This method tries to figure out whether two authors are the same person in the context
+	 * of ORCID enrichment, comparing the surname, name and fullname values of the Person objects
+	 * returned by parse().
+	 *
+	 * @param left  Author in the OAF entity
+	 * @param right Author ORCID
+	 * @return true when the name-based heuristic considers the two authors to be the same, false otherwise
+	 */
+	public static boolean checkORCIDSimilarity(final Author left, final Author right) {
+		final Person pl = parse(left);
+		final Person pr = parse(right);
+
+		// If at least one of them has no usable surname, we verify that both have a non-empty fullName
+		// and compare the normalized versions
+		if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
+			pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
+
+			if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
+				&& !pr.getFullname().isEmpty()) {
+				return pl
+					.getFullname()
+					.stream()
+					.anyMatch(
+						fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
+			} else {
+				return false;
+			}
+		}
+		// The Authors have one surname in common
+		if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
+
+			// If one of them has only a surname and it is the same, we can say that they are the same author
+			if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
+				(pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
+				return true;
+			// The authors have at least one name value in common (case-insensitive)
+			if (pl
+				.getName()
+				.stream()
+				.anyMatch(
+					nl -> pr
+						.getName()
+						.stream()
+						.anyMatch(nr -> nr.equalsIgnoreCase(nl))))
+				return true;
+		}
+
+		// getName() is treated as nullable above: when no surname matched, a missing name list would
+		// make the inverted comparison below throw, and without names that comparison cannot succeed
+		if (pl.getName() == null || pr.getName() == null)
+			return false;
+
+		// Sometimes publications list the author in inverse order (Surname, Name):
+		// we verify if there is an exact match between name and surname on opposite sides
+		return pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl)))
+			&& pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl)));
+	}
+	//
+
+	/**
+	 * Enriches the authors of an OAF entity with the pids of the matching ORCID authors.
+	 * Each ORCID author is matched via checkORCIDSimilarity and is used for at most one base author.
+	 *
+	 * @param baseAuthor  the Author List in the OAF Entity (its elements are updated in place)
+	 * @param orcidAuthor The list of ORCID Author intersected
+	 * @return orcidAuthor when baseAuthor is null or empty, otherwise baseAuthor, enriched with the pids of
+	 * the matching ORCID authors unless orcidAuthor is null/empty or a single base author faces more than
+	 * 10 ORCID authors
+	 */
+	public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
+
+		if (baseAuthor == null || baseAuthor.isEmpty()) {
+			return orcidAuthor;
+		}
+		if (orcidAuthor == null || orcidAuthor.isEmpty()) {
+			return baseAuthor;
+		}
+		if (baseAuthor.size() == 1 && orcidAuthor.size() > 10) {
+			return baseAuthor;
+		}
+
+		// working copy of the ORCID authors: a matched author is removed so it cannot be matched again
+		final List<Author> candidates = new ArrayList<>(orcidAuthor);
+
+		for (final Author base : baseAuthor) {
+			final Optional<Author> match = candidates
+				.stream()
+				.filter(candidate -> checkORCIDSimilarity(base, candidate))
+				.findFirst();
+			match.ifPresent(found -> {
+				addPid(base, found.getPid());
+				candidates.remove(found);
+			});
+		}
+		return baseAuthor;
+	}
+
+	/**
+	 * Appends the given pids to the pid list of an author, creating that list when the author has none.
+	 *
+	 * @param a    the author to update
+	 * @param pids the pids to append; when null nothing is appended
+	 */
+	private static void addPid(final Author a, final List<StructuredProperty> pids) {
+
+		if (a.getPid() == null) {
+			a.setPid(new ArrayList<>());
+		}
+
+		// the matched author may carry no pid list: addAll(null) would throw a NullPointerException
+		if (pids != null) {
+			a.getPid().addAll(pids);
+		}
+
+	}
+
 	public static String pidToComparableString(StructuredProperty pid) {
 		final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
 			: "";
@ -171,7 +296,7 @@ public class AuthorMerger {
 		}
 	}

-	private static String normalize(final String s) {
+	public static String normalize(final String s) {
 		String[] normalized = nfd(s)
 			.toLowerCase()
 			// do not compact the regexes in a single expression, would cause StackOverflowError
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java
@ -1,97 +0,0 @@
-
-package eu.dnetlib.dhp.oa.merge;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.util.Objects;
-import java.util.Optional;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.FilterFunction;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.common.HdfsSupport;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
-
-public class DispatchEntitiesSparkJob {
-
-	private static final Logger log = LoggerFactory.getLogger(DispatchEntitiesSparkJob.class);
-
-	public static void main(String[] args) throws Exception {
-
-		String jsonConfiguration = IOUtils
-			.toString(
-				Objects
-					.requireNonNull(
-						DispatchEntitiesSparkJob.class
-							.getResourceAsStream(
-								"/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json")));
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
-		parser.parseArgument(args);
-
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
-		String inputPath = parser.get("inputPath");
-		log.info("inputPath: {}", inputPath);
-
-		String outputPath = parser.get("outputPath");
-		log.info("outputPath: {}", outputPath);
-
-		boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
-		log.info("filterInvisible: {}", filterInvisible);
-
-		SparkConf conf = new SparkConf();
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> dispatchEntities(spark, inputPath, outputPath, filterInvisible));
-	}
-
-	private static void dispatchEntities(
-		SparkSession spark,
-		String inputPath,
-		String outputPath,
-		boolean filterInvisible) {
-
-		Dataset<String> df = spark.read().textFile(inputPath);
-
-		ModelSupport.oafTypes.entrySet().parallelStream().forEach(entry -> {
-			String entityType = entry.getKey();
-			Class<?> clazz = entry.getValue();
-
-			final String entityPath = outputPath + "/" + entityType;
-			if (!entityType.equalsIgnoreCase("relation")) {
-				HdfsSupport.remove(entityPath, spark.sparkContext().hadoopConfiguration());
-				Dataset<Row> entityDF = spark
-					.read()
-					.schema(Encoders.bean(clazz).schema())
-					.json(
-						df
-							.filter((FilterFunction<String>) s -> s.startsWith(clazz.getName()))
-							.map(
-								(MapFunction<String, String>) s -> StringUtils.substringAfter(s, "|"),
-								Encoders.STRING()));
-
-				if (filterInvisible) {
-					entityDF = entityDF.filter("dataInfo.invisible != true");
-				}
-
-				entityDF
-					.write()
-					.mode(SaveMode.Overwrite)
-					.option("compression", "gzip")
-					.json(entityPath);
-			}
-		});
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
@ -2,50 +2,49 @@
 package eu.dnetlib.dhp.oa.merge;

 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
+import static org.apache.spark.sql.functions.col;
+import static org.apache.spark.sql.functions.when;

-import java.io.IOException;
-import java.util.List;
-import java.util.Objects;
+import java.util.Map;
 import java.util.Optional;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ForkJoinPool;
 import java.util.stream.Collectors;

 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.ReduceFunction;
 import org.apache.spark.sql.*;
-import org.apache.spark.sql.expressions.Aggregator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import com.fasterxml.jackson.databind.DeserializationFeature;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.jayway.jsonpath.Configuration;
-import com.jayway.jsonpath.DocumentContext;
-import com.jayway.jsonpath.JsonPath;
-import com.jayway.jsonpath.Option;
-
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.schema.common.EntityType;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.utils.ISLookupClientFactory;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import scala.Tuple2;

 /**
 * Groups the graph content by entity identifier to ensure ID uniqueness
 */
 public class GroupEntitiesSparkJob {
-
 	private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class);

-	private static final String ID_JPATH = "$.id";
+	private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class);

-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
-		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+	private ArgumentApplicationParser parser;
+
+	public GroupEntitiesSparkJob(ArgumentApplicationParser parser) {
+		this.parser = parser;
+	}

 	public static void main(String[] args) throws Exception {

@ -63,141 +62,133 @@ public class GroupEntitiesSparkJob {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

+		final String isLookupUrl = parser.get("isLookupUrl");
+		log.info("isLookupUrl: {}", isLookupUrl);
+
+		final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
+
+		new GroupEntitiesSparkJob(parser).run(isSparkSessionManaged, isLookupService);
+	}
+
+	public void run(Boolean isSparkSessionManaged, ISLookUpService isLookUpService)
+		throws ISLookUpException {
+
 		String graphInputPath = parser.get("graphInputPath");
 		log.info("graphInputPath: {}", graphInputPath);

+		String checkpointPath = parser.get("checkpointPath");
+		log.info("checkpointPath: {}", checkpointPath);
+
 		String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

+		boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
+		log.info("filterInvisible: {}", filterInvisible);
+
 		SparkConf conf = new SparkConf();
 		conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
 		conf.registerKryoClasses(ModelSupport.getOafModelClasses());

+		final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
+
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
-				groupEntities(spark, graphInputPath, outputPath);
+				HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration());
+				groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible, vocs);
 			});
 	}

 	private static void groupEntities(
 		SparkSession spark,
 		String inputPath,
-		String outputPath) {
+		String checkpointPath,
+		String outputPath,
+		boolean filterInvisible, VocabularyGroup vocs) {

-		final TypedColumn<OafEntity, OafEntity> aggregator = new GroupingAggregator().toColumn();
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-		spark
-			.read()
-			.textFile(toSeq(listEntityPaths(inputPath, sc)))
-			.map((MapFunction<String, OafEntity>) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class))
-			.filter((FilterFunction<OafEntity>) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e)))
-			.groupByKey((MapFunction<OafEntity, String>) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING())
-			.agg(aggregator)
+		Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC);
+
+		for (Map.Entry<EntityType, Class> e : ModelSupport.entityTypes.entrySet()) {
+			String entity = e.getKey().name();
+			Class<? extends OafEntity> entityClass = e.getValue();
+			String entityInputPath = inputPath + "/" + entity;
+
+			if (!HdfsSupport.exists(entityInputPath, spark.sparkContext().hadoopConfiguration())) {
+				continue;
+			}
+
+			allEntities = allEntities
+				.union(
+					((Dataset<OafEntity>) spark
+						.read()
+						.schema(Encoders.bean(entityClass).schema())
+						.json(entityInputPath)
+						.filter("length(id) > 0")
+						.as(Encoders.bean(entityClass)))
+							.map((MapFunction<OafEntity, OafEntity>) r -> r, OAFENTITY_KRYO_ENC));
+		}
+
+		Dataset<?> groupedEntities = allEntities
 			.map(
-				(MapFunction<Tuple2<String, OafEntity>, String>) t -> t._2().getClass().getName() +
-					"|" + OBJECT_MAPPER.writeValueAsString(t._2()),
-				Encoders.STRING())
+				(MapFunction<OafEntity, OafEntity>) entity -> GraphCleaningFunctions
+					.applyCoarVocabularies(entity, vocs),
+				OAFENTITY_KRYO_ENC)
+			.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
+			.reduceGroups((ReduceFunction<OafEntity>) OafMapperUtils::mergeEntities)
+			.map(
+				(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
+					t._2().getClass().getName(), t._2()),
+				Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC));
+
+		// pivot on "_1" (classname of the entity)
+		// created columns containing only entities of the same class
+		for (Map.Entry<EntityType, Class> e : ModelSupport.entityTypes.entrySet()) {
+			String entity = e.getKey().name();
+			Class<? extends OafEntity> entityClass = e.getValue();
+
+			groupedEntities = groupedEntities
+				.withColumn(
+					entity,
+					when(col("_1").equalTo(entityClass.getName()), col("_2")));
+		}
+
+		groupedEntities
+			.drop("_1", "_2")
 			.write()
-			.option("compression", "gzip")
 			.mode(SaveMode.Overwrite)
-			.text(outputPath);
-	}
+			.option("compression", "gzip")
+			.save(checkpointPath);

-	public static class GroupingAggregator extends Aggregator<OafEntity, OafEntity, OafEntity> {
+		ForkJoinPool parPool = new ForkJoinPool(ModelSupport.entityTypes.size());

-		@Override
-		public OafEntity zero() {
-			return null;
-		}
-
-		@Override
-		public OafEntity reduce(OafEntity b, OafEntity a) {
-			return mergeAndGet(b, a);
-		}
-
-		private OafEntity mergeAndGet(OafEntity b, OafEntity a) {
-			if (Objects.nonNull(a) && Objects.nonNull(b)) {
-				return OafMapperUtils.mergeEntities(b, a);
-			}
-			return Objects.isNull(a) ? b : a;
-		}
-
-		@Override
-		public OafEntity merge(OafEntity b, OafEntity a) {
-			return mergeAndGet(b, a);
-		}
-
-		@Override
-		public OafEntity finish(OafEntity j) {
-			return j;
-		}
-
-		@Override
-		public Encoder<OafEntity> bufferEncoder() {
-			return Encoders.kryo(OafEntity.class);
-		}
-
-		@Override
-		public Encoder<OafEntity> outputEncoder() {
-			return Encoders.kryo(OafEntity.class);
-		}
-
-	}
-
-	private static OafEntity parseOaf(String s) {
-
-		DocumentContext dc = JsonPath
-			.parse(s, Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS));
-		final String id = dc.read(ID_JPATH);
-		if (StringUtils.isNotBlank(id)) {
-
-			String prefix = StringUtils.substringBefore(id, "|");
-			switch (prefix) {
-				case "10":
-					return parse(s, Datasource.class);
-				case "20":
-					return parse(s, Organization.class);
-				case "40":
-					return parse(s, Project.class);
-				case "50":
-					String resultType = dc.read("$.resulttype.classid");
-					switch (resultType) {
-						case "publication":
-							return parse(s, Publication.class);
-						case "dataset":
-							return parse(s, eu.dnetlib.dhp.schema.oaf.Dataset.class);
-						case "software":
-							return parse(s, Software.class);
-						case "other":
-							return parse(s, OtherResearchProduct.class);
-						default:
-							throw new IllegalArgumentException(String.format("invalid resultType: '%s'", resultType));
-					}
-				default:
-					throw new IllegalArgumentException(String.format("invalid id prefix: '%s'", prefix));
-			}
-		} else {
-			throw new IllegalArgumentException(String.format("invalid oaf: '%s'", s));
-		}
-	}
-
-	private static <T extends OafEntity> OafEntity parse(String s, Class<T> clazz) {
-		try {
-			return OBJECT_MAPPER.readValue(s, clazz);
-		} catch (IOException e) {
-			throw new IllegalArgumentException(e);
-		}
-	}
-
-	private static List<String> listEntityPaths(String inputPath, JavaSparkContext sc) {
-		return HdfsSupport
-			.listFiles(inputPath, sc.hadoopConfiguration())
+		ModelSupport.entityTypes
+			.entrySet()
 			.stream()
-			.filter(f -> !f.toLowerCase().contains("relation"))
-			.collect(Collectors.toList());
-	}
+			.map(e -> parPool.submit(() -> {
+				String entity = e.getKey().name();
+				Class<? extends OafEntity> entityClass = e.getValue();

+				spark
+					.read()
+					.load(checkpointPath)
+					.select(col(entity).as("value"))
+					.filter("value IS NOT NULL")
+					.as(OAFENTITY_KRYO_ENC)
+					.map((MapFunction<OafEntity, OafEntity>) r -> r, (Encoder<OafEntity>) Encoders.bean(entityClass))
+					.filter(filterInvisible ? "dataInfo.invisible != TRUE" : "TRUE")
+					.write()
+					.mode(SaveMode.Overwrite)
+					.option("compression", "gzip")
+					.json(outputPath + "/" + entity);
+			}))
+			.collect(Collectors.toList())
+			.forEach(t -> {
+				try {
+					t.get();
+				} catch (InterruptedException | ExecutionException e) {
+					throw new RuntimeException(e);
+				}
+			});
+	}
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@ -1,8 +1,12 @@

 package eu.dnetlib.dhp.schema.oaf.utils;

+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_META_RESOURCE_TYPE;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;

+import java.net.MalformedURLException;
+import java.net.URL;
 import java.time.LocalDate;
 import java.time.ZoneId;
 import java.time.format.DateTimeFormatter;
@ -19,6 +23,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;

 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
@ -26,6 +31,10 @@ import me.xuender.unidecode.Unidecode;

 public class GraphCleaningFunctions extends CleaningFunctions {

+	public static final String DNET_PUBLISHERS = "dnet:publishers";
+
+	public static final String DNET_LICENSES = "dnet:licenses";
+
 	public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
 	public static final int ORCID_LEN = 19;
 	public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
@ -37,6 +46,67 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 	public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
 	private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+";

+	// author full names rejected by isValidAuthorName (matched against the lower-cased, trimmed fullname)
+	private static final Set<String> INVALID_AUTHOR_NAMES = new HashSet<>();
+
+	// URLs rejected by urlFilter when they match exactly
+	private static final Set<String> INVALID_URLS = new HashSet<>();
+
+	// hosts whose URLs are rejected by urlFilter
+	private static final Set<String> INVALID_URL_HOSTS = new HashSet<>();
+
+	// instance type class names checked when deciding whether an instance is marked as peerReviewed
+	private static final HashSet<String> PEER_REVIEWED_TYPES = new HashSet<>();
+
+	static {
+		PEER_REVIEWED_TYPES.add("Article");
+		PEER_REVIEWED_TYPES.add("Part of book or chapter of book");
+		PEER_REVIEWED_TYPES.add("Book");
+		PEER_REVIEWED_TYPES.add("Doctoral thesis");
+		PEER_REVIEWED_TYPES.add("Master thesis");
+		PEER_REVIEWED_TYPES.add("Data Paper");
+		PEER_REVIEWED_TYPES.add("Thesis");
+		PEER_REVIEWED_TYPES.add("Bachelor thesis");
+		PEER_REVIEWED_TYPES.add("Conference object");
+
+		INVALID_AUTHOR_NAMES.add("(:null)");
+		INVALID_AUTHOR_NAMES.add("(:unap)");
+		INVALID_AUTHOR_NAMES.add("(:tba)");
+		INVALID_AUTHOR_NAMES.add("(:unas)");
+		INVALID_AUTHOR_NAMES.add("(:unav)");
+		INVALID_AUTHOR_NAMES.add("(:unkn)");
+		INVALID_AUTHOR_NAMES.add("(:unkn) unknown");
+		INVALID_AUTHOR_NAMES.add(":none");
+		INVALID_AUTHOR_NAMES.add(":null");
+		INVALID_AUTHOR_NAMES.add(":unas");
+		INVALID_AUTHOR_NAMES.add(":unav");
+		INVALID_AUTHOR_NAMES.add(":unkn");
+		INVALID_AUTHOR_NAMES.add("[autor desconocido]");
+		INVALID_AUTHOR_NAMES.add("[s. n.]");
+		INVALID_AUTHOR_NAMES.add("[s.n]");
+		INVALID_AUTHOR_NAMES.add("[unknown]");
+		INVALID_AUTHOR_NAMES.add("anonymous");
+		INVALID_AUTHOR_NAMES.add("n.n.");
+		INVALID_AUTHOR_NAMES.add("nn");
+		INVALID_AUTHOR_NAMES.add("no name supplied");
+		INVALID_AUTHOR_NAMES.add("none");
+		INVALID_AUTHOR_NAMES.add("none available");
+		INVALID_AUTHOR_NAMES.add("not available not available");
+		INVALID_AUTHOR_NAMES.add("null &na;");
+		INVALID_AUTHOR_NAMES.add("null anonymous");
+		INVALID_AUTHOR_NAMES.add("unbekannt");
+		INVALID_AUTHOR_NAMES.add("unknown");
+
+		INVALID_URL_HOSTS.add("creativecommons.org");
+		INVALID_URL_HOSTS.add("www.academia.edu");
+		INVALID_URL_HOSTS.add("academia.edu");
+		INVALID_URL_HOSTS.add("researchgate.net");
+		INVALID_URL_HOSTS.add("www.researchgate.net");
+
+		INVALID_URLS.add("http://repo.scoap3.org/api");
+		INVALID_URLS.add("http://ora.ox.ac.uk/objects/uuid:");
+		INVALID_URLS.add("http://ntur.lib.ntu.edu.tw/news/agent_contract.pdf");
+		INVALID_URLS.add("https://media.springer.com/full/springer-instructions-for-authors-assets/pdf/SN_BPF_EN.pdf");
+		INVALID_URLS.add("http://www.tobaccoinduceddiseases.org/dl/61aad426c96519bea4040a374c6a6110/");
+		INVALID_URLS.add("https://www.bilboard.nl/verenigingsbladen/bestuurskundige-berichten");
+	}
+
 	public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) {
 		if (ModelSupport.isSubClass(value, Result.class)) {
 			final Result res = (Result) value;
@ -273,6 +343,12 @@ public class GraphCleaningFunctions extends CleaningFunctions {

 	public static <T extends Oaf> T cleanup(T value, VocabularyGroup vocs) {

+		if (Objects.isNull(value.getDataInfo())) {
+			final DataInfo d = new DataInfo();
+			d.setDeletedbyinference(false);
+			value.setDataInfo(d);
+		}
+
 		if (value instanceof OafEntity) {

 			OafEntity e = (OafEntity) value;
@ -292,6 +368,10 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 			} else if (value instanceof Result) {
 				Result r = (Result) value;

+				if (Objects.isNull(r.getContext())) {
+					r.setContext(new ArrayList<>());
+				}
+
 				if (Objects.nonNull(r.getFulltext())
 					&& (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) ||
 						ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
@ -334,6 +414,14 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 									.getPublisher()
 									.getValue()
 									.replaceAll(NAME_CLEANING_REGEX, " "));
+
+						if (vocs.vocabularyExists(DNET_PUBLISHERS)) {
+							vocs
+								.find(DNET_PUBLISHERS)
+								.map(voc -> voc.getTermBySynonym(r.getPublisher().getValue()))
+								.map(VocabularyTerm::getName)
+								.ifPresent(publisher -> r.getPublisher().setValue(publisher));
+						}
 					}
 				}
 				if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
@ -493,6 +581,43 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 						if (Objects.isNull(i.getRefereed()) || StringUtils.isBlank(i.getRefereed().getClassid())) {
 							i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
 						}
+
+						if (Objects.nonNull(i.getLicense()) && Objects.nonNull(i.getLicense().getValue())) {
+							vocs
+								.find(DNET_LICENSES)
+								.map(voc -> voc.getTermBySynonym(i.getLicense().getValue()))
+								.map(VocabularyTerm::getId)
+								.ifPresent(license -> i.getLicense().setValue(license));
+						}
+
+						// from the script from Dimitris
+						if ("0000".equals(i.getRefereed().getClassid())) {
+							final boolean isFromCrossref = Optional
+								.ofNullable(i.getCollectedfrom())
+								.map(KeyValue::getKey)
+								.map(id -> id.equals(ModelConstants.CROSSREF_ID))
+								.orElse(false);
+							final boolean hasDoi = Optional
+								.ofNullable(i.getPid())
+								.map(
+									pid -> pid
+										.stream()
+										.anyMatch(
+											p -> PidType.doi.toString().equals(p.getQualifier().getClassid())))
+								.orElse(false);
+							final boolean isPeerReviewedType = PEER_REVIEWED_TYPES
+								.contains(i.getInstancetype().getClassname());
+							final boolean noOtherLitType = r
+								.getInstance()
+								.stream()
+								.noneMatch(ii -> "Other literature type".equals(ii.getInstancetype().getClassname()));
+							if (isFromCrossref && hasDoi && isPeerReviewedType && noOtherLitType) {
+								i.setRefereed(qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS));
+							} else {
+								i.setRefereed(qualifier("0002", "nonPeerReviewed", ModelConstants.DNET_REVIEW_LEVELS));
+							}
+						}
+
 						if (Objects.nonNull(i.getDateofacceptance())) {
 							Optional<String> date = cleanDateField(i.getDateofacceptance());
 							if (date.isPresent()) {
@ -506,6 +631,15 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 								ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
 							i.setFulltext(null);
 						}
+						if (Objects.nonNull(i.getUrl())) {
+							i
+								.setUrl(
+									i
+										.getUrl()
+										.stream()
+										.filter(GraphCleaningFunctions::urlFilter)
+										.collect(Collectors.toList()));
+						}
 					}
 				}
 				if (Objects.isNull(r.getBestaccessright())
@ -528,8 +662,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 								.getAuthor()
 								.stream()
 								.filter(Objects::nonNull)
-								.filter(a -> StringUtils.isNotBlank(a.getFullname()))
-								.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
+								.filter(GraphCleaningFunctions::isValidAuthorName)
 								.map(GraphCleaningFunctions::cleanupAuthor)
 								.collect(Collectors.toList()));

@ -556,6 +689,9 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 										.filter(Objects::nonNull)
 										.filter(p -> Objects.nonNull(p.getQualifier()))
 										.filter(p -> StringUtils.isNotBlank(p.getValue()))
+										.filter(
+											p -> StringUtils
+												.contains(StringUtils.lowerCase(p.getQualifier().getClassid()), ORCID))
 										.map(p -> {
 											// hack to distinguish orcid from orcid_pending
 											String pidProvenance = getProvenance(p.getDataInfo());
@ -565,7 +701,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 												.toLowerCase()
 												.contains(ModelConstants.ORCID)) {
 												if (pidProvenance
-													.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
+													.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY) ||
+													pidProvenance.equals("ORCID_ENRICHMENT")) {
 													p.getQualifier().setClassid(ModelConstants.ORCID);
 												} else {
 													p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
@ -687,12 +824,30 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 	// HELPERS

 	private static boolean isValidAuthorName(Author a) {
-		return !Stream
-			.of(a.getFullname(), a.getName(), a.getSurname())
-			.filter(s -> s != null && !s.isEmpty())
-			.collect(Collectors.joining(""))
-			.toLowerCase()
-			.matches(INVALID_AUTHOR_REGEX);
+		return StringUtils.isNotBlank(a.getFullname()) &&
+			StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")) &&
+			!INVALID_AUTHOR_NAMES.contains(StringUtils.lowerCase(a.getFullname()).trim()) &&
+			!Stream
+				.of(a.getFullname(), a.getName(), a.getSurname())
+				.filter(StringUtils::isNotBlank)
+				.collect(Collectors.joining(""))
+				.toLowerCase()
+				.matches(INVALID_AUTHOR_REGEX);
+	}
+
+	/**
+	 * Predicate used to filter instance URLs.
+	 *
+	 * @param u the URL string to check
+	 * @return false when the string is not a well-formed URL, when its path is blank or "/", when its host
+	 * is listed in INVALID_URL_HOSTS (compared ignoring case) or when the URL is listed in INVALID_URLS;
+	 * true otherwise
+	 */
+	private static boolean urlFilter(String u) {
+		try {
+			final URL url = new URL(u);
+			if (StringUtils.isBlank(url.getPath()) || "/".equals(url.getPath())) {
+				return false;
+			}
+			// host names are case-insensitive and URL.getHost() does not normalize their case,
+			// so the blacklist is matched ignoring case
+			final String host = url.getHost();
+			if (INVALID_URL_HOSTS.stream().anyMatch(h -> h.equalsIgnoreCase(host))) {
+				return false;
+			}
+			return !INVALID_URLS.contains(url.toString());
+		} catch (MalformedURLException ex) {
+			return false;
+		}
+	}

 	private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
@ -742,4 +897,105 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 		return s;
 	}

+	/**
+	 * Resolves the instance type mappings of a Result against the vocabularies and sets its meta resource type.
+	 * <p>
+	 * For every instance: when no mapping list is present, one is created holding the instance type class name as
+	 * original type for the OPENAIRE_COAR_RESOURCE_TYPES_3_1 vocabulary. The first mapping still lacking code and
+	 * label (see originalResourceType) is then completed with the term found by synonym in that vocabulary, and,
+	 * when the OPENAIRE_USER_RESOURCE_TYPES vocabulary exists and provides a term for the resolved code, a further
+	 * mapping for that term is appended. Entities that are not Results are returned untouched.
+	 *
+	 * @param entity the entity to process
+	 * @param vocs the vocabularies used for the lookups
+	 * @return the same entity instance received as input
+	 */
+	public static OafEntity applyCoarVocabularies(OafEntity entity, VocabularyGroup vocs) {
+
+		if (!(entity instanceof Result)) {
+			return entity;
+		}
+
+		final Result result = (Result) entity;
+		final List<Instance> instances = result.getInstance();
+
+		if (instances != null) {
+			for (final Instance instance : instances) {
+				if (instance.getInstanceTypeMapping() == null) {
+					// no mapping yet: seed the list with the instance type class name as original type
+					final List<InstanceTypeMapping> seed = Lists.newArrayList();
+					seed
+						.add(
+							OafMapperUtils
+								.instanceTypeMapping(
+									instance.getInstancetype().getClassname(),
+									OPENAIRE_COAR_RESOURCE_TYPES_3_1));
+					instance.setInstanceTypeMapping(seed);
+				}
+
+				final Optional<InstanceTypeMapping> unresolved = instance
+					.getInstanceTypeMapping()
+					.stream()
+					.filter(GraphCleaningFunctions::originalResourceType)
+					.findFirst();
+				if (!unresolved.isPresent()) {
+					continue;
+				}
+
+				final InstanceTypeMapping coarItm = unresolved.get();
+				final Qualifier coarTerm = vocs
+					.lookupTermBySynonym(OPENAIRE_COAR_RESOURCE_TYPES_3_1, coarItm.getOriginalType());
+				if (coarTerm != null) {
+					coarItm.setTypeCode(coarTerm.getClassid());
+					coarItm.setTypeLabel(coarTerm.getClassname());
+				}
+
+				if (vocs.vocabularyExists(OPENAIRE_USER_RESOURCE_TYPES)) {
+					final Qualifier userTerm = vocs
+						.lookupTermBySynonym(OPENAIRE_USER_RESOURCE_TYPES, coarItm.getTypeCode());
+					if (userTerm != null) {
+						instance
+							.getInstanceTypeMapping()
+							.add(OafMapperUtils.instanceTypeMapping(coarItm.getTypeCode(), userTerm));
+					}
+				}
+			}
+		}
+
+		result.setMetaResourceType(getMetaResourceType(instances, vocs));
+
+		return entity;
+	}
+
+	/**
+	 * Matches the mappings that still have to be resolved: an original type is present, the vocabulary is
+	 * OPENAIRE_COAR_RESOURCE_TYPES_3_1, and both type code and type label are blank.
+	 *
+	 * @param itm the mapping to test
+	 * @return true when the mapping carries only the original type for the COAR resource types vocabulary
+	 */
+	private static boolean originalResourceType(InstanceTypeMapping itm) {
+		if (StringUtils.isBlank(itm.getOriginalType())) {
+			return false;
+		}
+		if (!OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName())) {
+			return false;
+		}
+		return StringUtils.isBlank(itm.getTypeCode()) && StringUtils.isBlank(itm.getTypeLabel());
+	}
+
+	/**
+	 * Derives the meta resource type from the first COAR resource type mapping found among the instances.
+	 *
+	 * @param instances the instances to inspect, may be null
+	 * @param vocs the vocabularies used for the lookup
+	 * @return the term found by synonym in OPENAIRE_META_RESOURCE_TYPE for the type code of the first mapping
+	 *         bound to OPENAIRE_COAR_RESOURCE_TYPES_3_1; null when instances is null, no such mapping exists or
+	 *         its type code is null
+	 * @throws IllegalStateException when instances is not null and the OPENAIRE_META_RESOURCE_TYPE vocabulary is
+	 *         not available, or when no synonym is found for the type code
+	 */
+	private static Qualifier getMetaResourceType(final List<Instance> instances, final VocabularyGroup vocs) {
+		if (instances == null) {
+			return null;
+		}
+		if (!vocs.vocabularyExists(OPENAIRE_META_RESOURCE_TYPE)) {
+			throw new IllegalStateException("vocabulary '" + OPENAIRE_META_RESOURCE_TYPE + "' not available");
+		}
+
+		final String typeCode = instances
+			.stream()
+			.filter(Objects::nonNull)
+			.flatMap(
+				i -> i.getInstanceTypeMapping() == null
+					? Stream.<InstanceTypeMapping> empty()
+					: i.getInstanceTypeMapping().stream())
+			.filter(t -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()))
+			.findFirst()
+			.map(InstanceTypeMapping::getTypeCode)
+			.orElse(null);
+
+		if (typeCode == null) {
+			return null;
+		}
+
+		return Optional
+			.ofNullable(vocs.lookupTermBySynonym(OPENAIRE_META_RESOURCE_TYPE, typeCode))
+			.orElseThrow(
+				() -> new IllegalStateException("unable to find a synonym for '" + typeCode + "' in " +
+					OPENAIRE_META_RESOURCE_TYPE));
+	}
+
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@ -14,7 +14,6 @@ import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;

 import eu.dnetlib.dhp.schema.common.AccessRightComparator;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;

@ -141,6 +140,28 @@ public class OafMapperUtils {
 			.collect(Collectors.toList());
 	}

+	/**
+	 * Creates an InstanceTypeMapping with all of its four properties set.
+	 *
+	 * @param originalType the original type
+	 * @param code the type code
+	 * @param label the type label
+	 * @param vocabularyName the name of the vocabulary the mapping refers to
+	 * @return the new mapping
+	 */
+	public static InstanceTypeMapping instanceTypeMapping(String originalType, String code, String label,
+		String vocabularyName) {
+		final InstanceTypeMapping mapping = new InstanceTypeMapping();
+		mapping.setOriginalType(originalType);
+		mapping.setTypeCode(code);
+		mapping.setTypeLabel(label);
+		mapping.setVocabularyName(vocabularyName);
+		return mapping;
+	}
+
+	/**
+	 * Creates an InstanceTypeMapping taking code, label and vocabulary name from a term: respectively its class
+	 * id, class name and scheme id.
+	 *
+	 * @param originalType the original type
+	 * @param term the term providing code, label and vocabulary name; must not be null
+	 * @return the new mapping
+	 */
+	public static InstanceTypeMapping instanceTypeMapping(String originalType, Qualifier term) {
+		final String code = term.getClassid();
+		final String label = term.getClassname();
+		final String vocabularyName = term.getSchemeid();
+		return instanceTypeMapping(originalType, code, label, vocabularyName);
+	}
+
+	/**
+	 * Creates an InstanceTypeMapping carrying only the original type; code, label and vocabulary name are null.
+	 *
+	 * @param originalType the original type
+	 * @return the new mapping
+	 */
+	public static InstanceTypeMapping instanceTypeMapping(String originalType) {
+		return instanceTypeMapping(originalType, null, null, null);
+	}
+
+	/**
+	 * Creates an InstanceTypeMapping carrying the original type and the vocabulary name; code and label are null.
+	 *
+	 * @param originalType the original type
+	 * @param vocabularyName the name of the vocabulary the mapping refers to
+	 * @return the new mapping
+	 */
+	public static InstanceTypeMapping instanceTypeMapping(String originalType, String vocabularyName) {
+		return instanceTypeMapping(originalType, null, null, vocabularyName);
+	}
+
 	public static Qualifier unknown(final String schemeid, final String schemename) {
 		return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
 	}
--- a/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json
+++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json
@ -1,26 +0,0 @@
-[
-  {
-    "paramName": "issm",
-    "paramLongName": "isSparkSessionManaged",
-    "paramDescription": "when true will stop SparkSession after job execution",
-    "paramRequired": false
-  },
-  {
-    "paramName": "i",
-    "paramLongName": "inputPath",
-    "paramDescription": "the source path",
-    "paramRequired": true
-  },
-  {
-    "paramName": "o",
-    "paramLongName": "outputPath",
-    "paramDescription": "path of the output graph",
-    "paramRequired": true
-  },
-  {
-    "paramName": "fi",
-    "paramLongName": "filterInvisible",
-    "paramDescription": "if true filters out invisible entities",
-    "paramRequired": true
-  }
-]
--- a/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json
+++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json
@ -8,13 +8,31 @@
  {
    "paramName": "gin",
    "paramLongName": "graphInputPath",
-    "paramDescription": "the graph root path",
+    "paramDescription": "the input graph root path",
+    "paramRequired": true
+  },
+  {
+    "paramName": "cp",
+    "paramLongName": "checkpointPath",
+    "paramDescription": "checkpoint directory",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
-    "paramDescription": "the output merged graph root path",
+    "paramDescription": "the output graph root path",
+    "paramRequired": true
+  },
+  {
+    "paramName": "fi",
+    "paramLongName": "filterInvisible",
+    "paramDescription": "if true filters out invisible entities",
+    "paramRequired": true
+  },
+  {
+    "paramName": "isu",
+    "paramLongName": "isLookupUrl",
+    "paramDescription": "url to the ISLookup Service",
    "paramRequired": true
  }
 ]
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
@ -1,109 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.commons.io.IOUtils;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-
-@Disabled
-class ZenodoAPIClientTest {
-
-	private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
-	private final String ACCESS_TOKEN = "";
-
-	private final String CONCEPT_REC_ID = "657113";
-
-	private final String depositionId = "674915";
-
-	@Test
-	void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-
-		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
-
-		Assertions.assertEquals(200, client.sendMretadata(metadata));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewDeposition() throws IOException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(201, client.newDeposition());
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-
-		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
-
-		Assertions.assertEquals(200, client.sendMretadata(metadata));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionNewName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionOldName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-}
--- a/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
@ -0,0 +1,114 @@
+
+package eu.dnetlib.oa.merge;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.List;
+import java.util.Objects;
+
+import org.junit.jupiter.api.Test;
+import org.junit.platform.commons.util.StringUtils;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.oa.merge.AuthorMerger;
+import eu.dnetlib.dhp.schema.oaf.Author;
+
+/**
+ * Exercises {@link AuthorMerger}: the ORCID enrichment over the sample author lists and the ORCID name
+ * similarity check.
+ */
+public class AuthorMergerTest {
+
+	private static final String PUBLICATION_AUTHORS = "/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json";
+
+	private static final String ORCID_AUTHORS = "/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json";
+
+	private static final TypeReference<List<Author>> AUTHOR_LIST = new TypeReference<List<Author>>() {
+	};
+
+	/**
+	 * Runs the ORCID enrichment on every record of the two sample files, printing how many authors carry an ORCID
+	 * pid before and after.
+	 * <p>
+	 * The files are read in lockstep. Publication sample: one id line followed by one line with the JSON author
+	 * list. ORCID sample: two id lines, followed by one line with the JSON author list that is consumed only when
+	 * the publication author line is not blank.
+	 */
+	@Test
+	public void testEnrcichAuthor() throws Exception {
+		final ObjectMapper mapper = new ObjectMapper();
+
+		// try-with-resources closes both readers even when parsing or enrichment throws
+		try (BufferedReader pr = openSample(PUBLICATION_AUTHORS);
+			BufferedReader or = openSample(ORCID_AUTHORS)) {
+
+			String pubLine;
+			while ((pubLine = pr.readLine()) != null) {
+				final String pubId = pubLine;
+				final String matchPidOrcid = or.readLine();
+				final String pubOrcid = or.readLine();
+
+				final String data = pr.readLine();
+
+				// NOTE(review): StringUtils here is org.junit.platform.commons.util.StringUtils (see the imports),
+				// a JUnit platform utility rather than commons-lang3 - confirm this is intended
+				if (StringUtils.isNotBlank(data)) {
+					final String orcidData = or.readLine();
+					// fail with a readable message instead of an opaque parser error on a truncated sample
+					assertNotNull(orcidData, "ORCID sample ended before the authors of " + pubId);
+
+					final List<Author> publicationAuthors = mapper.readValue(data, AUTHOR_LIST);
+					final List<Author> orcidAuthors = mapper.readValue(orcidData, AUTHOR_LIST);
+					System.out.printf("OAF ID = %s \n", pubId);
+					System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
+					System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
+					System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
+					System.out.printf("Oricd Matched PID = %s \n", matchPidOrcid);
+
+					final long originalAuthorWithPiD = countAuthorsWithOrcid(publicationAuthors);
+					final long start = System.currentTimeMillis();
+
+					final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
+
+					final long enrichedAuthorWithPid = countAuthorsWithOrcid(enrichedList);
+
+					final long totalTime = (System.currentTimeMillis() - start) / 1000;
+					System.out
+						.printf(
+							"Enriched authors in %d seconds from %d pid to %d pid \n", totalTime,
+							originalAuthorWithPiD,
+							enrichedAuthorWithPid);
+
+					System.out.println("=================");
+				}
+			}
+		}
+	}
+
+	/**
+	 * Checks that two authors whose name and surname are swapped are recognised as the same person.
+	 */
+	@Test
+	public void checkSimilarityTest() {
+		final Author left = new Author();
+		left.setName("Anand");
+		left.setSurname("Rachna");
+		left.setFullname("Anand, Rachna");
+
+		System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));
+
+		final Author right = new Author();
+		right.setName("Rachna");
+		right.setSurname("Anand");
+		right.setFullname("Rachna, Anand");
+		boolean same = AuthorMerger.checkORCIDSimilarity(left, right);
+
+		assertTrue(same);
+
+	}
+
+	/** Opens a classpath sample as a UTF-8 reader, failing early when the resource is missing. */
+	private static BufferedReader openSample(final String path) {
+		return new BufferedReader(new InputStreamReader(
+			Objects
+				.requireNonNull(
+					AuthorMergerTest.class.getResourceAsStream(path), "missing test resource " + path),
+			java.nio.charset.StandardCharsets.UTF_8));
+	}
+
+	/** Counts the authors having at least one pid whose qualifier class id contains "orcid", ignoring case. */
+	private static long countAuthorsWithOrcid(final List<Author> authors) {
+		return authors
+			.stream()
+			.filter(
+				a -> a.getPid() != null && a
+					.getPid()
+					.stream()
+					.anyMatch(
+						p -> p.getQualifier() != null
+							&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
+			.count();
+	}
+
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java
@ -18,7 +18,6 @@ package eu.dnetlib.pace.util;
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-
 /*
 * Diff Match and Patch
 * Copyright 2018 The diff-match-patch Authors.
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java
@ -40,6 +40,7 @@ public class Constants {
 	public static final String SDG_CLASS_NAME = "Sustainable Development Goals";

 	public static final String NULL = "NULL";
+	public static final String NA = "N/A";

 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

@ -61,10 +62,16 @@ public class Constants {
 			.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
 	}

-	public static Subject getSubject(String sbj, String classid, String classname,
-		String diqualifierclassid) {
-		if (sbj == null || sbj.equals(NULL))
+	public static Subject getSubject(String sbj, String classid, String classname, String diqualifierclassid,
+		Boolean split) {
+		if (sbj == null || sbj.equals(NULL) || sbj.startsWith(NA))
 			return null;
+		String trust = "";
+		String subject = sbj;
+		if (split) {
+			sbj = subject.split("@@")[0];
+			trust = subject.split("@@")[1];
+		}
 		Subject s = new Subject();
 		s.setValue(sbj);
 		s
@ -89,9 +96,14 @@ public class Constants {
 								UPDATE_CLASS_NAME,
 								ModelConstants.DNET_PROVENANCE_ACTIONS,
 								ModelConstants.DNET_PROVENANCE_ACTIONS),
-						""));
+						trust));

 		return s;
+	}
+
+	/**
+	 * Builds a Subject without extracting a trust value from the subject string: delegates to the five-argument
+	 * overload with split set to false, so the value is used as is.
+	 *
+	 * @param sbj the subject value
+	 * @param classid the class id forwarded to the five-argument overload
+	 * @param classname the class name forwarded to the five-argument overload
+	 * @param diqualifierclassid the class id forwarded for the provenance qualifier of the data info
+	 * @return whatever the five-argument overload returns, which is null when sbj is null, equals NULL or starts
+	 *         with NA
+	 */
+	public static Subject getSubject(String sbj, String classid, String classname,
+		String diqualifierclassid) {
+		return getSubject(sbj, classid, classname, diqualifierclassid, false);

 	}

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
@ -12,6 +12,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.sql.*;
 import org.apache.spark.sql.Dataset;
@ -57,11 +58,14 @@ public class PrepareAffiliationRelations implements Serializable {
 		Boolean isSparkSessionManaged = Constants.isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String inputPath = parser.get("inputPath");
-		log.info("inputPath {}: ", inputPath);
+		final String crossrefInputPath = parser.get("crossrefInputPath");
+		log.info("crossrefInputPath: {}", crossrefInputPath);
+
+		final String pubmedInputPath = parser.get("pubmedInputPath");
+		log.info("pubmedInputPath: {}", pubmedInputPath);

 		final String outputPath = parser.get("outputPath");
-		log.info("outputPath {}: ", outputPath);
+		log.info("outputPath: {}", outputPath);

 		SparkConf conf = new SparkConf();

@ -70,12 +74,28 @@ public class PrepareAffiliationRelations implements Serializable {
 			isSparkSessionManaged,
 			spark -> {
 				Constants.removeOutputDir(spark, outputPath);
-				prepareAffiliationRelations(spark, inputPath, outputPath);
+
+				List<KeyValue> collectedFromCrossref = OafMapperUtils
+					.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
+				JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
+					spark, crossrefInputPath, collectedFromCrossref);
+
+				List<KeyValue> collectedFromPubmed = OafMapperUtils
+					.listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed");
+				JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
+					spark, pubmedInputPath, collectedFromPubmed);
+
+				crossrefRelations
+					.union(pubmedRelations)
+					.saveAsHadoopFile(
+						outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
+
 			});
 	}

-	private static <I extends Result> void prepareAffiliationRelations(SparkSession spark, String inputPath,
-		String outputPath) {
+	private static <I extends Result> JavaPairRDD<Text, Text> prepareAffiliationRelations(SparkSession spark,
+		String inputPath,
+		List<KeyValue> collectedfrom) {

 		// load and parse affiliation relations from HDFS
 		Dataset<Row> df = spark
@ -92,7 +112,7 @@ public class PrepareAffiliationRelations implements Serializable {
 				new Column("matching.Confidence").as("confidence"));

 		// prepare action sets for affiliation relations
-		df
+		return df
 			.toJavaRDD()
 			.flatMap((FlatMapFunction<Row, Relation>) row -> {

@ -120,8 +140,6 @@ public class PrepareAffiliationRelations implements Serializable {
 						qualifier,
 						Double.toString(row.getAs("confidence")));

-				List<KeyValue> collectedfrom = OafMapperUtils.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
-
 				// return bi-directional relations
 				return getAffiliationRelationPair(paperId, affId, collectedfrom, dataInfo).iterator();

@ -129,9 +147,7 @@ public class PrepareAffiliationRelations implements Serializable {
 			.map(p -> new AtomicAction(Relation.class, p))
 			.mapToPair(
 				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
-					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
-			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
-
+					new Text(OBJECT_MAPPER.writeValueAsString(aa))));
 	}

 	private static List<Relation> getAffiliationRelationPair(String paperId, String affId, List<KeyValue> collectedfrom,
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java
@ -75,9 +75,12 @@ public class GetFOSSparkJob implements Serializable {
 		fosData.map((MapFunction<Row, FOSDataModel>) r -> {
 			FOSDataModel fosDataModel = new FOSDataModel();
 			fosDataModel.setDoi(r.getString(0).toLowerCase());
-			fosDataModel.setLevel1(r.getString(1));
-			fosDataModel.setLevel2(r.getString(2));
-			fosDataModel.setLevel3(r.getString(3));
+			fosDataModel.setLevel1(r.getString(2));
+			fosDataModel.setLevel2(r.getString(3));
+			fosDataModel.setLevel3(r.getString(4));
+			fosDataModel.setLevel4(r.getString(5));
+			fosDataModel.setScoreL3(String.valueOf(r.getDouble(6)));
+			fosDataModel.setScoreL4(String.valueOf(r.getDouble(7)));
 			return fosDataModel;
 		}, Encoders.bean(FOSDataModel.class))
 			.write()
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java
@ -1,178 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
-
-import static eu.dnetlib.dhp.actionmanager.Constants.*;
-import static eu.dnetlib.dhp.actionmanager.Constants.UPDATE_CLASS_NAME;
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Optional;
-import java.util.stream.Collectors;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SaveMode;
-import org.apache.spark.sql.SparkSession;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore;
-import eu.dnetlib.dhp.actionmanager.bipmodel.score.deserializers.BipResultModel;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.common.HdfsSupport;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.Instance;
-import eu.dnetlib.dhp.schema.oaf.KeyValue;
-import eu.dnetlib.dhp.schema.oaf.Measure;
-import eu.dnetlib.dhp.schema.oaf.Result;
-import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
-import eu.dnetlib.dhp.utils.DHPUtils;
-
-public class PrepareBipFinder implements Serializable {
-
-	private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class);
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	public static void main(String[] args) throws Exception {
-
-		String jsonConfiguration = IOUtils
-			.toString(
-				PrepareBipFinder.class
-					.getResourceAsStream(
-						"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json"));
-
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
-
-		parser.parseArgument(args);
-
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-
-		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
-		final String sourcePath = parser.get("sourcePath");
-		log.info("sourcePath {}: ", sourcePath);
-
-		final String outputPath = parser.get("outputPath");
-		log.info("outputPath {}: ", outputPath);
-
-		SparkConf conf = new SparkConf();
-
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> {
-				HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
-				prepareResults(spark, sourcePath, outputPath);
-			});
-	}
-
-	private static void prepareResults(SparkSession spark, String inputPath, String outputPath) {
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<BipResultModel> bipDeserializeJavaRDD = sc
-			.textFile(inputPath)
-			.map(item -> OBJECT_MAPPER.readValue(item, BipResultModel.class));
-
-		spark
-			.createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> {
-				BipScore bs = new BipScore();
-				bs.setId(key);
-				bs.setScoreList(entry.get(key));
-
-				return bs;
-			}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class))
-			.map((MapFunction<BipScore, Result>) v -> {
-				Result r = new Result();
-				final String cleanedPid = CleaningFunctions.normalizePidValue(DOI, v.getId());
-
-				r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), DOI));
-				Instance inst = new Instance();
-				inst.setMeasures(getMeasure(v));
-
-				inst
-					.setPid(
-						Arrays
-							.asList(
-								OafMapperUtils
-									.structuredProperty(
-										cleanedPid,
-										OafMapperUtils
-											.qualifier(
-												DOI, DOI_CLASSNAME,
-												ModelConstants.DNET_PID_TYPES,
-												ModelConstants.DNET_PID_TYPES),
-										null)));
-				r.setInstance(Arrays.asList(inst));
-				r
-					.setDataInfo(
-						OafMapperUtils
-							.dataInfo(
-								false, null, true,
-								false,
-								OafMapperUtils
-									.qualifier(
-										ModelConstants.PROVENANCE_ENRICH,
-										null,
-										ModelConstants.DNET_PROVENANCE_ACTIONS,
-										ModelConstants.DNET_PROVENANCE_ACTIONS),
-								null));
-				return r;
-			}, Encoders.bean(Result.class))
-			.write()
-			.mode(SaveMode.Overwrite)
-			.option("compression", "gzip")
-			.json(outputPath + "/bip");
-	}
-
-	private static List<Measure> getMeasure(BipScore value) {
-		return value
-			.getScoreList()
-			.stream()
-			.map(score -> {
-				Measure m = new Measure();
-				m.setId(score.getId());
-				m
-					.setUnit(
-						score
-							.getUnit()
-							.stream()
-							.map(unit -> {
-								KeyValue kv = new KeyValue();
-								kv.setValue(unit.getValue());
-								kv.setKey(unit.getKey());
-								kv
-									.setDataInfo(
-										OafMapperUtils
-											.dataInfo(
-												false,
-												UPDATE_DATA_INFO_TYPE,
-												true,
-												false,
-												OafMapperUtils
-													.qualifier(
-														UPDATE_MEASURE_BIP_CLASS_ID,
-														UPDATE_CLASS_NAME,
-														ModelConstants.DNET_PROVENANCE_ACTIONS,
-														ModelConstants.DNET_PROVENANCE_ACTIONS),
-												""));
-								return kv;
-							})
-							.collect(Collectors.toList()));
-				return m;
-			})
-			.collect(Collectors.toList());
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java
@ -78,12 +78,20 @@ public class PrepareFOSSparkJob implements Serializable {
 				HashSet<String> level1 = new HashSet<>();
 				HashSet<String> level2 = new HashSet<>();
 				HashSet<String> level3 = new HashSet<>();
-				addLevels(level1, level2, level3, first);
-				it.forEachRemaining(v -> addLevels(level1, level2, level3, v));
+				HashSet<String> level4 = new HashSet<>();
+				addLevels(level1, level2, level3, level4, first);
+				it.forEachRemaining(v -> addLevels(level1, level2, level3, level4, v));
 				List<Subject> sbjs = new ArrayList<>();
-				level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
-				level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
-				level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
+				level1
+					.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
+				level2
+					.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
+				level3
+					.forEach(
+						l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
+				level4
+					.forEach(
+						l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
 				r.setSubject(sbjs);
 				r
 					.setDataInfo(
@ -106,11 +114,18 @@ public class PrepareFOSSparkJob implements Serializable {
 			.json(outputPath + "/fos");
 	}

+	/**
+	 * Appends a subject to the list, silently skipping null subjects.
+	 *
+	 * @param sbsjs the list to append to
+	 * @param sbj the subject to append, may be null
+	 */
+	private static void add(List<Subject> sbsjs, Subject sbj) {
+		if (sbj == null) {
+			return;
+		}
+		sbsjs.add(sbj);
+	}
+
 	private static void addLevels(HashSet<String> level1, HashSet<String> level2, HashSet<String> level3,
+		HashSet<String> level4,
 		FOSDataModel first) {
 		level1.add(first.getLevel1());
 		level2.add(first.getLevel2());
-		level3.add(first.getLevel3());
+		level3.add(first.getLevel3() + "@@" + first.getScoreL3());
+		level4.add(first.getLevel4() + "@@" + first.getScoreL4());
 	}

 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java
@ -69,9 +69,9 @@ public class SparkSaveUnresolved implements Serializable {
 			.mapGroups((MapGroupsFunction<String, Result, Result>) (k, it) -> {
 				Result ret = it.next();
 				it.forEachRemaining(r -> {
-					if (r.getInstance() != null) {
-						ret.setInstance(r.getInstance());
-					}
+//					if (r.getInstance() != null) {
+//						ret.setInstance(r.getInstance());
+//					}
 					if (r.getSubject() != null) {
 						if (ret.getSubject() != null)
 							ret.getSubject().addAll(r.getSubject());
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java
@ -11,21 +11,43 @@ public class FOSDataModel implements Serializable {
 	private String doi;

 	@CsvBindByPosition(position = 1)
+//    @CsvBindByName(column = "oaid")
+	private String oaid;
+	@CsvBindByPosition(position = 2)
 //    @CsvBindByName(column = "level1")
 	private String level1;

-	@CsvBindByPosition(position = 2)
+	@CsvBindByPosition(position = 3)
 //    @CsvBindByName(column = "level2")
 	private String level2;

-	@CsvBindByPosition(position = 3)
+	@CsvBindByPosition(position = 4)
 //    @CsvBindByName(column = "level3")
 	private String level3;

+	@CsvBindByPosition(position = 5)
+//    @CsvBindByName(column = "level4")
+	private String level4;
+	@CsvBindByPosition(position = 6)
+	private String scoreL3;
+	@CsvBindByPosition(position = 7)
+	private String scoreL4;
+
 	public FOSDataModel() {

 	}

+	/**
+	 * Creates a record with the doi, the four classification levels and the scores of level 3 and level 4. The
+	 * oaid is left unset.
+	 *
+	 * @param doi the doi
+	 * @param level1 the level 1 value
+	 * @param level2 the level 2 value
+	 * @param level3 the level 3 value
+	 * @param level4 the level 4 value
+	 * @param l3score the score of the level 3 value
+	 * @param l4score the score of the level 4 value
+	 */
+	public FOSDataModel(String doi, String level1, String level2, String level3, String level4, String l3score,
+		String l4score) {
+		this.doi = doi;
+
+		// classification levels
+		this.level1 = level1;
+		this.level2 = level2;
+		this.level3 = level3;
+		this.level4 = level4;
+
+		// scores of the two deepest levels
+		this.scoreL3 = l3score;
+		this.scoreL4 = l4score;
+	}
+
 	public FOSDataModel(String doi, String level1, String level2, String level3) {
 		this.doi = doi;
 		this.level1 = level1;
@ -33,8 +55,41 @@ public class FOSDataModel implements Serializable {
 		this.level3 = level3;
 	}

-	public static FOSDataModel newInstance(String d, String level1, String level2, String level3) {
-		return new FOSDataModel(d, level1, level2, level3);
+	public static FOSDataModel newInstance(String d, String level1, String level2, String level3, String level4,
+		String scorel3, String scorel4) {
+		return new FOSDataModel(d, level1, level2, level3, level4, scorel3, scorel4);
+	}
+
+	public String getOaid() {
+		return oaid;
+	}
+
+	public void setOaid(String oaid) {
+		this.oaid = oaid;
+	}
+
+	public String getLevel4() {
+		return level4;
+	}
+
+	public void setLevel4(String level4) {
+		this.level4 = level4;
+	}
+
+	public String getScoreL3() {
+		return scoreL3;
+	}
+
+	public void setScoreL3(String scoreL3) {
+		this.scoreL3 = scoreL3;
+	}
+
+	public String getScoreL4() {
+		return scoreL4;
+	}
+
+	public void setScoreL4(String scoreL4) {
+		this.scoreL4 = scoreL4;
 	}

 	public String getDoi() {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java
@ -10,8 +10,10 @@ import java.util.*;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.api.java.function.MapFunction;
@ -26,19 +28,29 @@ import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
-import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
+import eu.dnetlib.dhp.schema.oaf.utils.*;
+import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;

 public class CreateActionSetSparkJob implements Serializable {
 	public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations";
 	public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations";
-	private static final String ID_PREFIX = "50|doi_________::";
+
+	// DOI-to-DOI citations
+	public static final String COCI = "COCI";
+
+	// PMID-to-PMID citations
+	public static final String POCI = "POCI";
+
+	private static final String DOI_PREFIX = "50|doi_________::";
+
+	private static final String PMID_PREFIX = "50|pmid________::";
+
 	private static final String TRUST = "0.91";

 	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
+
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

 	public static void main(final String[] args) throws IOException, ParseException {
@ -62,7 +74,7 @@ public class CreateActionSetSparkJob implements Serializable {
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

 		final String inputPath = parser.get("inputPath");
-		log.info("inputPath {}", inputPath.toString());
+		log.info("inputPath {}", inputPath);

 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath {}", outputPath);
@ -76,41 +88,68 @@ public class CreateActionSetSparkJob implements Serializable {
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
-			spark -> {
-				extractContent(spark, inputPath, outputPath, shouldDuplicateRels);
-			});
+			spark -> extractContent(spark, inputPath, outputPath, shouldDuplicateRels));

 	}

 	private static void extractContent(SparkSession spark, String inputPath, String outputPath,
 		boolean shouldDuplicateRels) {
-		spark
+
+		getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, COCI)
+			.union(getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, POCI))
+			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
+	}
+
+	private static JavaPairRDD<Text, Text> getTextTextJavaPairRDD(SparkSession spark, String inputPath,
+		boolean shouldDuplicateRels, String prefix) {
+		return spark
 			.read()
-			.textFile(inputPath + "/*")
+			.textFile(inputPath + "/" + prefix + "/" + prefix + "_JSON/*")
 			.map(
 				(MapFunction<String, COCI>) value -> OBJECT_MAPPER.readValue(value, COCI.class),
 				Encoders.bean(COCI.class))
 			.flatMap(
-				(FlatMapFunction<COCI, Relation>) value -> createRelation(value, shouldDuplicateRels).iterator(),
+				(FlatMapFunction<COCI, Relation>) value -> createRelation(
+					value, shouldDuplicateRels, prefix)
+						.iterator(),
 				Encoders.bean(Relation.class))
-			.filter((FilterFunction<Relation>) value -> value != null)
+			.filter((FilterFunction<Relation>) Objects::nonNull)
 			.toJavaRDD()
 			.map(p -> new AtomicAction(p.getClass(), p))
 			.mapToPair(
 				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
-					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
-			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
-
+					new Text(OBJECT_MAPPER.writeValueAsString(aa))));
 	}

-	private static List<Relation> createRelation(COCI value, boolean duplicate) {
+	private static List<Relation> createRelation(COCI value, boolean duplicate, String p) {

 		List<Relation> relationList = new ArrayList<>();
+		String prefix;
+		String citing;
+		String cited;

-		String citing = ID_PREFIX
-			+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting()));
-		final String cited = ID_PREFIX
-			+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
+		switch (p) {
+			case COCI:
+				prefix = DOI_PREFIX;
+				citing = prefix
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getCiting()));
+				cited = prefix
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getCited()));
+				break;
+			case POCI:
+				prefix = PMID_PREFIX;
+				citing = prefix
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCiting()));
+				cited = prefix
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCited()));
+				break;
+			default:
+				throw new IllegalStateException("Invalid prefix: " + p);
+		}

 		if (!citing.equals(cited)) {
 			relationList
@ -120,7 +159,7 @@ public class CreateActionSetSparkJob implements Serializable {
 						cited, ModelConstants.CITES));

 			if (duplicate && value.getCiting().endsWith(".refs")) {
-				citing = ID_PREFIX + IdentifierFactory
+				citing = prefix + IdentifierFactory
 					.md5(
 						CleaningFunctions
 							.normalizePidValue(
@ -132,59 +171,30 @@ public class CreateActionSetSparkJob implements Serializable {
 		return relationList;
 	}

-	private static Collection<Relation> getRelations(String citing, String cited) {
-
-		return Arrays
-			.asList(
-				getRelation(citing, cited, ModelConstants.CITES),
-				getRelation(cited, citing, ModelConstants.IS_CITED_BY));
-	}
-
 	public static Relation getRelation(
 		String source,
 		String target,
-		String relclass) {
-		Relation r = new Relation();
-		r.setCollectedfrom(getCollectedFrom());
-		r.setSource(source);
-		r.setTarget(target);
-		r.setRelClass(relclass);
-		r.setRelType(ModelConstants.RESULT_RESULT);
-		r.setSubRelType(ModelConstants.CITATION);
-		r
-			.setDataInfo(
-				getDataInfo());
-		return r;
-	}
+		String relClass) {

-	public static List<KeyValue> getCollectedFrom() {
-		KeyValue kv = new KeyValue();
-		kv.setKey(ModelConstants.OPENOCITATIONS_ID);
-		kv.setValue(ModelConstants.OPENOCITATIONS_NAME);
-
-		return Arrays.asList(kv);
-	}
-
-	public static DataInfo getDataInfo() {
-		DataInfo di = new DataInfo();
-		di.setInferred(false);
-		di.setDeletedbyinference(false);
-		di.setTrust(TRUST);
-
-		di
-			.setProvenanceaction(
-				getQualifier(OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS));
-		return di;
-	}
-
-	public static Qualifier getQualifier(String class_id, String class_name,
-		String qualifierSchema) {
-		Qualifier pa = new Qualifier();
-		pa.setClassid(class_id);
-		pa.setClassname(class_name);
-		pa.setSchemeid(qualifierSchema);
-		pa.setSchemename(qualifierSchema);
-		return pa;
+		return OafMapperUtils
+			.getRelation(
+				source,
+				target,
+				ModelConstants.RESULT_RESULT,
+				ModelConstants.CITATION,
+				relClass,
+				Arrays
+					.asList(
+						OafMapperUtils.keyValue(ModelConstants.OPENOCITATIONS_ID, ModelConstants.OPENOCITATIONS_NAME)),
+				OafMapperUtils
+					.dataInfo(
+						false, null, false, false,
+						OafMapperUtils
+							.qualifier(
+								OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME,
+								ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
+						TRUST),
+				null);
 	}

 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.actionmanager.opencitations;

 import java.io.*;
 import java.io.Serializable;
+import java.util.Arrays;
 import java.util.Objects;
 import java.util.zip.GZIPOutputStream;
 import java.util.zip.ZipEntry;
@ -37,7 +38,7 @@ public class GetOpenCitationsRefs implements Serializable {
 		parser.parseArgument(args);

 		final String[] inputFile = parser.get("inputFile").split(";");
-		log.info("inputFile {}", inputFile.toString());
+		log.info("inputFile {}", Arrays.asList(inputFile));

 		final String workingPath = parser.get("workingPath");
 		log.info("workingPath {}", workingPath);
@ -45,6 +46,9 @@ public class GetOpenCitationsRefs implements Serializable {
 		final String hdfsNameNode = parser.get("hdfsNameNode");
 		log.info("hdfsNameNode {}", hdfsNameNode);

+		final String prefix = parser.get("prefix");
+		log.info("prefix {}", prefix);
+
 		Configuration conf = new Configuration();
 		conf.set("fs.defaultFS", hdfsNameNode);

@ -53,30 +57,31 @@ public class GetOpenCitationsRefs implements Serializable {
 		GetOpenCitationsRefs ocr = new GetOpenCitationsRefs();

 		for (String file : inputFile) {
-			ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem);
+			ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem, prefix);
 		}

 	}

-	private void doExtract(String inputFile, String workingPath, FileSystem fileSystem)
+	private void doExtract(String inputFile, String workingPath, FileSystem fileSystem, String prefix)
 		throws IOException {

 		final Path path = new Path(inputFile);

 		FSDataInputStream oc_zip = fileSystem.open(path);

-		int count = 1;
+		// int count = 1;
 		try (ZipInputStream zis = new ZipInputStream(oc_zip)) {
 			ZipEntry entry = null;
 			while ((entry = zis.getNextEntry()) != null) {

 				if (!entry.isDirectory()) {
 					String fileName = entry.getName();
-					fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count;
-					count++;
+					// fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count;
+					fileName = fileName.substring(0, fileName.lastIndexOf("."));
+					// count++;
 					try (
 						FSDataOutputStream out = fileSystem
-							.create(new Path(workingPath + "/COCI/" + fileName + ".gz"));
+							.create(new Path(workingPath + "/" + prefix + "/" + fileName + ".gz"));
 						GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {

 						IOUtils.copy(zis, gzipOs);
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.io.IOException;
 import java.io.Serializable;
+import java.util.Arrays;
 import java.util.Optional;

 import org.apache.commons.io.IOUtils;
@ -42,13 +43,16 @@ public class ReadCOCI implements Serializable {
 		log.info("outputPath: {}", outputPath);

 		final String[] inputFile = parser.get("inputFile").split(";");
-		log.info("inputFile {}", inputFile.toString());
+		log.info("inputFile {}", Arrays.asList(inputFile));
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

 		final String workingPath = parser.get("workingPath");
 		log.info("workingPath {}", workingPath);

+		final String format = parser.get("format");
+		log.info("format {}", format);
+
 		SparkConf sconf = new SparkConf();

 		final String delimiter = Optional
@ -64,16 +68,17 @@ public class ReadCOCI implements Serializable {
 					workingPath,
 					inputFile,
 					outputPath,
-					delimiter);
+					delimiter,
+					format);
 			});
 	}

 	private static void doRead(SparkSession spark, String workingPath, String[] inputFiles,
 		String outputPath,
-		String delimiter) throws IOException {
+		String delimiter, String format) {

 		for (String inputFile : inputFiles) {
-			String p_string = workingPath + "/" + inputFile + ".gz";
+			String pString = workingPath + "/" + inputFile + ".gz";

 			Dataset<Row> cociData = spark
 				.read()
@ -82,14 +87,20 @@ public class ReadCOCI implements Serializable {
 				.option("inferSchema", "true")
 				.option("header", "true")
 				.option("quotes", "\"")
-				.load(p_string)
+				.load(pString)
 				.repartition(100);

 			cociData.map((MapFunction<Row, COCI>) row -> {
 				COCI coci = new COCI();
+				if (format.equals("COCI")) {
+					coci.setCiting(row.getString(1));
+					coci.setCited(row.getString(2));
+				} else {
+					coci.setCiting(String.valueOf(row.getInt(1)));
+					coci.setCited(String.valueOf(row.getInt(2)));
+				}
 				coci.setOci(row.getString(0));
-				coci.setCiting(row.getString(1));
-				coci.setCited(row.getString(2));
+
 				return coci;
 			}, Encoders.bean(COCI.class))
 				.write()
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDDumpApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDDumpApplication.java
@ -0,0 +1,102 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
+
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+/**
+ * Downloads the files listed by a remote JSON descriptor into a directory of the configured file system.
+ *
+ * The descriptor served by {@code apiURL} must contain a {@code files} array whose items expose a
+ * {@code name} and a {@code download_url} field; every item is stored as {@code targetPath/name}.
+ */
+public class DownloadORCIDDumpApplication {
+	private static final Logger log = LoggerFactory.getLogger(DownloadORCIDDumpApplication.class);
+
+	/** Timeout, in milliseconds, used for connect, connection-request and socket operations. */
+	private static final int TIMEOUT_MS = 60 * 1000;
+
+	private final FileSystem fileSystem;
+
+	public DownloadORCIDDumpApplication(FileSystem fileSystem) {
+		this.fileSystem = fileSystem;
+	}
+
+	/**
+	 * Entry point: reads the {@code namenode}, {@code targetPath} and {@code apiURL} arguments and starts the download.
+	 *
+	 * @param args the command line arguments described by download_orcid_parameter.json
+	 * @throws Exception if the arguments cannot be parsed or the download fails
+	 */
+	public static void main(String[] args) throws Exception {
+		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					Objects
+						.requireNonNull(
+							DownloadORCIDDumpApplication.class
+								.getResourceAsStream(
+									"/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json"))));
+		argumentParser.parseArgument(args);
+
+		final String hdfsuri = argumentParser.get("namenode");
+		log.info("hdfsURI is {}", hdfsuri);
+
+		final String targetPath = argumentParser.get("targetPath");
+		log.info("targetPath is {}", targetPath);
+
+		final String apiURL = argumentParser.get("apiURL");
+		log.info("apiURL is {}", apiURL);
+
+		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
+
+		new DownloadORCIDDumpApplication(fileSystem).run(targetPath, apiURL);
+
+	}
+
+	/**
+	 * Downloads a single item and stores it as {@code basePath/name}, overwriting any existing file.
+	 *
+	 * The target file is created only once the server answered with a status in [200, 400), so a failed
+	 * request does not leave an empty file behind; both the response body and the target stream are
+	 * closed when the copy ends.
+	 *
+	 * @param name the file name to create under {@code basePath}
+	 * @param itemURL the URL to download
+	 * @param basePath the directory that receives the file
+	 * @throws RuntimeException wrapping any I/O failure raised by the request or by the copy
+	 */
+	private void downloadItem(final String name, final String itemURL, final String basePath) {
+		final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
+		final HttpGet request = new HttpGet(itemURL);
+		final RequestConfig config = RequestConfig
+			.custom()
+			.setConnectTimeout(TIMEOUT_MS)
+			.setConnectionRequestTimeout(TIMEOUT_MS)
+			.setSocketTimeout(TIMEOUT_MS)
+			.build();
+		log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());
+		try (CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
+			CloseableHttpResponse response = client.execute(request)) {
+			final int responseCode = response.getStatusLine().getStatusCode();
+			log.info("Response code is {}", responseCode);
+			if (responseCode >= 200 && responseCode < 400) {
+				// try-with-resources guarantees that the written bytes are flushed and the file handle released
+				try (InputStream content = response.getEntity().getContent();
+					FSDataOutputStream out = fileSystem.create(hdfsWritePath, true)) {
+					IOUtils.copy(content, out);
+				}
+			} else {
+				log.error("Skipping url {}: unexpected response code {}", itemURL, responseCode);
+			}
+		} catch (java.io.IOException e) {
+			throw new RuntimeException("Unable to download " + itemURL + " into " + hdfsWritePath, e);
+		}
+	}
+
+	/**
+	 * Reads the JSON descriptor served by {@code apiURL} and downloads every listed file into {@code targetPath}.
+	 *
+	 * @param targetPath the directory that receives the downloaded files
+	 * @param apiURL the URL of the JSON descriptor
+	 * @throws Exception if the descriptor cannot be read or does not contain the {@code files} field
+	 */
+	protected void run(final String targetPath, final String apiURL) throws Exception {
+		final ObjectMapper mapper = new ObjectMapper();
+		final URLConnection conn = new URL(apiURL).openConnection();
+		final JsonNode jsonNode;
+		// parsing straight from the stream lets Jackson detect the JSON encoding and the stream gets closed
+		try (InputStream is = conn.getInputStream()) {
+			jsonNode = mapper.readTree(is);
+		}
+		final JsonNode files = jsonNode.get("files");
+		if (files == null) {
+			throw new IllegalStateException("Missing 'files' field in the response of " + apiURL);
+		}
+		files.forEach(i -> downloadItem(i.get("name").asText(), i.get("download_url").asText(), targetPath));
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ExtractORCIDDump.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ExtractORCIDDump.java
@ -0,0 +1,71 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+/**
+ * Extracts every {@code tar.gz} archive found directly under a source directory, running one
+ * {@link ORCIDExtractor} thread per archive.
+ */
+public class ExtractORCIDDump {
+	private static final Logger log = LoggerFactory.getLogger(ExtractORCIDDump.class);
+
+	private final FileSystem fileSystem;
+
+	public ExtractORCIDDump(FileSystem fileSystem) {
+		this.fileSystem = fileSystem;
+	}
+
+	/**
+	 * Entry point: reads the {@code namenode}, {@code sourcePath} and {@code targetPath} arguments and starts the extraction.
+	 *
+	 * @param args the command line arguments described by extract_orcid_parameter.json
+	 * @throws Exception if the arguments cannot be parsed or the extraction fails
+	 */
+	public static void main(String[] args) throws Exception {
+		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					Objects
+						.requireNonNull(
+							ExtractORCIDDump.class
+								.getResourceAsStream(
+									"/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json"))));
+		argumentParser.parseArgument(args);
+
+		final String hdfsuri = argumentParser.get("namenode");
+		log.info("hdfsURI is {}", hdfsuri);
+
+		final String sourcePath = argumentParser.get("sourcePath");
+		log.info("sourcePath is {}", sourcePath);
+
+		final String targetPath = argumentParser.get("targetPath");
+		log.info("targetPath is {}", targetPath);
+
+		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
+
+		new ExtractORCIDDump(fileSystem).run(sourcePath, targetPath);
+
+	}
+
+	/**
+	 * Starts one extractor thread for each file under {@code sourcePath} (non recursive listing) whose name
+	 * ends with {@code tar.gz}, then waits for all of them.
+	 *
+	 * @param sourcePath the directory containing the archives
+	 * @param targetPath the base output path handed to every extractor
+	 * @throws IOException if the source directory cannot be listed
+	 * @throws InterruptedException if the calling thread is interrupted while waiting for the workers
+	 * @throws IllegalStateException if at least one extractor thread terminated with an exception
+	 */
+	public void run(final String sourcePath, final String targetPath) throws IOException, InterruptedException {
+		final RemoteIterator<LocatedFileStatus> ls = fileSystem.listFiles(new Path(sourcePath), false);
+		final List<ORCIDExtractor> workers = new ArrayList<>();
+		// exceptions raised inside a worker never reach the caller of join(): collect them explicitly
+		final List<Throwable> failures = java.util.Collections.synchronizedList(new ArrayList<>());
+		int i = 0;
+		while (ls.hasNext()) {
+			final LocatedFileStatus current = ls.next();
+			if (current.getPath().getName().endsWith("tar.gz")) {
+				final ORCIDExtractor worker = new ORCIDExtractor(
+					fileSystem, String.valueOf(i++), current.getPath(), targetPath);
+				worker.setUncaughtExceptionHandler((thread, error) -> {
+					log.error("Extraction of {} failed", current.getPath(), error);
+					failures.add(error);
+				});
+				workers.add(worker);
+			}
+		}
+		workers.forEach(Thread::start);
+		for (ORCIDExtractor worker : workers) {
+			worker.join();
+		}
+		if (!failures.isEmpty()) {
+			throw new IllegalStateException(
+				failures.size() + " of " + workers.size() + " extractions failed", failures.get(0));
+		}
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java
@ -0,0 +1,171 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The ORCIDExtractor class extracts ORCID data from a TAR archive.
+ * The class creates a map of SequenceFile.Writer objects, one for each type of data that is to be extracted (e.g., employments, works, summaries).
+ * Then, it iterates over the TAR archive and writes each entry to the appropriate SequenceFile.Writer object.
+ * Finally, it closes all the SequenceFile.Writer objects.
+ */
+public class ORCIDExtractor extends Thread {
+
+	private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
+
+	private final FileSystem fileSystem;
+
+	private final String id;
+
+	private final Path sourcePath;
+
+	private final String baseOutputPath;
+
+	/**
+	 * @param fileSystem the file system used both to read the archive and to write the sequence files
+	 * @param id the identifier of this worker, used as suffix of the output files and in the log messages
+	 * @param sourcePath the path of the compressed TAR archive to extract
+	 * @param baseOutputPath the directory that receives the sequence files
+	 */
+	public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
+		this.fileSystem = fileSystem;
+		this.id = id;
+		this.sourcePath = sourcePath;
+		this.baseOutputPath = baseOutputPath;
+	}
+
+	/**
+	 * Opens a Text/Text SequenceFile.Writer on the given path, using the configuration of the file system.
+	 */
+	private SequenceFile.Writer openWriter(final String path) throws IOException {
+		return SequenceFile
+			.createWriter(
+				fileSystem.getConf(),
+				SequenceFile.Writer.file(new Path(path)),
+				SequenceFile.Writer.keyClass(Text.class),
+				SequenceFile.Writer.valueClass(Text.class));
+	}
+
+	/**
+	 * creates a map of SequenceFile.Writer objects,
+	 * one for each type of data that is to be extracted. The map is created based on the name of the TAR archive:
+	 * an archive whose name contains "summaries" gets a single "summary" writer, any other archive gets
+	 * an "employments" and a "works" writer.
+	 * @return the Map
+	 */
+	private Map<String, SequenceFile.Writer> createMap() {
+		try {
+			log.info("Thread {} Creating sequence files starting from this input Path {}", id, sourcePath.getName());
+			final Map<String, SequenceFile.Writer> res = new HashMap<>();
+			if (sourcePath.getName().contains("summaries")) {
+				final String summaryPath = String.format("%s/summaries_%s", baseOutputPath, id);
+				res.put("summary", openWriter(summaryPath));
+				log.info("Thread {} Creating only summary path here {}", id, summaryPath);
+				return res;
+			}
+
+			final String employmentsPath = String.format("%s/employments_%s", baseOutputPath, id);
+			res.put("employments", openWriter(employmentsPath));
+			log.info("Thread {} Creating employments path here {}", id, employmentsPath);
+
+			final String worksPath = String.format("%s/works_%s", baseOutputPath, id);
+			res.put("works", openWriter(worksPath));
+			log.info("Thread {} Creating works path here {}", id, worksPath);
+
+			return res;
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/**
+	 * Opens the archive through the compression codec resolved from the source path, creates the output
+	 * writers and extracts the archive content. The JVM is terminated with exit code 1 when no codec
+	 * is found for the source path.
+	 *
+	 * @throws RuntimeException wrapping any I/O failure raised during the extraction
+	 */
+	@Override
+	public void run() {
+
+		CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
+		CompressionCodec codec = factory.getCodec(sourcePath);
+		if (codec == null) {
+			System.err.println("No codec found for " + sourcePath.getName());
+			System.exit(1);
+		}
+
+		InputStream gzipInputStream = null;
+		try {
+			gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath));
+			final Map<String, SequenceFile.Writer> fileMap = createMap();
+			iterateTar(fileMap, gzipInputStream);
+
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		} finally {
+			log.info("Closing gzip stream");
+			IOUtils.closeStream(gzipInputStream);
+		}
+
+	}
+
+	/**
+	 * Selects the writer for a TAR entry: the "summary" writer when the archive name contains "summaries",
+	 * otherwise the "works" or "employments" writer depending on the entry path.
+	 *
+	 * @return the writer, or null when the entry does not belong to any extracted type
+	 */
+	private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, final String path) {
+		if (sourcePath.getName().contains("summaries")) {
+			return fileMap.get("summary");
+		}
+
+		if (path.contains("works")) {
+			return fileMap.get("works");
+		}
+		if (path.contains("employments"))
+			return fileMap.get("employments");
+		return null;
+	}
+
+	/**
+	 * Appends every regular file of the TAR stream to the matching writer, using the entry name as key and
+	 * the entry content as value; all the writers are flushed and closed at the end.
+	 *
+	 * @param fileMap the writers created by {@link #createMap()}
+	 * @param gzipInputStream the decompressed stream of the TAR archive
+	 * @throws IOException if the archive cannot be read or a record cannot be written
+	 */
+	private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
+
+		int extractedItem = 0;
+		try (final TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
+
+			TarArchiveEntry entry;
+			while ((entry = tais.getNextTarEntry()) != null) {
+
+				if (entry.isFile()) {
+
+					final SequenceFile.Writer fl = retrieveFile(fileMap, entry.getName());
+					if (fl != null) {
+						final Text key = new Text(entry.getName());
+						// explicit charset: the decoding must not depend on the platform default.
+						// NOTE(review): assumes the archive entries are UTF-8 encoded - confirm
+						// The reader is intentionally not closed: closing it would close the whole TAR stream
+						final Text value = new Text(
+							org.apache.commons.io.IOUtils
+								.toString(
+									new BufferedReader(
+										new InputStreamReader(tais, java.nio.charset.StandardCharsets.UTF_8))));
+						fl.append(key, value);
+						extractedItem++;
+						// progress report only: the iteration must continue until the end of the archive
+						if (extractedItem % 100000 == 0) {
+							log.info("Thread {}: Extracted {} items", id, extractedItem);
+						}
+					}
+				}
+			}
+		} finally {
+			log.info("Thread {}: Completed processed {} items", id, extractedItem);
+			for (SequenceFile.Writer k : fileMap.values()) {
+				k.hflush();
+				k.close();
+			}
+		}
+
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidParser.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidParser.java
@ -0,0 +1,251 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.ximpleware.*;
+
+import eu.dnetlib.dhp.collection.orcid.model.*;
+import eu.dnetlib.dhp.parser.utility.VtdException;
+import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
+
+public class OrcidParser {
+
+	final Logger log = LoggerFactory.getLogger(OrcidParser.class);
+	private VTDNav vn;
+
+	private AutoPilot ap;
+	private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
+	private static final String NS_COMMON = "common";
+	private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
+	private static final String NS_PERSON = "person";
+	private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
+	private static final String NS_DETAILS = "personal-details";
+	private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
+	private static final String NS_OTHER = "other-name";
+	private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
+	private static final String NS_RECORD = "record";
+	private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
+	private static final String NS_ACTIVITIES = "activities";
+	private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities";
+	private static final String NS_WORK = "work";
+	private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
+
+	private static final String NS_ERROR = "error";
+	private static final String NS_HISTORY = "history";
+	private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
+	private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
+	private static final String NS_BULK = "bulk";
+	private static final String NS_EXTERNAL = "external-identifier";
+	private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";
+
+	private void generateParsedDocument(final String xml) throws ParseException {
+		final VTDGen vg = new VTDGen();
+		vg.setDoc(xml.getBytes());
+		vg.parse(true);
+		this.vn = vg.getNav();
+		this.ap = new AutoPilot(vn);
+		ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
+		ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
+		ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
+		ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
+		ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
+		ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
+		ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL);
+		ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
+		ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
+		ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
+	}
+
+	public Author parseSummary(final String xml) {
+
+		try {
+			final Author author = new Author();
+			generateParsedDocument(xml);
+			List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
+				.getTextValuesWithAttributes(
+					ap, vn, "//record:record", Arrays.asList("path"));
+			if (!recordNodes.isEmpty()) {
+				final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
+				author.setOrcid(oid);
+			} else {
+				return null;
+			}
+			List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
+				.getTextValuesWithAttributes(
+					ap, vn, "//person:name", Arrays.asList("visibility"));
+			final String visibility = (personNodes.get(0).getAttributes().get("visibility"));
+			author.setVisibility(visibility);
+			final String name = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:given-names");
+			author.setGivenName(name);
+
+			final String surnames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:family-name");
+			author.setFamilyName(surnames);
+
+			final String creditNames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:credit-name");
+			author.setCreditName(creditNames);
+
+			final String biography = VtdUtilityParser
+				.getSingleValue(ap, vn, "//person:biography/personal-details:content");
+			author.setBiography(biography);
+
+			final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
+			if (!otherNames.isEmpty()) {
+				author.setOtherNames(otherNames);
+			}
+
+			ap.selectXPath("//external-identifier:external-identifier");
+
+			while (ap.evalXPath() != -1) {
+				final Pid pid = new Pid();
+
+				final AutoPilot ap1 = new AutoPilot(ap.getNav());
+
+				ap1.selectXPath("./common:external-id-type");
+				while (ap1.evalXPath() != -1) {
+					int it = vn.getText();
+					pid.setSchema(vn.toNormalizedString(it));
+				}
+				ap1.selectXPath("./common:external-id-value");
+				while (ap1.evalXPath() != -1) {
+					int it = vn.getText();
+					pid.setValue(vn.toNormalizedString(it));
+				}
+
+				author.addOtherPid(pid);
+			}
+
+			return author;
+		} catch (Throwable e) {
+			log.error("Error on parsing {}", xml);
+			log.error(e.getMessage());
+			return null;
+		}
+	}
+
+	public Work parseWork(final String xml) {
+
+		try {
+			final Work work = new Work();
+			generateParsedDocument(xml);
+			List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
+				.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "visibility"));
+			if (!workNodes.isEmpty()) {
+				final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
+				work.setOrcid(oid);
+			} else {
+				return null;
+			}
+
+			ap.selectXPath("//common:external-id");
+
+			while (ap.evalXPath() != -1) {
+				final Pid pid = new Pid();
+
+				final AutoPilot ap1 = new AutoPilot(ap.getNav());
+
+				ap1.selectXPath("./common:external-id-type");
+				while (ap1.evalXPath() != -1) {
+					int it = vn.getText();
+					pid.setSchema(vn.toNormalizedString(it));
+				}
+				ap1.selectXPath("./common:external-id-value");
+				while (ap1.evalXPath() != -1) {
+					int it = vn.getText();
+					pid.setValue(vn.toNormalizedString(it));
+				}
+
+				work.addPid(pid);
+			}
+
+			work.setTitle(VtdUtilityParser.getSingleValue(ap, vn, "//work:title/common:title"));
+
+			return work;
+		} catch (Throwable e) {
+			log.error("Error on parsing {}", xml);
+			log.error(e.getMessage());
+			return null;
+		}
+
+	}
+
+	private String extractEmploymentDate(final String xpath) throws Exception {
+
+		ap.selectXPath(xpath);
+		StringBuilder sb = new StringBuilder();
+		while (ap.evalXPath() != -1) {
+			final AutoPilot ap1 = new AutoPilot(ap.getNav());
+			ap1.selectXPath("./common:year");
+			while (ap1.evalXPath() != -1) {
+				int it = vn.getText();
+				sb.append(vn.toNormalizedString(it));
+			}
+			ap1.selectXPath("./common:month");
+			while (ap1.evalXPath() != -1) {
+				int it = vn.getText();
+				sb.append("-");
+				sb.append(vn.toNormalizedString(it));
+			}
+			ap1.selectXPath("./common:day");
+			while (ap1.evalXPath() != -1) {
+				int it = vn.getText();
+				sb.append("-");
+				sb.append(vn.toNormalizedString(it));
+			}
+		}
+		return sb.toString();
+
+	}
+
+	public Employment parseEmployment(final String xml) {
+		try {
+			final Employment employment = new Employment();
+			generateParsedDocument(xml);
+			final String oid = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:source-orcid/common:path");
+			if (StringUtils.isNotBlank(oid)) {
+				employment.setOrcid(oid);
+			} else {
+				return null;
+			}
+			final String depName = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:department-name");
+			final String rolTitle = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:role-title");
+			if (StringUtils.isNotBlank(rolTitle))
+				employment.setRoleTitle(rolTitle);
+			if (StringUtils.isNotBlank(depName))
+				employment.setDepartmentName(depName);
+			else
+				employment
+					.setDepartmentName(
+						VtdUtilityParser
+							.getSingleValue(ap, vn, "//common:organization/common:name"));
+
+			employment.setStartDate(extractEmploymentDate("//common:start-date"));
+			employment.setEndDate(extractEmploymentDate("//common:end-date"));
+
+			final String affiliationId = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:disambiguated-organization-identifier");
+			final String affiliationIdType = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:disambiguation-source");
+
+			if (StringUtils.isNotBlank(affiliationId) || StringUtils.isNotBlank(affiliationIdType))
+				employment.setAffiliationId(new Pid(affiliationId, affiliationIdType));
+
+			return employment;
+		} catch (Throwable e) {
+			log.error("Error on parsing {}", xml);
+			log.error(e.getMessage());
+			return null;
+		}
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java
@ -0,0 +1,83 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Bean holding the data extracted from an ORCID summary: names, visibility, biography,
+ * alternative names and external identifiers of an author.
+ */
+public class Author extends ORCIDItem {
+
+	private String givenName;
+	private String familyName;
+	private String visibility;
+	private String creditName;
+	private List<String> otherNames;
+	private List<Pid> otherPids;
+	private String biography;
+
+	/** @return the given name */
+	public String getGivenName() {
+		return this.givenName;
+	}
+
+	/** @param givenName the given name */
+	public void setGivenName(String givenName) {
+		this.givenName = givenName;
+	}
+
+	/** @return the family name */
+	public String getFamilyName() {
+		return this.familyName;
+	}
+
+	/** @param familyName the family name */
+	public void setFamilyName(String familyName) {
+		this.familyName = familyName;
+	}
+
+	/** @return the visibility of the name */
+	public String getVisibility() {
+		return this.visibility;
+	}
+
+	/** @param visibility the visibility of the name */
+	public void setVisibility(String visibility) {
+		this.visibility = visibility;
+	}
+
+	/** @return the credit name */
+	public String getCreditName() {
+		return this.creditName;
+	}
+
+	/** @param creditName the credit name */
+	public void setCreditName(String creditName) {
+		this.creditName = creditName;
+	}
+
+	/** @return the alternative names, {@code null} when none was set */
+	public List<String> getOtherNames() {
+		return this.otherNames;
+	}
+
+	/** @param otherNames the alternative names */
+	public void setOtherNames(List<String> otherNames) {
+		this.otherNames = otherNames;
+	}
+
+	/** @return the external identifiers, {@code null} when none was set or added */
+	public List<Pid> getOtherPids() {
+		return this.otherPids;
+	}
+
+	/** @param otherPids the external identifiers */
+	public void setOtherPids(List<Pid> otherPids) {
+		this.otherPids = otherPids;
+	}
+
+	/**
+	 * Appends an external identifier, creating the backing list on first use.
+	 *
+	 * @param pid the identifier to add
+	 */
+	public void addOtherPid(final Pid pid) {
+		if (this.otherPids == null) {
+			this.otherPids = new ArrayList<>();
+		}
+		this.otherPids.add(pid);
+	}
+
+	/** @return the biography */
+	public String getBiography() {
+		return this.biography;
+	}
+
+	/** @param biography the biography */
+	public void setBiography(String biography) {
+		this.biography = biography;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Employment.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Employment.java
@ -0,0 +1,54 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+/**
+ * Bean holding the data extracted from an ORCID employment record: period,
+ * affiliation identifier, department name and role title.
+ */
+public class Employment extends ORCIDItem {
+
+	private String startDate;
+	// renamed from "EndDate": fields follow the lowerCamelCase convention, accessors are unchanged
+	private String endDate;
+
+	private Pid affiliationId;
+
+	private String departmentName;
+
+	private String roleTitle;
+
+	/** @return the start date of the employment */
+	public String getStartDate() {
+		return startDate;
+	}
+
+	/** @param startDate the start date of the employment */
+	public void setStartDate(String startDate) {
+		this.startDate = startDate;
+	}
+
+	/** @return the end date of the employment */
+	public String getEndDate() {
+		return endDate;
+	}
+
+	/** @param endDate the end date of the employment */
+	public void setEndDate(String endDate) {
+		this.endDate = endDate;
+	}
+
+	/** @return the identifier of the affiliation, {@code null} when not set */
+	public Pid getAffiliationId() {
+		return affiliationId;
+	}
+
+	/** @param affiliationId the identifier of the affiliation */
+	public void setAffiliationId(Pid affiliationId) {
+		this.affiliationId = affiliationId;
+	}
+
+	/** @return the department name */
+	public String getDepartmentName() {
+		return departmentName;
+	}
+
+	/** @param departmentName the department name */
+	public void setDepartmentName(String departmentName) {
+		this.departmentName = departmentName;
+	}
+
+	/** @return the role title */
+	public String getRoleTitle() {
+		return roleTitle;
+	}
+
+	/** @param roleTitle the role title */
+	public void setRoleTitle(String roleTitle) {
+		this.roleTitle = roleTitle;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java
@ -0,0 +1,14 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+/**
+ * Base bean of the ORCID model: carries the ORCID identifier the item refers to.
+ */
+public class ORCIDItem {
+
+	private String orcid;
+
+	/** @return the ORCID identifier, {@code null} when not set */
+	public String getOrcid() {
+		return this.orcid;
+	}
+
+	/** @param orcid the ORCID identifier */
+	public void setOrcid(final String orcid) {
+		this.orcid = orcid;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Pid.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Pid.java
@ -0,0 +1,33 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+/**
+ * Bean describing an identifier as a (value, schema) pair.
+ */
+public class Pid {
+
+	private String value;
+	private String schema;
+
+	/** Creates an identifier with neither value nor schema set. */
+	public Pid() {
+	}
+
+	/**
+	 * Creates an identifier.
+	 *
+	 * @param value  the value of the identifier
+	 * @param schema the schema the value belongs to
+	 */
+	public Pid(final String value, final String schema) {
+		this.schema = schema;
+		this.value = value;
+	}
+
+	/** @return the value of the identifier */
+	public String getValue() {
+		return this.value;
+	}
+
+	/** @param value the value of the identifier */
+	public void setValue(final String value) {
+		this.value = value;
+	}
+
+	/** @return the schema of the identifier */
+	public String getSchema() {
+		return this.schema;
+	}
+
+	/** @param schema the schema of the identifier */
+	public void setSchema(final String schema) {
+		this.schema = schema;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java
@ -0,0 +1,35 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Bean holding the data extracted from an ORCID work: its title and its identifiers.
+ */
+public class Work extends ORCIDItem {
+
+	private String title;
+	private List<Pid> pids;
+
+	/** @return the title of the work */
+	public String getTitle() {
+		return this.title;
+	}
+
+	/** @param title the title of the work */
+	public void setTitle(String title) {
+		this.title = title;
+	}
+
+	/** @return the identifiers of the work, {@code null} when none was set or added */
+	public List<Pid> getPids() {
+		return this.pids;
+	}
+
+	/** @param pids the identifiers of the work */
+	public void setPids(List<Pid> pids) {
+		this.pids = pids;
+	}
+
+	/**
+	 * Appends an identifier, creating the backing list on first use.
+	 *
+	 * @param pid the identifier to add
+	 */
+	public void addPid(Pid pid) {
+		if (this.pids == null) {
+			this.pids = new ArrayList<>();
+		}
+		this.pids.add(pid);
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json
@ -6,9 +6,15 @@
    "paramRequired": false
  },
  {
-    "paramName": "ip",
-    "paramLongName": "inputPath",
-    "paramDescription": "the URL from where to get the programme file",
+    "paramName": "cip",
+    "paramLongName": "crossrefInputPath",
+    "paramDescription": "the path to get the input data from Crossref",
+    "paramRequired": true
+  },
+  {
+    "paramName": "pip",
+    "paramLongName": "pubmedInputPath",
+    "paramDescription": "the path to get the input data from Pubmed",
    "paramRequired": true
  },
  {
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
@ -31,5 +31,6 @@ spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListen
 # The following is needed as a property of a workflow
 oozie.wf.application.path=${oozieTopWfApplicationPath}

-inputPath=/data/bip-affiliations/data.json
+crossrefInputPath=/data/bip-affiliations/data.json
+pubmedInputPath=/data/bip-affiliations/pubmed-data.json
 outputPath=/tmp/crossref-affiliations-output-v5
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
@ -2,8 +2,12 @@
    <parameters>

        <property>
-            <name>inputPath</name>
-            <description>the path where to find the inferred affiliation relations</description>
+            <name>crossrefInputPath</name>
+            <description>the path where to find the inferred affiliation relations from Crossref</description>
+        </property>
+        <property>
+            <name>pubmedInputPath</name>
+            <description>the path where to find the inferred affiliation relations from Pubmed</description>
        </property>
        <property>
            <name>outputPath</name>
@ -83,7 +87,7 @@
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
-            <name>Produces the atomic action with the inferred by BIP! affiliation relations from Crossref</name>
+            <name>Produces the atomic action with the inferred by BIP! affiliation relations (from Crossref and Pubmed)</name>
            <class>eu.dnetlib.dhp.actionmanager.bipaffiliations.PrepareAffiliationRelations</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>
@ -96,7 +100,8 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
-            <arg>--inputPath</arg><arg>${inputPath}</arg>
+            <arg>--crossrefInputPath</arg><arg>${crossrefInputPath}</arg>
+            <arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml
@ -5,11 +5,6 @@
            <name>fosPath</name>
            <description>the input path of the resources to be extended</description>
        </property>
-
-        <property>
-            <name>bipScorePath</name>
-            <description>the path where to find the bipFinder scores</description>
-        </property>
        <property>
            <name>outputPath</name>
            <description>the path where to store the actionset</description>
@ -77,35 +72,10 @@


    <fork name="prepareInfo">
-        <path start="prepareBip"/>
        <path start="getFOS"/>
        <path start="getSDG"/>
    </fork>

-    <action name="prepareBip">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Produces the unresolved from BIP! Finder</name>
-            <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareBipFinder</class>
-            <jar>dhp-aggregation-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${bipScorePath}</arg>
-            <arg>--outputPath</arg><arg>${workingDir}/prepared</arg>
-        </spark>
-        <ok to="join"/>
-        <error to="Kill"/>
-    </action>
-
    <action name="getFOS">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
@ -125,6 +95,7 @@
            </spark-opts>
            <arg>--sourcePath</arg><arg>${fosPath}</arg>
            <arg>--outputPath</arg><arg>${workingDir}/input/fos</arg>
+            <arg>--delimiter</arg><arg>${delimiter}</arg>
        </spark>
        <ok to="prepareFos"/>
        <error to="Kill"/>
@ -213,7 +184,7 @@
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
-            <name>Saves the result produced for bip and fos by grouping results with the same id</name>
+            <name>Save the unresolved entities grouping results with the same id</name>
            <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.SparkSaveUnresolved</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json
@ -16,10 +16,11 @@
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "the hdfs name node",
    "paramRequired": false
-  },  {
-  "paramName": "sdr",
-  "paramLongName": "shouldDuplicateRels",
-  "paramDescription": "the hdfs name node",
-  "paramRequired": false
-}
+  },
+  {
+    "paramName": "sdr",
+    "paramLongName": "shouldDuplicateRels",
+    "paramDescription": "activates/deactivates the construction of bidirectional relations Cites/IsCitedBy",
+    "paramRequired": false
+  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json
@ -16,5 +16,11 @@
    "paramLongName": "hdfsNameNode",
    "paramDescription": "the hdfs name node",
    "paramRequired": true
+  },
+  {
+    "paramName": "p",
+    "paramLongName": "prefix",
+    "paramDescription": "COCI or POCI",
+    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json
@ -30,7 +30,12 @@
    "paramLongName": "inputFile",
    "paramDescription": "the hdfs name node",
    "paramRequired": true
-  }
+  }, {
+  "paramName": "f",
+  "paramLongName": "format",
+  "paramDescription": "the format of the input data (COCI or POCI)",
+  "paramRequired": true
+}
 ]


--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml
@ -34,6 +34,7 @@
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
+
    <action name="download">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
@ -46,7 +47,7 @@
            </configuration>
            <exec>download.sh</exec>
            <argument>${filelist}</argument>
-            <argument>${workingPath}/Original</argument>
+            <argument>${workingPath}/${prefix}/Original</argument>
            <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
            <file>download.sh</file>
            <capture-output/>
@ -54,12 +55,14 @@
        <ok to="extract"/>
        <error to="Kill"/>
    </action>
+
    <action name="extract">
        <java>
            <main-class>eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs</main-class>
            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
            <arg>--inputFile</arg><arg>${inputFile}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingPath</arg><arg>${workingPath}/${prefix}</arg>
+            <arg>--prefix</arg><arg>${prefix}</arg>
        </java>
        <ok to="read"/>
        <error to="Kill"/>
@ -82,10 +85,11 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}/COCI</arg>
-            <arg>--outputPath</arg><arg>${workingPath}/COCI_JSON/</arg>
+            <arg>--workingPath</arg><arg>${workingPath}/${prefix}/${prefix}</arg>
+            <arg>--outputPath</arg><arg>${workingPath}/${prefix}/${prefix}_JSON/</arg>
            <arg>--delimiter</arg><arg>${delimiter}</arg>
            <arg>--inputFile</arg><arg>${inputFileCoci}</arg>
+            <arg>--format</arg><arg>${prefix}</arg>
        </spark>
        <ok to="create_actionset"/>
        <error to="Kill"/>
@ -108,7 +112,7 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
-            <arg>--inputPath</arg><arg>${workingPath}/COCI_JSON</arg>
+            <arg>--inputPath</arg><arg>${workingPath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json
@ -0,0 +1,21 @@
+[
+  {
+    "paramName": "n",
+    "paramLongName": "namenode",
+    "paramDescription": "the Name Node URI",
+    "paramRequired": true
+  },
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the target PATH where to download the files",
+    "paramRequired": true
+  },
+  {
+    "paramName": "a",
+    "paramLongName": "apiURL",
+    "paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
+    "paramRequired": true
+  }
+
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json
@ -0,0 +1,21 @@
+[
+  {
+    "paramName": "n",
+    "paramLongName": "namenode",
+    "paramDescription": "the Name Node URI",
+    "paramRequired": true
+  },
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the target PATH to extract files",
+    "paramRequired": true
+  },
+  {
+    "paramName": "s",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the PATH where the tar.gz files were downloaded",
+    "paramRequired": true
+  }
+
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json
@ -0,0 +1,21 @@
+[
+  {
+    "paramName": "m",
+    "paramLongName": "master",
+    "paramDescription": "the master name",
+    "paramRequired": true
+  },
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the target PATH of the DF tables",
+    "paramRequired": true
+  },
+  {
+    "paramName": "s",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the PATH of the ORCID sequence file",
+    "paramRequired": true
+  }
+
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/config-default.xml
@ -0,0 +1,23 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/workflow.xml
@ -0,0 +1,81 @@
+<workflow-app name="download_ORCID_DUMP" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>targetPath</name>
+            <description>the path where the original ORCID dump is stored</description>
+        </property>
+        <property>
+            <name>apiURL</name>
+            <description>the Figshare API URL used to retrieve the list of files to download</description>
+        </property>
+    </parameters>
+
+    <!-- Entry point: run the whole chain DownloadDUMP -> extractDump -> generateTables.
+         Starting from generateTables left the download and extraction actions unreachable. -->
+    <start to="DownloadDUMP"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="DownloadDUMP">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.collection.orcid.DownloadORCIDDumpApplication</main-class>
+            <arg>--namenode</arg><arg>${nameNode}</arg>
+            <arg>--targetPath</arg><arg>${targetPath}</arg>
+            <arg>--apiURL</arg><arg>${apiURL}</arg>
+        </java>
+        <ok to="extractDump"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="extractDump">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+
+            <main-class>eu.dnetlib.dhp.collection.orcid.ExtractORCIDDump</main-class>
+            <java-opts> -Xmx6g </java-opts>
+            <arg>--namenode</arg><arg>${nameNode}</arg>
+            <arg>--sourcePath</arg><arg>${targetPath}</arg>
+            <arg>--targetPath</arg><arg>${targetPath}/extracted</arg>
+        </java>
+        <ok to="generateTables"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="generateTables">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Generate ORCID Tables</name>
+            <class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=2g
+                --conf spark.sql.shuffle.partitions=3000
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${targetPath}/extracted</arg>
+            <arg>--targetPath</arg><arg>${targetPath}/tables</arg>
+            <arg>--master</arg><arg>yarn</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/preprocess_orcid_dump_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/preprocess_orcid_dump_parameter.json
@ -0,0 +1,21 @@
+[
+  {
+    "paramName": "n",
+    "paramLongName": "namenode",
+    "paramDescription": "the Name Node URI",
+    "paramRequired": true
+  },
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the target PATH where to download the files",
+    "paramRequired": true
+  },
+  {
+    "paramName": "a",
+    "paramLongName": "apiURL",
+    "paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
+    "paramRequired": true
+  }
+
+]
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/orcid/SparkGenerateORCIDTable.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/orcid/SparkGenerateORCIDTable.scala
@ -0,0 +1,101 @@
+package eu.dnetlib.dhp.collection.orcid
+
+import eu.dnetlib.dhp.application.AbstractScalaApplication
+import eu.dnetlib.dhp.collection.orcid.model.{Author, Employment, Pid, Work}
+import org.apache.hadoop.io.Text
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+/** Spark application that turns the extracted ORCID records into three tables:
+  * Authors, Employments and Works.
+  *
+  * @param propertyPath path of the argument-definition resource, forwarded to AbstractScalaApplication
+  * @param args         command-line arguments, forwarded to AbstractScalaApplication
+  * @param log          logger used to report the resolved parameters
+  */
+class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
+    extends AbstractScalaApplication(propertyPath, args, log: Logger) {
+
+  /** Entry point of the Spark node: reads the `sourcePath` and `targetPath`
+    * arguments and generates the Authors, Employments and Works tables, in that order.
+    */
+  override def run(): Unit = {
+    val sourcePath: String = parser.get("sourcePath")
+    log.info("found parameters sourcePath: {}", sourcePath)
+    val targetPath: String = parser.get("targetPath")
+    log.info("found parameters targetPath: {}", targetPath)
+    extractORCIDTable(spark, sourcePath, targetPath)
+    extractORCIDEmploymentsTable(spark, sourcePath, targetPath)
+    extractORCIDWorksTable(spark, sourcePath, targetPath)
+  }
+
+  /** Writes the records whose key contains "summaries", parsed with
+    * `OrcidParser.parseSummary`, to `targetPath/Authors`.
+    *
+    * @param spark      the active Spark session
+    * @param sourcePath path of the (Text, Text) sequence file to read
+    * @param targetPath base path under which the table is written
+    */
+  def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
+    implicit val authorEncoder: Encoder[Author] = Encoders.bean(classOf[Author])
+    extractTable[Author](spark, sourcePath, targetPath, "summaries", "Authors", (p, xml) => p.parseSummary(xml))
+  }
+
+  /** Writes the records whose key contains "works", parsed with
+    * `OrcidParser.parseWork`, to `targetPath/Works`.
+    *
+    * @param spark      the active Spark session
+    * @param sourcePath path of the (Text, Text) sequence file to read
+    * @param targetPath base path under which the table is written
+    */
+  def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
+    implicit val workEncoder: Encoder[Work] = Encoders.bean(classOf[Work])
+    extractTable[Work](spark, sourcePath, targetPath, "works", "Works", (p, xml) => p.parseWork(xml))
+  }
+
+  /** Writes the records whose key contains "employments", parsed with
+    * `OrcidParser.parseEmployment`, to `targetPath/Employments`.
+    *
+    * @param spark      the active Spark session
+    * @param sourcePath path of the (Text, Text) sequence file to read
+    * @param targetPath base path under which the table is written
+    */
+  def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
+    implicit val employmentEncoder: Encoder[Employment] = Encoders.bean(classOf[Employment])
+    extractTable[Employment](
+      spark,
+      sourcePath,
+      targetPath,
+      "employments",
+      "Employments",
+      (p, xml) => p.parseEmployment(xml)
+    )
+  }
+
+  /** Shared pipeline behind the three public extract methods: reads the sequence file,
+    * keeps the records whose key contains `keyFragment`, parses each value with a fresh
+    * `OrcidParser`, drops the null results and overwrites `targetPath/tableName`.
+    *
+    * @param keyFragment substring a record key must contain to be selected
+    * @param tableName   name of the output folder created under `targetPath`
+    * @param parse       parsing step applied to the selected record values
+    */
+  private def extractTable[T <: AnyRef](
+    spark: SparkSession,
+    sourcePath: String,
+    targetPath: String,
+    keyFragment: String,
+    tableName: String,
+    parse: (OrcidParser, String) => T
+  )(implicit resultEncoder: Encoder[T]): Unit = {
+    // Needed for toDF and for the (String, String) tuple encoder.
+    import spark.implicits._
+    spark.sparkContext
+      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
+      .map { case (key, value) => (key.toString, value.toString) }
+      .toDF
+      .as[(String, String)]
+      .filter(record => record._1.contains(keyFragment))
+      .map(record => parse(new OrcidParser, record._2))
+      .filter(item => item != null)
+      .write
+      .mode(SaveMode.Overwrite)
+      .save(s"$targetPath/$tableName")
+  }
+}
+
+/** Command-line launcher for the [[SparkGenerateORCIDTable]] application. */
+object SparkGenerateORCIDTable {
+
+  val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)
+
+  /** Builds the application with its argument-definition resource, initializes it and runs it.
+    *
+    * @param args command-line arguments handed over to the application
+    */
+  def main(args: Array[String]): Unit = {
+    val parameterDefinition = "/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json"
+    val application = new SparkGenerateORCIDTable(parameterDefinition, args, log)
+    application.initialize().run()
+  }
+}
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
@ -166,7 +166,7 @@ object DataciteToOAFTransformation {
    resourceTypeGeneral: String,
    schemaOrg: String,
    vocabularies: VocabularyGroup
-  ): (Qualifier, Qualifier) = {
+  ): (Qualifier, Qualifier, String) = {
    if (resourceType != null && resourceType.nonEmpty) {
      val typeQualifier =
        vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
@ -176,7 +176,8 @@ object DataciteToOAFTransformation {
          vocabularies.getSynonymAsQualifier(
            ModelConstants.DNET_RESULT_TYPOLOGIES,
            typeQualifier.getClassid
-          )
+          ),
+          resourceType
        )
    }
    if (schemaOrg != null && schemaOrg.nonEmpty) {
@ -188,7 +189,8 @@ object DataciteToOAFTransformation {
          vocabularies.getSynonymAsQualifier(
            ModelConstants.DNET_RESULT_TYPOLOGIES,
            typeQualifier.getClassid
-          )
+          ),
+          schemaOrg
        )

    }
@ -203,7 +205,8 @@ object DataciteToOAFTransformation {
          vocabularies.getSynonymAsQualifier(
            ModelConstants.DNET_RESULT_TYPOLOGIES,
            typeQualifier.getClassid
-          )
+          ),
+          resourceTypeGeneral
        )

    }
@ -216,12 +219,18 @@ object DataciteToOAFTransformation {
    schemaOrg: String,
    vocabularies: VocabularyGroup
  ): Result = {
-    val typeQualifiers: (Qualifier, Qualifier) =
+    val typeQualifiers: (Qualifier, Qualifier, String) =
      getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
    if (typeQualifiers == null)
      return null
    val i = new Instance
    i.setInstancetype(typeQualifiers._1)
+    // ADD ORIGINAL TYPE
+    val itm = new InstanceTypeMapping
+    itm.setOriginalType(typeQualifiers._3)
+    itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
+    i.setInstanceTypeMapping(List(itm).asJava)
+
    typeQualifiers._2.getClassname match {
      case "dataset" =>
        val r = new OafDataset
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
@ -176,7 +176,7 @@ object BioDBToOAF {
      i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava)
    }

-    if (input.pidType.equalsIgnoreCase("clinicaltrials.gov"))
+    if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) {
      i.setInstancetype(
        OafMapperUtils.qualifier(
          "0037",
@ -185,7 +185,11 @@ object BioDBToOAF {
          ModelConstants.DNET_PUBLICATION_RESOURCE
        )
      )
-    else
+      val itm = new InstanceTypeMapping
+      itm.setOriginalType(input.pidType)
+      itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
+      i.setInstanceTypeMapping(List(itm).asJava)
+    } else {
      i.setInstancetype(
        OafMapperUtils.qualifier(
          "0046",
@ -194,6 +198,11 @@ object BioDBToOAF {
          ModelConstants.DNET_PUBLICATION_RESOURCE
        )
      )
+      val itm = new InstanceTypeMapping
+      itm.setOriginalType("Bioentity")
+      itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
+      i.setInstanceTypeMapping(List(itm).asJava)
+    }

    if (input.datasource == null || input.datasource.isEmpty)
      return null
@ -265,6 +274,10 @@ object BioDBToOAF {
        ModelConstants.DNET_PUBLICATION_RESOURCE
      )
    )
+    val itm = new InstanceTypeMapping
+    itm.setOriginalType("Bioentity")
+    itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
+    i.setInstanceTypeMapping(List(itm).asJava)

    i.setCollectedfrom(collectedFromMap("uniprot"))
    d.setInstance(List(i).asJava)
@ -471,6 +484,10 @@ object BioDBToOAF {
        ModelConstants.DNET_PUBLICATION_RESOURCE
      )
    )
+    val itm = new InstanceTypeMapping
+    itm.setOriginalType("Bioentity")
+    itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
+    i.setInstanceTypeMapping(List(itm).asJava)

    i.setCollectedfrom(collectedFromMap("pdb"))
    d.setInstance(List(i).asJava)
@ -571,6 +588,10 @@ object BioDBToOAF {
        ModelConstants.DNET_PUBLICATION_RESOURCE
      )
    )
+    val itm = new InstanceTypeMapping
+    itm.setOriginalType("Bioentity")
+    itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
+    i.setInstanceTypeMapping(List(itm).asJava)

    i.setCollectedfrom(collectedFromMap("ebi"))
    d.setInstance(List(i).asJava)
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
@ -188,13 +188,24 @@ object PubMedToOaf {
      val cojbCategory =
        getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue)
      pubmedInstance.setInstancetype(cojbCategory)
+      // ADD ORIGINAL TYPE to the publication
+      val itm = new InstanceTypeMapping
+      itm.setOriginalType(ja.get.getValue)
+      itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
+      pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
    } else {
      val i_type = article.getPublicationTypes.asScala
-        .map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue))
-        .find(q => q != null)
-      if (i_type.isDefined)
-        pubmedInstance.setInstancetype(i_type.get)
-      else
+        .map(s => (s.getValue, getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
+        .find(q => q._2 != null)
+
+      if (i_type.isDefined) {
+        pubmedInstance.setInstancetype(i_type.get._2)
+        // ADD ORIGINAL TYPE to the publication
+        val itm = new InstanceTypeMapping
+        itm.setOriginalType(i_type.get._1)
+        itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
+        pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
+      } else
        return null
    }
    val result = createResult(pubmedInstance.getInstancetype, vocabularies)
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
@ -74,7 +74,11 @@ public class PrepareAffiliationRelationsTest {
 	@Test
 	void testMatch() throws Exception {

-		String affiliationRelationsPath = getClass()
+		String crossrefAffiliationRelationPath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
+			.getPath();
+
+		String pubmedAffiliationRelationsPath = getClass()
 			.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
 			.getPath();

@ -84,7 +88,8 @@ public class PrepareAffiliationRelationsTest {
 			.main(
 				new String[] {
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
-					"-inputPath", affiliationRelationsPath,
+					"-crossrefInputPath", crossrefAffiliationRelationPath,
+					"-pubmedInputPath", pubmedAffiliationRelationsPath,
 					"-outputPath", outputPath
 				});

@ -101,7 +106,7 @@ public class PrepareAffiliationRelationsTest {
 //            );
 //        }
 		// count the number of relations
-		assertEquals(20, tmp.count());
+		assertEquals(40, tmp.count());

 		Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
 		dataset.createOrReplaceTempView("result");
@ -112,7 +117,7 @@ public class PrepareAffiliationRelationsTest {
 		// verify that we have equal number of bi-directional relations
 		Assertions
 			.assertEquals(
-				10, execVerification
+				20, execVerification
 					.filter(
 						"relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
 					.collectAsList()
@ -120,7 +125,7 @@ public class PrepareAffiliationRelationsTest {

 		Assertions
 			.assertEquals(
-				10, execVerification
+				20, execVerification
 					.filter(
 						"relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
 					.collectAsList()
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFosTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFosTest.java
@ -13,10 +13,7 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@ -68,6 +65,7 @@ public class GetFosTest {
 	}

 	@Test
+	@Disabled
 	void test3() throws Exception {
 		final String sourcePath = getClass()
 			.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.tsv")
@ -96,4 +94,37 @@ public class GetFosTest {
 		tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null));

 	}
+
+	/**
+	 * Runs {@link GetFOSSparkJob} on the comma-delimited {@code fos_sbs2.csv} fixture and checks that every
+	 * produced {@link FOSDataModel} has a non-null DOI, non-null levels 1 to 4 and non-null L3/L4 scores.
+	 */
+	@Test
+	void test4() throws Exception {
+		final String sourcePath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs2.csv")
+			.getPath();
+
+		final String outputPath = workingDir.toString() + "/fos.json";
+		GetFOSSparkJob
+			.main(
+				new String[] {
+					"--isSparkSessionManaged", Boolean.FALSE.toString(),
+					"--sourcePath", sourcePath,
+					"--delimiter", ",",
+					"-outputPath", outputPath
+
+				});
+
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		final JavaRDD<FOSDataModel> tmp = sc
+			.textFile(outputPath)
+			.map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));
+
+		// Every foreach is a separate Spark action that re-reads and re-parses the output file,
+		// so all the fields are verified within a single pass.
+		tmp.foreach(t -> {
+			Assertions.assertNotNull(t.getDoi());
+			Assertions.assertNotNull(t.getLevel1());
+			Assertions.assertNotNull(t.getLevel2());
+			Assertions.assertNotNull(t.getLevel3());
+			Assertions.assertNotNull(t.getLevel4());
+			Assertions.assertNotNull(t.getScoreL3());
+			Assertions.assertNotNull(t.getScoreL4());
+		});
+
+	}
 }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java
@ -67,92 +67,6 @@ public class PrepareTest {
 		spark.stop();
 	}

-	@Test
-	void bipPrepareTest() throws Exception {
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json")
-			.getPath();
-
-		PrepareBipFinder
-			.main(
-				new String[] {
-					"--isSparkSessionManaged", Boolean.FALSE.toString(),
-					"--sourcePath", sourcePath,
-					"--outputPath", workingDir.toString() + "/work"
-
-				});
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<Result> tmp = sc
-			.textFile(workingDir.toString() + "/work/bip")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
-
-		Assertions.assertEquals(86, tmp.count());
-
-		String doi1 = "unresolved::10.0000/096020199389707::doi";
-
-		Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count());
-		Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().size());
-		Assertions
-			.assertEquals(
-				3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().get(0).getMeasures().size());
-		Assertions
-			.assertEquals(
-				"6.34596412687e-09", tmp
-					.filter(r -> r.getId().equals(doi1))
-					.collect()
-					.get(0)
-					.getInstance()
-					.get(0)
-					.getMeasures()
-					.stream()
-					.filter(sl -> sl.getId().equals("influence"))
-					.collect(Collectors.toList())
-					.get(0)
-					.getUnit()
-					.get(0)
-					.getValue());
-		Assertions
-			.assertEquals(
-				"0.641151896994", tmp
-					.filter(r -> r.getId().equals(doi1))
-					.collect()
-					.get(0)
-					.getInstance()
-					.get(0)
-					.getMeasures()
-					.stream()
-					.filter(sl -> sl.getId().equals("popularity_alt"))
-					.collect(Collectors.toList())
-					.get(0)
-					.getUnit()
-					.get(0)
-					.getValue());
-		Assertions
-			.assertEquals(
-				"2.33375102921e-09", tmp
-					.filter(r -> r.getId().equals(doi1))
-					.collect()
-					.get(0)
-					.getInstance()
-					.get(0)
-					.getMeasures()
-					.stream()
-					.filter(sl -> sl.getId().equals("popularity"))
-					.collect(Collectors.toList())
-					.get(0)
-					.getUnit()
-					.get(0)
-					.getValue());
-
-		final String doi2 = "unresolved::10.3390/s18072310::doi";
-
-		Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count());
-		Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size());
-
-	}
-
 	@Test
 	void fosPrepareTest() throws Exception {
 		final String sourcePath = getClass()
@ -222,6 +136,76 @@ public class PrepareTest {

 	}

+	/**
+	 * Runs {@link PrepareFOSSparkJob} on the {@code fos_sbs_2.json} fixture and verifies the number of
+	 * produced results and, for one DOI, the number of subjects and the trust attached to each FOS level.
+	 */
+	@Test
+	void fosPrepareTest2() throws Exception {
+		final String fosInput = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json")
+			.getPath();
+		final String workPath = workingDir.toString() + "/work";
+
+		PrepareFOSSparkJob.main(new String[] {
+			"--isSparkSessionManaged", Boolean.FALSE.toString(),
+			"--sourcePath", fosInput,
+			"-outputPath", workPath
+		});
+
+		final JavaRDD<Result> results = JavaSparkContext
+			.fromSparkContext(spark.sparkContext())
+			.textFile(workPath + "/fos")
+			.map(line -> OBJECT_MAPPER.readValue(line, Result.class));
+
+		final String targetId = "unresolved::10.1016/j.revmed.2006.07.012::doi";
+		final JavaRDD<Result> matching = results.filter(r -> r.getId().equals(targetId));
+
+		assertEquals(13, results.count());
+		assertEquals(1, matching.count());
+
+		final Result result = matching.first();
+
+		result
+			.getSubject()
+			.forEach(s -> System.out.println(s.getValue() + " trust = " + s.getDataInfo().getTrust()));
+		Assertions.assertEquals(6, result.getSubject().size());
+
+		// Level 1 and level 2 subjects carry an empty trust.
+		assertTrue(
+			result
+				.getSubject()
+				.stream()
+				.filter(s -> s.getValue().contains("03 medical and health sciences"))
+				.anyMatch(s -> s.getDataInfo().getTrust().equals("")));
+
+		assertTrue(
+			result
+				.getSubject()
+				.stream()
+				.filter(s -> s.getValue().contains("0302 clinical medicine"))
+				.anyMatch(s -> s.getDataInfo().getTrust().equals("")));
+
+		// Level 3 and level 4 subjects carry the score read from the fixture as trust.
+		assertTrue(
+			result
+				.getSubject()
+				.stream()
+				.filter(s -> s.getValue().contains("030204 cardiovascular system & hematology"))
+				.anyMatch(s -> s.getDataInfo().getTrust().equals("0.5101401805877686")));
+
+		assertTrue(
+			result
+				.getSubject()
+				.stream()
+				.filter(s -> s.getValue().contains("03020409 Hematology/Coagulopathies"))
+				.anyMatch(s -> s.getDataInfo().getTrust().equals("0.0546871414174914")));
+
+	}
+
 	@Test
 	void sdgPrepareTest() throws Exception {
 		final String sourcePath = getClass()
@ -268,57 +252,4 @@ public class PrepareTest {

 	}

-//	@Test
-//	void test3() throws Exception {
-//		final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz";
-//
-//		final String outputPath = workingDir.toString() + "/fos.json";
-//		GetFOSSparkJob
-//			.main(
-//				new String[] {
-//					"--isSparkSessionManaged", Boolean.FALSE.toString(),
-//					"--sourcePath", sourcePath,
-//
-//					"-outputPath", outputPath
-//
-//				});
-//
-//		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-//
-//		JavaRDD<FOSDataModel> tmp = sc
-//			.textFile(outputPath)
-//			.map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));
-//
-//		tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null));
-//		tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null));
-//		tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null));
-//		tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null));
-//
-//	}
-//
-//	@Test
-//	void test4() throws Exception {
-//		final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz";
-//
-//		final String outputPath = workingDir.toString() + "/sdg.json";
-//		GetSDGSparkJob
-//			.main(
-//				new String[] {
-//					"--isSparkSessionManaged", Boolean.FALSE.toString(),
-//					"--sourcePath", sourcePath,
-//
-//					"-outputPath", outputPath
-//
-//				});
-//
-//		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-//
-//		JavaRDD<SDGDataModel> tmp = sc
-//			.textFile(outputPath)
-//			.map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class));
-//
-//		tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null));
-//		tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null));
-//
-//	}
 }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java
@ -340,18 +340,7 @@ public class ProduceTest {
 	}

 	private JavaRDD<Result> getResultJavaRDD() throws Exception {
-		final String bipPath = getClass()
-			.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json")
-			.getPath();

-		PrepareBipFinder
-			.main(
-				new String[] {
-					"--isSparkSessionManaged", Boolean.FALSE.toString(),
-					"--sourcePath", bipPath,
-					"--outputPath", workingDir.toString() + "/work"
-
-				});
 		final String fosPath = getClass()
 			.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json")
 			.getPath();
@ -379,6 +368,40 @@ public class ProduceTest {
 			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
 	}

+	/**
+	 * Helper that runs {@link PrepareFOSSparkJob} on the {@code fos_sbs_2.json} fixture, feeds its output to
+	 * {@link SparkSaveUnresolved} and returns the resulting unresolved entities, printing each one as JSON.
+	 * <p>
+	 * Deliberately not annotated with {@code @Test}: JUnit Jupiter only executes test methods that return
+	 * {@code void}, so the annotation had no effect on this value-returning method.
+	 *
+	 * @return the {@link Result} records read from {@code workingDir/unresolved}
+	 * @throws Exception if one of the Spark jobs fails
+	 */
+	public JavaRDD<Result> getResultFosJavaRDD() throws Exception {
+
+		final String fosPath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json")
+			.getPath();
+
+		PrepareFOSSparkJob
+			.main(
+				new String[] {
+					"--isSparkSessionManaged", Boolean.FALSE.toString(),
+					"--sourcePath", fosPath,
+					"-outputPath", workingDir.toString() + "/work"
+				});
+
+		SparkSaveUnresolved.main(new String[] {
+			"--isSparkSessionManaged", Boolean.FALSE.toString(),
+			"--sourcePath", workingDir.toString() + "/work",
+
+			"-outputPath", workingDir.toString() + "/unresolved"
+
+		});
+
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		JavaRDD<Result> tmp = sc
+			.textFile(workingDir.toString() + "/unresolved")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+		// Reuse the shared mapper instead of building a new ObjectMapper for every record.
+		tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
+
+		return tmp;
+
+	}
+
 	@Test
 	void prepareTest5Subjects() throws Exception {
 		final String doi = "unresolved::10.1063/5.0032658::doi";
@ -415,18 +438,7 @@ public class ProduceTest {
 	}

 	private JavaRDD<Result> getResultJavaRDDPlusSDG() throws Exception {
-		final String bipPath = getClass()
-			.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json")
-			.getPath();

-		PrepareBipFinder
-			.main(
-				new String[] {
-					"--isSparkSessionManaged", Boolean.FALSE.toString(),
-					"--sourcePath", bipPath,
-					"--outputPath", workingDir.toString() + "/work"
-
-				});
 		final String fosPath = getClass()
 			.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json")
 			.getPath();
@ -483,14 +495,6 @@ public class ProduceTest {
 					.filter(row -> row.getSubject() != null)
 					.count());

-		Assertions
-			.assertEquals(
-				85,
-				tmp
-					.filter(row -> !row.getId().equals(doi))
-					.filter(r -> r.getInstance() != null && r.getInstance().size() > 0)
-					.count());
-
 	}

 	@Test
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDTest.java
@ -0,0 +1,119 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
+import org.apache.spark.SparkContext;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.ximpleware.NavException;
+import com.ximpleware.ParseException;
+import com.ximpleware.XPathEvalException;
+import com.ximpleware.XPathParseException;
+
+import eu.dnetlib.dhp.collection.orcid.model.Author;
+import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
+import eu.dnetlib.dhp.parser.utility.VtdException;
+
+/**
+ * Smoke tests for {@link OrcidParser}: each test parses XML fixtures from the test classpath and prints the
+ * parsed objects as JSON. A test fails only if reading, parsing or serializing throws.
+ */
+public class DownloadORCIDTest {
+	private final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);
+
+	/** Parses the summary fixture and prints the parsed item as JSON. */
+	@Test
+	public void testSummary() throws Exception {
+		final String xml = readResource("/eu/dnetlib/dhp/collection/orcid/summary.xml");
+
+		final OrcidParser parser = new OrcidParser();
+		final ORCIDItem orcidItem = parser.parseSummary(xml);
+
+		final ObjectMapper mapper = new ObjectMapper();
+		System.out.println(mapper.writeValueAsString(orcidItem));
+
+	}
+
+	/** Parses every work fixture and prints each parsed work as JSON. */
+	@Test
+	public void testParsingWork() throws Exception {
+
+		final List<String> worksPaths = Arrays
+			.asList(
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml");
+
+		final OrcidParser parser = new OrcidParser();
+		final ObjectMapper mapper = new ObjectMapper();
+		for (final String path : worksPaths) {
+			System.out.println(mapper.writeValueAsString(parser.parseWork(readResource(path))));
+		}
+	}
+
+	/** Parses every employment fixture and prints each parsed employment as JSON. */
+	@Test
+	public void testParsingEmployments() throws Exception {
+
+		final List<String> employmentPaths = Arrays
+			.asList(
+				"/eu/dnetlib/dhp/collection/orcid/employment.xml",
+				"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
+				"/eu/dnetlib/dhp/collection/orcid/employment_3.xml");
+
+		final OrcidParser parser = new OrcidParser();
+		final ObjectMapper mapper = new ObjectMapper();
+		for (final String path : employmentPaths) {
+			System.out.println(mapper.writeValueAsString(parser.parseEmployment(readResource(path))));
+		}
+	}
+
+	/**
+	 * Reads a classpath resource into a string, closing the underlying stream afterwards.
+	 *
+	 * @param path absolute classpath location of the resource
+	 * @return the resource content decoded as UTF-8 (assumes the XML fixtures are UTF-8 encoded)
+	 * @throws IOException if the resource cannot be read
+	 * @throws NullPointerException if no resource exists at {@code path}
+	 */
+	private String readResource(final String path) throws IOException {
+		try (InputStream in = Objects.requireNonNull(getClass().getResourceAsStream(path), path)) {
+			// An explicit charset keeps the result independent of the platform default.
+			return IOUtils.toString(in, StandardCharsets.UTF_8);
+		}
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs2.csv
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs2.csv
@ -0,0 +1,26 @@
+DOI,OAID,level1,level2,level3,level4,score_for_L3,score_for_L4
+10.1016/j.anucene.2006.02.004,doi_________::00059d9963edf633bec756fb21b5bd72,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020209 energy,02020908 Climate change policy/Ethanol fuel,0.5,0.5
+10.1016/j.anucene.2006.02.004,doi_________::00059d9963edf633bec756fb21b5bd72,02 engineering and technology,0211 other engineering and technologies,021108 energy,02110808 Climate change policy/Ethanol fuel,0.5,0.5
+10.1016/j.revmed.2006.07.010,doi_________::0026476c1651a92c933d752ff12496c7,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis,N/A,0.5036656856536865,0.0
+10.1016/j.revmed.2006.07.010,doi_________::0026476c1651a92c933d752ff12496c7,03 medical and health sciences,0302 clinical medicine,030212 general & internal medicine,N/A,0.4963343143463135,0.0
+10.20965/jrm.2006.p0312,doi_________::0028336a2f3826cc83c47dbefac71543,02 engineering and technology,0209 industrial biotechnology,020901 industrial engineering & automation,02090104 Robotics/Robots,0.6111094951629639,0.5053805979936855
+10.20965/jrm.2006.p0312,doi_________::0028336a2f3826cc83c47dbefac71543,01 natural sciences,0104 chemical sciences,010401 analytical chemistry,N/A,0.3888905048370361,0.0
+10.1111/j.1747-7379.2006.040_1.x,doi_________::002c7077e7c114a8304eb90f59e45fa4,05 social sciences,0506 political science,050602 political science & public administration,05060202 Ethnic groups/Ethnicity,0.6159052848815918,0.7369035568037298
+10.1111/j.1747-7379.2006.040_1.x,doi_________::002c7077e7c114a8304eb90f59e45fa4,05 social sciences,0502 economics and business,050207 economics,N/A,0.3840946555137634,0.0
+10.1007/s10512-006-0049-9,doi_________::003f29f9254819cf4c78558b1bc25f10,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020209 energy,02020908 Climate change policy/Ethanol fuel,0.5,0.5
+10.1007/s10512-006-0049-9,doi_________::003f29f9254819cf4c78558b1bc25f10,02 engineering and technology,0211 other engineering and technologies,021108 energy,02110808 Climate change policy/Ethanol fuel,0.5,0.5
+10.1111/j.1365-2621.2005.01045.x,doi_________::00419355b4c3e0646bd0e1b301164c8e,04 agricultural and veterinary sciences,0404 agricultural biotechnology,040401 food science,04040102 Food science/Food industry,0.5,0.5
+10.1111/j.1365-2621.2005.01045.x,doi_________::00419355b4c3e0646bd0e1b301164c8e,04 agricultural and veterinary sciences,0405 other agricultural sciences,040502 food science,04050202 Food science/Food industry,0.5,0.5
+10.1002/chin.200617262,doi_________::004c8cef80668904961b9e62841793c8,01 natural sciences,0104 chemical sciences,010405 organic chemistry,01040508 Functional groups/Ethers,0.5566747188568115,0.5582916736602783
+10.1002/chin.200617262,doi_________::004c8cef80668904961b9e62841793c8,01 natural sciences,0104 chemical sciences,010402 general chemistry,01040207 Chemical synthesis/Total synthesis,0.4433253407478332,0.4417082965373993
+10.1016/j.revmed.2006.07.012,doi_________::005b1d0fb650b680abaf6cfe26a21604,03 medical and health sciences,0302 clinical medicine,030204 cardiovascular system & hematology,03020409 Hematology/Coagulopathies,0.5101401805877686,0.0546871414174914
+10.1016/j.revmed.2006.07.012,doi_________::005b1d0fb650b680abaf6cfe26a21604,03 medical and health sciences,0301 basic medicine,030105 genetics & heredity,N/A,0.4898599088191986,0.0
+10.4109/jslab.17.132,doi_________::00889baa06de363e37930daaf8e800c0,03 medical and health sciences,0301 basic medicine,030104 developmental biology,N/A,0.5,0.0
+10.4109/jslab.17.132,doi_________::00889baa06de363e37930daaf8e800c0,03 medical and health sciences,0303 health sciences,030304 developmental biology,N/A,0.5,0.0
+10.1108/00251740610715687,doi_________::0092cb1b1920d556719385a26363ecaa,05 social sciences,0502 economics and business,050203 business & management,05020311 International business/International trade,0.605047881603241,0.2156608108845153
+10.1108/00251740610715687,doi_________::0092cb1b1920d556719385a26363ecaa,05 social sciences,0502 economics and business,050211 marketing,N/A,0.394952118396759,0.0
+10.1080/03067310500248098,doi_________::00a76678d230e3f20b6356804448028f,04 agricultural and veterinary sciences,0404 agricultural biotechnology,040401 food science,04040102 Food science/Food industry,0.5,0.5
+10.1080/03067310500248098,doi_________::00a76678d230e3f20b6356804448028f,04 agricultural and veterinary sciences,0405 other agricultural sciences,040502 food science,04050202 Food science/Food industry,0.5,0.5
+10.3152/147154306781778533,doi_________::00acc520f3939e5a6675343881fed4f2,05 social sciences,0502 economics and business,050203 business & management,05020307 Innovation/Product management,0.5293408632278442,0.5326762795448303
+10.3152/147154306781778533,doi_________::00acc520f3939e5a6675343881fed4f2,05 social sciences,0509 other social sciences,050905 science studies,05090502 Social philosophy/Capitalism,0.4706590473651886,0.4673237204551697
+10.1785/0120050806,doi_________::00d5831d329e7ae4523d78bfc3042e98,02 engineering and technology,0211 other engineering and technologies,021101 geological & geomatics engineering,02110103 Concrete/Building materials,0.5343400835990906,0.3285667930180677
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json
@ -0,0 +1,25 @@
+{"doi":"10.1016/j.anucene.2006.02.004","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020209 energy","level4":"02020908 Climate change policy/Ethanol fuel","scoreL3":"0.5","scoreL4":"0.5"}
+{"doi":"10.1016/j.anucene.2006.02.004","level1":"02 engineering and technology","level2":"0211 other engineering and technologies","level3":"021108 energy","level4":"02110808 Climate change policy/Ethanol fuel","scoreL3":"0.5","scoreL4":"0.5"}
+{"doi":"10.1016/j.revmed.2006.07.010","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis","level4":"N/A","scoreL3":"0.5036656856536865","scoreL4":"0.0"}
+{"doi":"10.1016/j.revmed.2006.07.010","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030212 general & internal medicine","level4":"N/A","scoreL3":"0.4963343143463135","scoreL4":"0.0"}
+{"doi":"10.20965/jrm.2006.p0312","level1":"02 engineering and technology","level2":"0209 industrial biotechnology","level3":"020901 industrial engineering & automation","level4":"02090104 Robotics/Robots","scoreL3":"0.6111094951629639","scoreL4":"0.5053805979936855"}
+{"doi":"10.20965/jrm.2006.p0312","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010401 analytical chemistry","level4":"N/A","scoreL3":"0.3888905048370361","scoreL4":"0.0"}
+{"doi":"10.1111/j.1747-7379.2006.040_1.x","level1":"05 social sciences","level2":"0506 political science","level3":"050602 political science & public administration","level4":"05060202 Ethnic groups/Ethnicity","scoreL3":"0.6159052848815918","scoreL4":"0.7369035568037298"}
+{"doi":"10.1111/j.1747-7379.2006.040_1.x","level1":"05 social sciences","level2":"0502 economics and business","level3":"050207 economics","level4":"N/A","scoreL3":"0.3840946555137634","scoreL4":"0.0"}
+{"doi":"10.1007/s10512-006-0049-9","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020209 energy","level4":"02020908 Climate change policy/Ethanol fuel","scoreL3":"0.5","scoreL4":"0.5"}
+{"doi":"10.1007/s10512-006-0049-9","level1":"02 engineering and technology","level2":"0211 other engineering and technologies","level3":"021108 energy","level4":"02110808 Climate change policy/Ethanol fuel","scoreL3":"0.5","scoreL4":"0.5"}
+{"doi":"10.1111/j.1365-2621.2005.01045.x","level1":"04 agricultural and veterinary sciences","level2":"0404 agricultural biotechnology","level3":"040401 food science","level4":"04040102 Food science/Food industry","scoreL3":"0.5","scoreL4":"0.5"}
+{"doi":"10.1111/j.1365-2621.2005.01045.x","level1":"04 agricultural and veterinary sciences","level2":"0405 other agricultural sciences","level3":"040502 food science","level4":"04050202 Food science/Food industry","scoreL3":"0.5","scoreL4":"0.5"}
+{"doi":"10.1002/chin.200617262","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010405 organic chemistry","level4":"01040508 Functional groups/Ethers","scoreL3":"0.5566747188568115","scoreL4":"0.5582916736602783"}
+{"doi":"10.1002/chin.200617262","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry","level4":"01040207 Chemical synthesis/Total synthesis","scoreL3":"0.4433253407478332","scoreL4":"0.4417082965373993"}
+{"doi":"10.1016/j.revmed.2006.07.012","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030204 cardiovascular system & hematology","level4":"03020409 Hematology/Coagulopathies","scoreL3":"0.5101401805877686","scoreL4":"0.0546871414174914"}
+{"doi":"10.1016/j.revmed.2006.07.012","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030105 genetics & heredity","level4":"N/A","scoreL3":"0.4898599088191986","scoreL4":"0.0"}
+{"doi":"10.4109/jslab.17.132","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030104 developmental biology","level4":"N/A","scoreL3":"0.5","scoreL4":"0.0"}
+{"doi":"10.4109/jslab.17.132","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030304 developmental biology","level4":"N/A","scoreL3":"0.5","scoreL4":"0.0"}
+{"doi":"10.1108/00251740610715687","level1":"05 social sciences","level2":"0502 economics and business","level3":"050203 business & management","level4":"05020311 International business/International trade","scoreL3":"0.605047881603241","scoreL4":"0.2156608108845153"}
+{"doi":"10.1108/00251740610715687","level1":"05 social sciences","level2":"0502 economics and business","level3":"050211 marketing","level4":"N/A","scoreL3":"0.394952118396759","scoreL4":"0.0"}
+{"doi":"10.1080/03067310500248098","level1":"04 agricultural and veterinary sciences","level2":"0404 agricultural biotechnology","level3":"040401 food science","level4":"04040102 Food science/Food industry","scoreL3":"0.5","scoreL4":"0.5"}
+{"doi":"10.1080/03067310500248098","level1":"04 agricultural and veterinary sciences","level2":"0405 other agricultural sciences","level3":"040502 food science","level4":"04050202 Food science/Food industry","scoreL3":"0.5","scoreL4":"0.5"}
+{"doi":"10.3152/147154306781778533","level1":"05 social sciences","level2":"0502 economics and business","level3":"050203 business & management","level4":"05020307 Innovation/Product management","scoreL3":"0.5293408632278442","scoreL4":"0.5326762795448303"}
+{"doi":"10.3152/147154306781778533","level1":"05 social sciences","level2":"0509 other social sciences","level3":"050905 science studies","level4":"05090502 Social philosophy/Capitalism","scoreL3":"0.4706590473651886","scoreL4":"0.4673237204551697"}
+{"doi":"10.1785/0120050806","level1":"02 engineering and technology","level2":"0211 other engineering and technologies","level3":"021101 geological & geomatics engineering","level4":"02110103 Concrete/Building materials","scoreL3":"0.5343400835990906","scoreL4":"0.3285667930180677"}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml
@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="26448226" path="/0000-0001-5010-5001/work/26448226" visibility="public">
+    <common:created-date>2016-09-01T19:22:46.768Z</common:created-date>
+    <common:last-modified-date>2022-05-25T03:48:56.968Z</common:last-modified-date>
+    <common:source>
+        <common:source-client-id>
+            <common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
+            <common:path>0000-0002-5982-8983</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-client-id>
+        <common:source-name>Scopus - Elsevier</common:source-name>
+        <common:assertion-origin-orcid>
+            <common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
+            <common:path>0000-0001-5010-5001</common:path>
+            <common:host>orcid.org</common:host>
+        </common:assertion-origin-orcid>
+        <common:assertion-origin-name>Quang Nguyen</common:assertion-origin-name>
+    </common:source>
+    <work:title>
+        <common:title>Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms</common:title>
+    </work:title>
+    <work:journal-title>American Journal of Neuroradiology</work:journal-title>
+    <work:citation>
+        <work:citation-type>bibtex</work:citation-type>
+        <work:citation-value>@article{Nguyen2014,title = {Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms},journal = {American Journal of Neuroradiology},year = {2014},volume = {35},number = {11},pages = {2140-2145},author = {Durst, C. and Starke, R.M. and Gaughen, J. and Nguyen, Q. and Patrie, J. and Jensen, M.E. and Evans, A.J.}}</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2014</common:year>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>doi</common:external-id-type>
+            <common:external-id-value>10.3174/ajnr.A4032</common:external-id-value>
+            <common:external-id-normalized transient="true">10.3174/ajnr.a4032</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>eid</common:external-id-type>
+            <common:external-id-value>2-s2.0-84911865199</common:external-id-value>
+            <common:external-id-normalized transient="true">2-s2.0-84911865199</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-84911865199&amp;partnerID=MN8TOARS</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:credit-name>Durst, C.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Starke, R.M.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Gaughen, J.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Nguyen, Q.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Patrie, J.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Jensen, M.E.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Evans, A.J.</work:credit-name>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml
@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address"
+    xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
+    xmlns:employment="http://www.orcid.org/ns/employment"
+    xmlns:education="http://www.orcid.org/ns/education"
+    xmlns:other-name="http://www.orcid.org/ns/other-name"
+    xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+    xmlns:funding="http://www.orcid.org/ns/funding"
+    xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+    xmlns:service="http://www.orcid.org/ns/service"
+    xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+    xmlns:distinction="http://www.orcid.org/ns/distinction"
+    xmlns:internal="http://www.orcid.org/ns/internal"
+    xmlns:membership="http://www.orcid.org/ns/membership"
+    xmlns:person="http://www.orcid.org/ns/person"
+    xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+    xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+    xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+    xmlns:activities="http://www.orcid.org/ns/activities"
+    xmlns:qualification="http://www.orcid.org/ns/qualification"
+    xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+    xmlns:error="http://www.orcid.org/ns/error"
+    xmlns:preferences="http://www.orcid.org/ns/preferences"
+    xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+    xmlns:work="http://www.orcid.org/ns/work"
+    xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="50101152"
+    path="/0000-0001-5349-4030/work/50101152" visibility="public">
+    <common:created-date>2018-11-01T19:49:45.562Z</common:created-date>
+    <common:last-modified-date>2018-11-01T19:49:45.562Z</common:last-modified-date>
+    <common:source>
+        <common:source-client-id>
+            <common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
+            <common:path>0000-0002-5982-8983</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-client-id>
+        <common:source-name>Scopus - Elsevier</common:source-name>
+    </common:source>
+    <work:title>
+        <common:title>"Calling Out" in class: Degrees of candor in addressing social injustices in
+            racially homogenous and heterogeneous U.S. history classrooms</common:title>
+    </work:title>
+    <work:journal-title>Journal of Social Studies Research</work:journal-title>
+    <work:citation>
+        <work:citation-type>bibtex</work:citation-type>
+        <work:citation-value>@article{Massaro2018,title = {{"}Calling Out{"} in class: Degrees of
+            candor in addressing social injustices in racially homogenous and heterogeneous U.S.
+            history classrooms},journal = {Journal of Social Studies Research},year = {2018},author
+            = {Parkhouse, H. and Massaro, V.R.}}</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2018</common:year>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>doi</common:external-id-type>
+            <common:external-id-value>10.1016/j.jssr.2018.01.004</common:external-id-value>
+            <common:external-id-normalized transient="true"
+                >10.1016/j.jssr.2018.01.004</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>eid</common:external-id-type>
+            <common:external-id-value>2-s2.0-85041949043</common:external-id-value>
+            <common:external-id-normalized transient="true"
+                >2-s2.0-85041949043</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-85041949043&amp;partnerID=MN8TOARS</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:credit-name>Parkhouse, H.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Massaro, V.R.</work:credit-name>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml
@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address"
+           xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
+           xmlns:employment="http://www.orcid.org/ns/employment"
+           xmlns:education="http://www.orcid.org/ns/education"
+           xmlns:other-name="http://www.orcid.org/ns/other-name"
+           xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+           xmlns:funding="http://www.orcid.org/ns/funding"
+           xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+           xmlns:service="http://www.orcid.org/ns/service"
+           xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+           xmlns:distinction="http://www.orcid.org/ns/distinction"
+           xmlns:internal="http://www.orcid.org/ns/internal"
+           xmlns:membership="http://www.orcid.org/ns/membership"
+           xmlns:person="http://www.orcid.org/ns/person"
+           xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+           xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+           xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+           xmlns:activities="http://www.orcid.org/ns/activities"
+           xmlns:qualification="http://www.orcid.org/ns/qualification"
+           xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+           xmlns:error="http://www.orcid.org/ns/error"
+           xmlns:preferences="http://www.orcid.org/ns/preferences"
+           xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+           xmlns:work="http://www.orcid.org/ns/work"
+           xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
+           path="/0000-0003-2760-1191/work/28776099" visibility="public">
+    <common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
+    <common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
+            <common:path>0000-0002-9157-3431</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Europe PubMed Central</common:source-name>
+    </common:source>
+    <work:title>
+        <common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
+            Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
+            ST-Segment-Elevation Myocardial Infarction.</common:title>
+    </work:title>
+    <work:citation>
+        <work:citation-type>formatted-unspecified</work:citation-type>
+        <work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
+            Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2016</common:year>
+        <common:month>11</common:month>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>pmid</common:external-id-type>
+            <common:external-id-value>27899851</common:external-id-value>
+            <common:external-id-normalized transient="true">27899851</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>pmc</common:external-id-type>
+            <common:external-id-value>PMC5126442</common:external-id-value>
+            <common:external-id-normalized transient="true"
+            >PMC5126442</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://europepmc.org/abstract/med/27899851</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:credit-name>Abdel-Dayem K</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Abdel-Dayem Fake</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Eweda II</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>El-Sherbiny A</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Dimitry MO</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Nammas W</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml
@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address"
+           xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
+           xmlns:employment="http://www.orcid.org/ns/employment"
+           xmlns:education="http://www.orcid.org/ns/education"
+           xmlns:other-name="http://www.orcid.org/ns/other-name"
+           xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+           xmlns:funding="http://www.orcid.org/ns/funding"
+           xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+           xmlns:service="http://www.orcid.org/ns/service"
+           xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+           xmlns:distinction="http://www.orcid.org/ns/distinction"
+           xmlns:internal="http://www.orcid.org/ns/internal"
+           xmlns:membership="http://www.orcid.org/ns/membership"
+           xmlns:person="http://www.orcid.org/ns/person"
+           xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+           xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+           xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+           xmlns:activities="http://www.orcid.org/ns/activities"
+           xmlns:qualification="http://www.orcid.org/ns/qualification"
+           xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+           xmlns:error="http://www.orcid.org/ns/error"
+           xmlns:preferences="http://www.orcid.org/ns/preferences"
+           xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+           xmlns:work="http://www.orcid.org/ns/work"
+           xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
+           path="/0000-0003-2760-1191/work/28776099" visibility="public">
+    <common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
+    <common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
+            <common:path>0000-0002-9157-3431</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Europe PubMed Central</common:source-name>
+    </common:source>
+    <work:title>
+        <common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
+            Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
+            ST-Segment-Elevation Myocardial Infarction.</common:title>
+    </work:title>
+    <work:citation>
+        <work:citation-type>formatted-unspecified</work:citation-type>
+        <work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
+            Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2016</common:year>
+        <common:month>11</common:month>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>pmid</common:external-id-type>
+            <common:external-id-value>27899851</common:external-id-value>
+            <common:external-id-normalized transient="true">27899851</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>pmc</common:external-id-type>
+            <common:external-id-value>PMC5126442</common:external-id-value>
+            <common:external-id-normalized transient="true"
+            >PMC5126442</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://europepmc.org/abstract/med/27899851</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:credit-name>Khair Abde Daye</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Eweda II</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>El-Sherbiny A</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Dimitry MO</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Nammas W</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml
@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address"
+           xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
+           xmlns:employment="http://www.orcid.org/ns/employment"
+           xmlns:education="http://www.orcid.org/ns/education"
+           xmlns:other-name="http://www.orcid.org/ns/other-name"
+           xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+           xmlns:funding="http://www.orcid.org/ns/funding"
+           xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+           xmlns:service="http://www.orcid.org/ns/service"
+           xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+           xmlns:distinction="http://www.orcid.org/ns/distinction"
+           xmlns:internal="http://www.orcid.org/ns/internal"
+           xmlns:membership="http://www.orcid.org/ns/membership"
+           xmlns:person="http://www.orcid.org/ns/person"
+           xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+           xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+           xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+           xmlns:activities="http://www.orcid.org/ns/activities"
+           xmlns:qualification="http://www.orcid.org/ns/qualification"
+           xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+           xmlns:error="http://www.orcid.org/ns/error"
+           xmlns:preferences="http://www.orcid.org/ns/preferences"
+           xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+           xmlns:work="http://www.orcid.org/ns/work"
+           xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
+           path="/0000-0003-2760-1191/work/28776099" visibility="public">
+    <common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
+    <common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
+            <common:path>0000-0002-9157-3431</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Europe PubMed Central</common:source-name>
+    </common:source>
+    <work:title>
+        <common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
+            Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
+            ST-Segment-Elevation Myocardial Infarction.</common:title>
+    </work:title>
+    <work:citation>
+        <work:citation-type>formatted-unspecified</work:citation-type>
+        <work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
+            Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2016</common:year>
+        <common:month>11</common:month>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>pmid</common:external-id-type>
+            <common:external-id-value>27899851</common:external-id-value>
+            <common:external-id-normalized transient="true">27899851</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>pmc</common:external-id-type>
+            <common:external-id-value>PMC5126442</common:external-id-value>
+            <common:external-id-normalized transient="true"
+            >PMC5126442</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://europepmc.org/abstract/med/27899851</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:contributor-attributes>
+                <work:contributor-sequence>seq0</work:contributor-sequence>
+                <work:contributor-role>role0</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>creditname1</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>creditname2</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>seq2</work:contributor-sequence>
+                <work:contributor-role></work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>creditname3</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence></work:contributor-sequence>
+                <work:contributor-role>role3</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name></work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>seq4</work:contributor-sequence>
+                <work:contributor-role>role4</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment.xml
@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
+                       xmlns:history="http://www.orcid.org/ns/history"
+                       xmlns:employment="http://www.orcid.org/ns/employment"
+                       xmlns:education="http://www.orcid.org/ns/education"
+                       xmlns:other-name="http://www.orcid.org/ns/other-name"
+                       xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+                       xmlns:funding="http://www.orcid.org/ns/funding"
+                       xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+                       xmlns:service="http://www.orcid.org/ns/service"
+                       xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+                       xmlns:distinction="http://www.orcid.org/ns/distinction"
+                       xmlns:internal="http://www.orcid.org/ns/internal"
+                       xmlns:membership="http://www.orcid.org/ns/membership"
+                       xmlns:person="http://www.orcid.org/ns/person"
+                       xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+                       xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+                       xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+                       xmlns:activities="http://www.orcid.org/ns/activities"
+                       xmlns:qualification="http://www.orcid.org/ns/qualification"
+                       xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+                       xmlns:error="http://www.orcid.org/ns/error"
+                       xmlns:preferences="http://www.orcid.org/ns/preferences"
+                       xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+                       xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
+                       put-code="2205087" path="/0000-0001-5010-5001/employment/2205087" display-index="0"
+                       visibility="public">
+    <common:created-date>2016-09-01T19:21:05.791Z</common:created-date>
+    <common:last-modified-date>2016-09-01T19:21:05.791Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
+            <common:path>0000-0001-5010-5001</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Quang Nguyen</common:source-name>
+    </common:source>
+    <common:organization>
+        <common:name>Beth Israel Deaconess Medical Center</common:name>
+        <common:address>
+            <common:city>Boston</common:city>
+            <common:region>MA</common:region>
+            <common:country>US</common:country>
+        </common:address>
+        <common:disambiguated-organization>
+            <common:disambiguated-organization-identifier>1859</common:disambiguated-organization-identifier>
+            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
+        </common:disambiguated-organization>
+    </common:organization>
+</employment:employment>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_2.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_2.xml
@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
+                       xmlns:history="http://www.orcid.org/ns/history"
+                       xmlns:employment="http://www.orcid.org/ns/employment"
+                       xmlns:education="http://www.orcid.org/ns/education"
+                       xmlns:other-name="http://www.orcid.org/ns/other-name"
+                       xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+                       xmlns:funding="http://www.orcid.org/ns/funding"
+                       xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+                       xmlns:service="http://www.orcid.org/ns/service"
+                       xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+                       xmlns:distinction="http://www.orcid.org/ns/distinction"
+                       xmlns:internal="http://www.orcid.org/ns/internal"
+                       xmlns:membership="http://www.orcid.org/ns/membership"
+                       xmlns:person="http://www.orcid.org/ns/person"
+                       xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+                       xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+                       xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+                       xmlns:activities="http://www.orcid.org/ns/activities"
+                       xmlns:qualification="http://www.orcid.org/ns/qualification"
+                       xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+                       xmlns:error="http://www.orcid.org/ns/error"
+                       xmlns:preferences="http://www.orcid.org/ns/preferences"
+                       xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+                       xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
+                       put-code="6364960" path="/0000-0001-5011-3001/employment/6364960" display-index="1"
+                       visibility="public">
+    <common:created-date>2018-09-03T01:46:19.474Z</common:created-date>
+    <common:last-modified-date>2018-09-03T01:46:19.474Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0001-5011-3001</common:uri>
+            <common:path>0000-0001-5011-3001</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>zhengyan li</common:source-name>
+    </common:source>
+    <common:start-date>
+        <common:year>2008</common:year>
+        <common:month>09</common:month>
+        <common:day>01</common:day>
+    </common:start-date>
+    <common:organization>
+        <common:name>Anhui Academy of Agricultural Sciences</common:name>
+        <common:address>
+            <common:city>Hefei</common:city>
+            <common:region>Anhui</common:region>
+            <common:country>CN</common:country>
+        </common:address>
+        <common:disambiguated-organization>
+            <common:disambiguated-organization-identifier>125385</common:disambiguated-organization-identifier>
+            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
+        </common:disambiguated-organization>
+    </common:organization>
+</employment:employment>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_3.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_3.xml
@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
+                       xmlns:history="http://www.orcid.org/ns/history"
+                       xmlns:employment="http://www.orcid.org/ns/employment"
+                       xmlns:education="http://www.orcid.org/ns/education"
+                       xmlns:other-name="http://www.orcid.org/ns/other-name"
+                       xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+                       xmlns:funding="http://www.orcid.org/ns/funding"
+                       xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+                       xmlns:service="http://www.orcid.org/ns/service"
+                       xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+                       xmlns:distinction="http://www.orcid.org/ns/distinction"
+                       xmlns:internal="http://www.orcid.org/ns/internal"
+                       xmlns:membership="http://www.orcid.org/ns/membership"
+                       xmlns:person="http://www.orcid.org/ns/person"
+                       xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+                       xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+                       xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+                       xmlns:activities="http://www.orcid.org/ns/activities"
+                       xmlns:qualification="http://www.orcid.org/ns/qualification"
+                       xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+                       xmlns:error="http://www.orcid.org/ns/error"
+                       xmlns:preferences="http://www.orcid.org/ns/preferences"
+                       xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+                       xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
+                       put-code="7210424" path="/0000-0001-5022-8001/employment/7210424" display-index="1"
+                       visibility="public">
+    <common:created-date>2021-03-11T14:48:29.603Z</common:created-date>
+    <common:last-modified-date>2021-03-11T14:48:29.603Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0001-5012-1001</common:uri>
+            <common:path>0000-0001-5012-1001</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Asma Bazzi</common:source-name>
+    </common:source>
+    <common:department-name>Pathology and Laboratory Medicine</common:department-name>
+    <common:role-title>Medical Laboratory Technologist</common:role-title>
+    <common:start-date>
+        <common:year>1994</common:year>
+        <common:month>10</common:month>
+        <common:day>01</common:day>
+    </common:start-date>
+    <common:end-date>
+        <common:year>2000</common:year>
+        <common:month>06</common:month>
+        <common:day>30</common:day>
+    </common:end-date>
+    <common:organization>
+        <common:name>American University of Beirut</common:name>
+        <common:address>
+            <common:city>Hamra</common:city>
+            <common:region>Beirut</common:region>
+            <common:country>LB</common:country>
+        </common:address>
+        <common:disambiguated-organization>
+            <common:disambiguated-organization-identifier>11238</common:disambiguated-organization-identifier>
+            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
+        </common:disambiguated-organization>
+    </common:organization>
+</employment:employment>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/summary.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/summary.xml
@ -0,0 +1,581 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<record:record xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-5045-1000">
+    <common:orcid-identifier>
+        <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+        <common:path>0000-0001-5045-1000</common:path>
+        <common:host>orcid.org</common:host>
+    </common:orcid-identifier>
+    <preferences:preferences>
+        <preferences:locale>es</preferences:locale>
+    </preferences:preferences>
+    <history:history>
+        <history:creation-method>Direct</history:creation-method>
+        <history:submission-date>2023-01-17T23:50:40.215Z</history:submission-date>
+        <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+        <history:claimed>true</history:claimed>
+        <history:verified-email>true</history:verified-email>
+        <history:verified-primary-email>true</history:verified-primary-email>
+    </history:history>
+    <person:person path="/0000-0001-5045-1000/person">
+        <person:name visibility="public" path="0000-0001-5045-1000">
+            <common:created-date>2023-01-17T23:50:40.472Z</common:created-date>
+            <common:last-modified-date>2023-01-17T23:50:40.472Z</common:last-modified-date>
+            <personal-details:given-names>Patricio</personal-details:given-names>
+            <personal-details:family-name>Sánchez Quinchuela</personal-details:family-name>
+        </person:name>
+        <other-name:other-names path="/0000-0001-5045-1000/other-names"/>
+        <person:biography visibility="public" path="/0000-0001-5045-1000/biography">
+            <common:created-date>2023-01-19T13:47:33.653Z</common:created-date>
+            <common:last-modified-date>2023-01-19T13:47:33.653Z</common:last-modified-date>
+            <personal-details:content>Especialista de vinculación con la sociedad y docente de la Universidad de las Artes. Magister en Economía Social y Solidaria por el IAEN; Magister en Proyectos Sociales y Productivos por la UNACH. Licenciado en Artes UCE. Licenciado en Castellano y Literatura por la UNACH. Doctorando del programa de Sociología de la UNED España. Larga trayectoria vinculado a las organizaciones sociales acompañando procesos de gestión cultural, formación de liderazgos y economía solidaria.</personal-details:content>
+        </person:biography>
+        <researcher-url:researcher-urls path="/0000-0001-5045-1000/researcher-urls"/>
+        <email:emails path="/0000-0001-5045-1000/email"/>
+        <address:addresses path="/0000-0001-5045-1000/address"/>
+        <keyword:keywords path="/0000-0001-5045-1000/keywords"/>
+        <external-identifier:external-identifiers path="/0000-0001-7291-3210/external-identifiers">
+            <common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
+            <external-identifier:external-identifier put-code="134902" visibility="public" path="/0000-0001-7291-3210/external-identifiers/134902" display-index="1">
+                <common:created-date>2013-03-08T03:20:39.347Z</common:created-date>
+                <common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
+                <common:source>
+                    <common:source-client-id>
+                        <common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
+                        <common:path>0000-0002-5982-8983</common:path>
+                        <common:host>orcid.org</common:host>
+                    </common:source-client-id>
+                    <common:source-name>Scopus - Elsevier</common:source-name>
+                    <common:assertion-origin-orcid>
+                        <common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
+                        <common:path>0000-0001-7291-3210</common:path>
+                        <common:host>orcid.org</common:host>
+                    </common:assertion-origin-orcid>
+                    <common:assertion-origin-name>Paolo Manghi</common:assertion-origin-name>
+                </common:source>
+                <common:external-id-type>Scopus Author ID</common:external-id-type>
+                <common:external-id-value>6602255248</common:external-id-value>
+                <common:external-id-url>http://www.scopus.com/inward/authorDetails.url?authorID=6602255248&amp;partnerID=MN8TOARS</common:external-id-url>
+                <common:external-id-relationship>self</common:external-id-relationship>
+            </external-identifier:external-identifier>
+        </external-identifier:external-identifiers>
+    </person:person>
+    <activities:activities-summary path="/0000-0001-5045-1000/activities">
+        <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+        <activities:distinctions path="/0000-0001-5045-1000/distinctions">
+            <common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
+                <common:external-ids/>
+                <distinction:distinction-summary put-code="19395146" display-index="1" path="/0000-0001-5045-1000/distinction/19395146" visibility="public">
+                    <common:created-date>2023-01-19T13:49:48.482Z</common:created-date>
+                    <common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Programa de Maestría</common:department-name>
+                    <common:role-title>Becario del programa de Maestría en Economía Social y Solidaria</common:role-title>
+                    <common:start-date>
+                        <common:year>2014</common:year>
+                        <common:month>10</common:month>
+                        <common:day>20</common:day>
+                    </common:start-date>
+                    <common:organization>
+                        <common:name>Instituto de Altos Estudios Nacionales</common:name>
+                        <common:address>
+                            <common:city>Quito</common:city>
+                            <common:region>Pichincha</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </distinction:distinction-summary>
+            </activities:affiliation-group>
+        </activities:distinctions>
+        <activities:educations path="/0000-0001-5045-1000/educations">
+            <common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
+                <common:external-ids/>
+                <education:education-summary put-code="19389331" display-index="1" path="/0000-0001-5045-1000/education/19389331" visibility="public">
+                    <common:created-date>2023-01-18T21:41:03.175Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Programa de Doctorado en Sociología</common:department-name>
+                    <common:role-title>Doctorando del Programa de Sociología</common:role-title>
+                    <common:start-date>
+                        <common:year>2020</common:year>
+                        <common:month>11</common:month>
+                        <common:day>06</common:day>
+                    </common:start-date>
+                    <common:organization>
+                        <common:name>Universidad Nacional de Educación a Distancia Facultad de Ciencias Políticas y Sociología</common:name>
+                        <common:address>
+                            <common:city>Madrid</common:city>
+                            <common:region>Comunidad de Madrid</common:region>
+                            <common:country>ES</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>223339</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </education:education-summary>
+            </activities:affiliation-group>
+        </activities:educations>
+        <activities:employments path="/0000-0001-5045-1000/employments">
+            <common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
+                <common:external-ids/>
+                <employment:employment-summary put-code="19379757" display-index="1" path="/0000-0001-5045-1000/employment/19379757" visibility="public">
+                    <common:created-date>2023-01-17T23:57:08.246Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
+                    <common:role-title>Especialista de Proyectos y docente</common:role-title>
+                    <common:start-date>
+                        <common:year>2021</common:year>
+                        <common:month>11</common:month>
+                        <common:day>01</common:day>
+                    </common:start-date>
+                    <common:organization>
+                        <common:name>Universidad de las Artes</common:name>
+                        <common:address>
+                            <common:city>Guayaquil</common:city>
+                            <common:region>Guayas</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/016drwn73</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </employment:employment-summary>
+            </activities:affiliation-group>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
+                <common:external-ids/>
+                <employment:employment-summary put-code="19389234" display-index="1" path="/0000-0001-5045-1000/employment/19389234" visibility="public">
+                    <common:created-date>2023-01-18T21:25:07.138Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
+                    <common:role-title>Director</common:role-title>
+                    <common:start-date>
+                        <common:year>2019</common:year>
+                        <common:month>11</common:month>
+                        <common:day>05</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>2021</common:year>
+                        <common:month>10</common:month>
+                        <common:day>31</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Universidad Regional Amazónica IKIAM</common:name>
+                        <common:address>
+                            <common:city>Tena</common:city>
+                            <common:region>Napo</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/05xedqd83</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                    <common:url>http://ikiam.edu.ec</common:url>
+                </employment:employment-summary>
+            </activities:affiliation-group>
+        </activities:employments>
+        <activities:fundings path="/0000-0001-5045-1000/fundings"/>
+        <activities:invited-positions path="/0000-0001-5045-1000/invited-positions"/>
+        <activities:memberships path="/0000-0001-5045-1000/memberships">
+            <common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
+                <common:external-ids/>
+                <membership:membership-summary put-code="19927715" display-index="1" path="/0000-0001-5045-1000/membership/19927715" visibility="public">
+                    <common:created-date>2023-03-24T18:16:09.131Z</common:created-date>
+                    <common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Artes Escénicas</common:department-name>
+                    <common:role-title>Miembro</common:role-title>
+                    <common:start-date>
+                        <common:year>2000</common:year>
+                        <common:month>07</common:month>
+                        <common:day>15</common:day>
+                    </common:start-date>
+                    <common:organization>
+                        <common:name>Casa de la Cultura Ecuatoriana</common:name>
+                        <common:address>
+                            <common:city>Riobamba</common:city>
+                            <common:region>Sierra Centro</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                    </common:organization>
+                </membership:membership-summary>
+            </activities:affiliation-group>
+        </activities:memberships>
+        <activities:peer-reviews path="/0000-0001-5045-1000/peer-reviews"/>
+        <activities:qualifications path="/0000-0001-5045-1000/qualifications">
+            <common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
+                <common:external-ids/>
+                <qualification:qualification-summary put-code="19389264" display-index="1" path="/0000-0001-5045-1000/qualification/19389264" visibility="public">
+                    <common:created-date>2023-01-18T21:29:11.300Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Programa de Gobernabilidad</common:department-name>
+                    <common:role-title>Magister en Economïa Social y Solidaria</common:role-title>
+                    <common:start-date>
+                        <common:year>2014</common:year>
+                        <common:month>10</common:month>
+                        <common:day>20</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>2017</common:year>
+                        <common:month>01</common:month>
+                        <common:day>26</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Instituto de Altos Estudios Nacionales</common:name>
+                        <common:address>
+                            <common:city>Quito</common:city>
+                            <common:region>Pichincha</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </qualification:qualification-summary>
+            </activities:affiliation-group>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
+                <common:external-ids/>
+                <qualification:qualification-summary put-code="19389298" display-index="1" path="/0000-0001-5045-1000/qualification/19389298" visibility="public">
+                    <common:created-date>2023-01-18T21:34:32.093Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Posgrados</common:department-name>
+                    <common:role-title>Magister en Proyectos Sociales y Productivos</common:role-title>
+                    <common:start-date>
+                        <common:year>2001</common:year>
+                        <common:month>03</common:month>
+                        <common:day>09</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>2003</common:year>
+                        <common:month>02</common:month>
+                        <common:day>27</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Universidad Nacional de Chimborazo</common:name>
+                        <common:address>
+                            <common:city>Riobamba</common:city>
+                            <common:region>Chimborazo</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </qualification:qualification-summary>
+            </activities:affiliation-group>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
+                <common:external-ids/>
+                <qualification:qualification-summary put-code="19389353" display-index="1" path="/0000-0001-5045-1000/qualification/19389353" visibility="public">
+                    <common:created-date>2023-01-18T21:45:07.379Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Ciencias de la Educación</common:department-name>
+                    <common:role-title>Licenciado en Ciencias de la Educación en Castellano y Literatura</common:role-title>
+                    <common:start-date>
+                        <common:year>1994</common:year>
+                        <common:month>10</common:month>
+                        <common:day>03</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>2000</common:year>
+                        <common:month>01</common:month>
+                        <common:day>31</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Universidad Nacional de Chimborazo</common:name>
+                        <common:address>
+                            <common:city>Riobamba</common:city>
+                            <common:region>Chimborazo</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </qualification:qualification-summary>
+            </activities:affiliation-group>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
+                <common:external-ids/>
+                <qualification:qualification-summary put-code="19389317" display-index="1" path="/0000-0001-5045-1000/qualification/19389317" visibility="public">
+                    <common:created-date>2023-01-18T21:37:42.186Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Facultad de Artes</common:department-name>
+                    <common:role-title>Licenciado en Artes</common:role-title>
+                    <common:start-date>
+                        <common:year>1989</common:year>
+                        <common:month>09</common:month>
+                        <common:day>05</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>1997</common:year>
+                        <common:month>08</common:month>
+                        <common:day>07</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Universidad Central del Ecuador</common:name>
+                        <common:address>
+                            <common:city>Quito</common:city>
+                            <common:region>Pichincha</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/100019134</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>FUNDREF</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </qualification:qualification-summary>
+            </activities:affiliation-group>
+        </activities:qualifications>
+        <activities:research-resources path="/0000-0001-5045-1000/research-resources"/>
+        <activities:services path="/0000-0001-5045-1000/services"/>
+        <activities:works path="/0000-0001-5045-1000/works">
+            <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+            <activities:group>
+                <common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
+                <common:external-ids/>
+                <work:work-summary put-code="131526645" path="/0000-0001-5045-1000/work/131526645" visibility="public" display-index="1">
+                    <common:created-date>2023-03-24T18:36:56.180Z</common:created-date>
+                    <common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>Experience in a non-capitalist way: solidarity funds that do not tax interest on the use of money</common:title>
+                    </work:title>
+                    <common:external-ids>
+                        <common:external-id>
+                            <common:external-id-type>isbn</common:external-id-type>
+                            <common:external-id-value>978-9942-29-089-2</common:external-id-value>
+                            <common:external-id-normalized transient="true">9789942290892</common:external-id-normalized>
+                            <common:external-id-relationship>part-of</common:external-id-relationship>
+                        </common:external-id>
+                    </common:external-ids>
+                    <work:type>book-chapter</work:type>
+                    <common:publication-date>
+                        <common:year>2023</common:year>
+                        <common:month>06</common:month>
+                        <common:day>07</common:day>
+                    </common:publication-date>
+                    <work:journal-title>Finanzas éticas y solidarias en América Latina: diagnósticos, debates y propuestas</work:journal-title>
+                </work:work-summary>
+            </activities:group>
+            <activities:group>
+                <common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
+                <common:external-ids/>
+                <work:work-summary put-code="131527819" path="/0000-0001-5045-1000/work/131527819" visibility="public" display-index="1">
+                    <common:created-date>2023-03-24T19:05:36.384Z</common:created-date>
+                    <common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>Incidence of artistic practices in the social transformation of the territory. study of case: Hilarte Association, Guayaquil-Ecuador</common:title>
+                    </work:title>
+                    <common:external-ids/>
+                    <work:type>conference-abstract</work:type>
+                    <common:publication-date>
+                        <common:year>2022</common:year>
+                        <common:month>10</common:month>
+                        <common:day>06</common:day>
+                    </common:publication-date>
+                </work:work-summary>
+            </activities:group>
+            <activities:group>
+                <common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
+                <common:external-ids>
+                    <common:external-id>
+                        <common:external-id-type>other-id</common:external-id-type>
+                        <common:external-id-value>2018</common:external-id-value>
+                        <common:external-id-normalized transient="true">2018</common:external-id-normalized>
+                        <common:external-id-relationship>self</common:external-id-relationship>
+                    </common:external-id>
+                </common:external-ids>
+                <work:work-summary put-code="141716337" path="/0000-0001-5045-1000/work/141716337" visibility="public" display-index="1">
+                    <common:created-date>2023-09-04T17:40:30.215Z</common:created-date>
+                    <common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</common:title>
+                    </work:title>
+                    <common:external-ids>
+                        <common:external-id>
+                            <common:external-id-type>other-id</common:external-id-type>
+                            <common:external-id-value>2018</common:external-id-value>
+                            <common:external-id-normalized transient="true">2018</common:external-id-normalized>
+                            <common:external-id-relationship>self</common:external-id-relationship>
+                        </common:external-id>
+                    </common:external-ids>
+                    <common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
+                    <work:type>conference-poster</work:type>
+                    <common:publication-date>
+                        <common:year>2018</common:year>
+                        <common:month>11</common:month>
+                        <common:day>30</common:day>
+                    </common:publication-date>
+                    <work:journal-title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</work:journal-title>
+                </work:work-summary>
+            </activities:group>
+            <activities:group>
+                <common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
+                <common:external-ids/>
+                <work:work-summary put-code="131527433" path="/0000-0001-5045-1000/work/131527433" visibility="public" display-index="1">
+                    <common:created-date>2023-03-24T18:57:10.095Z</common:created-date>
+                    <common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>Promotion of the popular and solidarity economy from the state: principles and challenges in the experience of Ecuador</common:title>
+                    </work:title>
+                    <common:external-ids/>
+                    <work:type>dissertation-thesis</work:type>
+                    <common:publication-date>
+                        <common:year>2017</common:year>
+                        <common:month>01</common:month>
+                        <common:day>26</common:day>
+                    </common:publication-date>
+                </work:work-summary>
+            </activities:group>
+            <activities:group>
+                <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+                <common:external-ids/>
+                <work:work-summary put-code="141716713" path="/0000-0001-5045-1000/work/141716713" visibility="public" display-index="1">
+                    <common:created-date>2023-09-04T17:51:57.749Z</common:created-date>
+                    <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>La Rebelión de los Dioses</common:title>
+                    </work:title>
+                    <common:external-ids/>
+                    <common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
+                    <work:type>registered-copyright</work:type>
+                    <common:publication-date>
+                        <common:year>2001</common:year>
+                        <common:month>08</common:month>
+                        <common:day>28</common:day>
+                    </common:publication-date>
+                    <work:journal-title>Editorial pedagógica freire</work:journal-title>
+                </work:work-summary>
+            </activities:group>
+        </activities:works>
+    </activities:activities-summary>
+</record:record>
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/RelationAggregator.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/RelationAggregator.java
@ -1,57 +0,0 @@
-
-package eu.dnetlib.dhp.oa.dedup;
-
-import java.util.Objects;
-
-import org.apache.spark.sql.Encoder;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.expressions.Aggregator;
-
-import eu.dnetlib.dhp.schema.oaf.Relation;
-
-public class RelationAggregator extends Aggregator<Relation, Relation, Relation> {
-
-	private static final Relation ZERO = new Relation();
-
-	@Override
-	public Relation zero() {
-		return ZERO;
-	}
-
-	@Override
-	public Relation reduce(Relation b, Relation a) {
-		return mergeRel(b, a);
-	}
-
-	@Override
-	public Relation merge(Relation b, Relation a) {
-		return mergeRel(b, a);
-	}
-
-	@Override
-	public Relation finish(Relation r) {
-		return r;
-	}
-
-	private Relation mergeRel(Relation b, Relation a) {
-		if (Objects.equals(b, ZERO)) {
-			return a;
-		}
-		if (Objects.equals(a, ZERO)) {
-			return b;
-		}
-
-		b.mergeFrom(a);
-		return b;
-	}
-
-	@Override
-	public Encoder<Relation> bufferEncoder() {
-		return Encoders.kryo(Relation.class);
-	}
-
-	@Override
-	public Encoder<Relation> outputEncoder() {
-		return Encoders.kryo(Relation.class);
-	}
-}
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCleanRelation.scala
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCleanRelation.scala
@ -1,78 +0,0 @@
-package eu.dnetlib.dhp.oa.dedup
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser
-import eu.dnetlib.dhp.common.HdfsSupport
-import eu.dnetlib.dhp.schema.oaf.Relation
-import eu.dnetlib.dhp.utils.ISLookupClientFactory
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService
-import org.apache.commons.io.IOUtils
-import org.apache.spark.SparkConf
-import org.apache.spark.sql._
-import org.apache.spark.sql.functions.col
-import org.apache.spark.sql.types.{DataTypes, StructField, StructType}
-import org.slf4j.LoggerFactory
-
-object SparkCleanRelation {
-  private val log = LoggerFactory.getLogger(classOf[SparkCleanRelation])
-
-  @throws[Exception]
-  def main(args: Array[String]): Unit = {
-    val parser = new ArgumentApplicationParser(
-      IOUtils.toString(
-        classOf[SparkCleanRelation].getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json")
-      )
-    )
-    parser.parseArgument(args)
-    val conf = new SparkConf
-
-    new SparkCleanRelation(parser, AbstractSparkAction.getSparkSession(conf))
-      .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl")))
-  }
-}
-
-class SparkCleanRelation(parser: ArgumentApplicationParser, spark: SparkSession)
-    extends AbstractSparkAction(parser, spark) {
-  override def run(isLookUpService: ISLookUpService): Unit = {
-    val graphBasePath = parser.get("graphBasePath")
-    val inputPath = parser.get("inputPath")
-    val outputPath = parser.get("outputPath")
-
-    SparkCleanRelation.log.info("graphBasePath: '{}'", graphBasePath)
-    SparkCleanRelation.log.info("inputPath: '{}'", inputPath)
-    SparkCleanRelation.log.info("outputPath: '{}'", outputPath)
-
-    AbstractSparkAction.removeOutputDir(spark, outputPath)
-
-    val entities =
-      Seq("datasource", "project", "organization", "publication", "dataset", "software", "otherresearchproduct")
-
-    val idsSchema = StructType.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>")
-
-    val emptyIds = spark.createDataFrame(spark.sparkContext.emptyRDD[Row].setName("empty"),
-      idsSchema)
-
-    val ids = entities
-      .foldLeft(emptyIds)((ds, entity) => {
-        val entityPath = graphBasePath + '/' + entity
-        if (HdfsSupport.exists(entityPath, spark.sparkContext.hadoopConfiguration)) {
-          ds.union(spark.read.schema(idsSchema).json(entityPath))
-        } else {
-          ds
-        }
-      })
-      .filter("dataInfo.deletedbyinference != true AND dataInfo.invisible != true")
-      .select("id")
-      .distinct()
-
-    val relations = spark.read.schema(Encoders.bean(classOf[Relation]).schema).json(inputPath)
-      .filter("dataInfo.deletedbyinference != true AND dataInfo.invisible != true")
-
-    AbstractSparkAction.save(
-      relations
-        .join(ids, col("source") === ids("id"), "leftsemi")
-        .join(ids, col("target") === ids("id"), "leftsemi"),
-      outputPath,
-      SaveMode.Overwrite
-    )
-  }
-}
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java
@ -7,6 +7,7 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
@ -77,13 +78,12 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {

 		log.info("Number of Openorgs Merge Relations collected: {}", mergeRelsRDD.count());

-		spark
+		final Dataset<Relation> relations = spark
 			.createDataset(
 				mergeRelsRDD.rdd(),
-				Encoders.bean(Relation.class))
-			.write()
-			.mode(SaveMode.Append)
-			.parquet(outputPath);
+				Encoders.bean(Relation.class));
+
+		saveParquet(relations, outputPath, SaveMode.Append);
 	}

 	private boolean isMergeRel(Relation rel) {
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java
@ -67,12 +67,7 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction {
 			log.debug("Number of non-Openorgs relations collected: {}", simRels.count());
 		}

-		spark
-			.createDataset(simRels.rdd(), Encoders.bean(Relation.class))
-			.write()
-			.mode(SaveMode.Overwrite)
-			.json(outputPath);
-
+		save(spark.createDataset(simRels.rdd(), Encoders.bean(Relation.class)), outputPath, SaveMode.Overwrite);
 	}

 }
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
@ -155,7 +155,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
 					(FlatMapFunction<ConnectedComponent, Relation>) cc -> ccToMergeRel(cc, dedupConf),
 					Encoders.bean(Relation.class));

-			mergeRels.write().mode(SaveMode.Overwrite).parquet(mergeRelPath);
+			saveParquet(mergeRels, mergeRelPath, SaveMode.Overwrite);

 		}
 	}
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java
@ -72,11 +72,7 @@ public class SparkCreateOrgsDedupRecord extends AbstractSparkAction {

 		final String mergeRelsPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization");

-		rootOrganization(spark, entityPath, mergeRelsPath)
-			.write()
-			.mode(SaveMode.Overwrite)
-			.option("compression", "gzip")
-			.json(outputPath);
+		save(rootOrganization(spark, entityPath, mergeRelsPath), outputPath, SaveMode.Overwrite);

 	}

--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java
@ -82,8 +82,6 @@ public class SparkCreateSimRels extends AbstractSparkAction {
 			final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity);
 			removeOutputDir(spark, outputPath);

-			JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
 			SparkDeduper deduper = new SparkDeduper(dedupConf);

 			Dataset<?> simRels = spark
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java
@ -3,23 +3,19 @@ package eu.dnetlib.dhp.oa.dedup;

 import static org.apache.spark.sql.functions.col;

-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Objects;
-
-import org.apache.commons.beanutils.BeanUtils;
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.api.java.function.ReduceFunction;
 import org.apache.spark.sql.*;
+import org.apache.spark.sql.catalyst.encoders.RowEncoder;
+import org.apache.spark.sql.types.StructType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.common.EntityType;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.DataInfo;
@ -70,73 +66,63 @@ public class SparkPropagateRelation extends AbstractSparkAction {
 		log.info("workingPath: '{}'", workingPath);
 		log.info("graphOutputPath: '{}'", graphOutputPath);

-		final String outputRelationPath = DedupUtility.createEntityPath(graphOutputPath, "relation");
-		removeOutputDir(spark, outputRelationPath);
-
 		Dataset<Relation> mergeRels = spark
 			.read()
 			.load(DedupUtility.createMergeRelPath(workingPath, "*", "*"))
 			.as(REL_BEAN_ENC);

 		// <mergedObjectID, dedupID>
-		Dataset<Row> mergedIds = mergeRels
+		Dataset<Row> idsToMerge = mergeRels
 			.where(col("relClass").equalTo(ModelConstants.MERGES))
 			.select(col("source").as("dedupID"), col("target").as("mergedObjectID"))
-			.distinct()
-			.cache();
+			.distinct();

 		Dataset<Row> allRels = spark
 			.read()
 			.schema(REL_BEAN_ENC.schema())
-			.json(DedupUtility.createEntityPath(graphBasePath, "relation"));
+			.json(graphBasePath + "/relation");

 		Dataset<Relation> dedupedRels = allRels
-			.joinWith(mergedIds, allRels.col("source").equalTo(mergedIds.col("mergedObjectID")), "left_outer")
-			.joinWith(mergedIds, col("_1.target").equalTo(mergedIds.col("mergedObjectID")), "left_outer")
+			.joinWith(idsToMerge, allRels.col("source").equalTo(idsToMerge.col("mergedObjectID")), "left_outer")
+			.joinWith(idsToMerge, col("_1.target").equalTo(idsToMerge.col("mergedObjectID")), "left_outer")
 			.select("_1._1", "_1._2.dedupID", "_2.dedupID")
 			.as(Encoders.tuple(REL_BEAN_ENC, Encoders.STRING(), Encoders.STRING()))
-			.flatMap(SparkPropagateRelation::addInferredRelations, REL_KRYO_ENC);
+			.map((MapFunction<Tuple3<Relation, String, String>, Relation>) t -> {
+				Relation rel = t._1();
+				String newSource = t._2();
+				String newTarget = t._3();

-		Dataset<Relation> processedRelations = distinctRelations(
-			dedupedRels.union(mergeRels.map((MapFunction<Relation, Relation>) r -> r, REL_KRYO_ENC)))
-				.filter((FilterFunction<Relation>) r -> !Objects.equals(r.getSource(), r.getTarget()));
+				if (rel.getDataInfo() == null) {
+					rel.setDataInfo(new DataInfo());
+				}

-		save(processedRelations, outputRelationPath, SaveMode.Overwrite);
-	}
+				if (newSource != null || newTarget != null) {
+					rel.getDataInfo().setDeletedbyinference(false);

-	private static Iterator<Relation> addInferredRelations(Tuple3<Relation, String, String> t) throws Exception {
-		Relation existingRel = t._1();
-		String newSource = t._2();
-		String newTarget = t._3();
+					if (newSource != null)
+						rel.setSource(newSource);

-		if (newSource == null && newTarget == null) {
-			return Collections.singleton(t._1()).iterator();
-		}
+					if (newTarget != null)
+						rel.setTarget(newTarget);
+				}

-		// update existing relation
-		if (existingRel.getDataInfo() == null) {
-			existingRel.setDataInfo(new DataInfo());
-		}
-		existingRel.getDataInfo().setDeletedbyinference(true);
+				return rel;
+			}, REL_BEAN_ENC);

-		// Create new relation inferred by dedupIDs
-		Relation inferredRel = (Relation) BeanUtils.cloneBean(existingRel);
+		// ids of records that are both not deletedbyinference and not invisible
+		Dataset<Row> ids = validIds(spark, graphBasePath);

-		inferredRel.setDataInfo((DataInfo) BeanUtils.cloneBean(existingRel.getDataInfo()));
-		inferredRel.getDataInfo().setDeletedbyinference(false);
+		// filter relations that point to valid records, can force them to be visible
+		Dataset<Relation> cleanedRels = dedupedRels
+			.join(ids, col("source").equalTo(ids.col("id")), "leftsemi")
+			.join(ids, col("target").equalTo(ids.col("id")), "leftsemi")
+			.as(REL_BEAN_ENC)
+			.map((MapFunction<Relation, Relation>) r -> {
+				r.getDataInfo().setInvisible(false);
+				return r;
+			}, REL_KRYO_ENC);

-		if (newSource != null)
-			inferredRel.setSource(newSource);
-
-		if (newTarget != null)
-			inferredRel.setTarget(newTarget);
-
-		return Arrays.asList(existingRel, inferredRel).iterator();
-	}
-
-	private Dataset<Relation> distinctRelations(Dataset<Relation> rels) {
-		return rels
-			.filter(getRelationFilterFunction())
+		Dataset<Relation> distinctRels = cleanedRels
 			.groupByKey(
 				(MapFunction<Relation, String>) r -> String
 					.join(" ", r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()),
@ -146,13 +132,33 @@ public class SparkPropagateRelation extends AbstractSparkAction {
 				return b;
 			})
 			.map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, REL_BEAN_ENC);
+
+		final String outputRelationPath = graphOutputPath + "/relation";
+		removeOutputDir(spark, outputRelationPath);
+		save(
+			distinctRels
+				.union(mergeRels)
+				.filter("source != target AND dataInfo.deletedbyinference != true AND dataInfo.invisible != true"),
+			outputRelationPath,
+			SaveMode.Overwrite);
 	}

-	private FilterFunction<Relation> getRelationFilterFunction() {
-		return r -> StringUtils.isNotBlank(r.getSource()) ||
-			StringUtils.isNotBlank(r.getTarget()) ||
-			StringUtils.isNotBlank(r.getRelType()) ||
-			StringUtils.isNotBlank(r.getSubRelType()) ||
-			StringUtils.isNotBlank(r.getRelClass());
+	/**
+	 * Collects the ids of the graph records that pass the deletedbyinference / invisible filter.
+	 * For every entity type in ModelSupport.entityTypes the folder graphBasePath/&lt;type name&gt; is
+	 * read, but only when HdfsSupport.exists reports it as present.
+	 *
+	 * @param spark the Spark session used to read the entity folders
+	 * @param graphBasePath base path of the graph, one sub-folder per entity type
+	 * @return a single-column ("id") dataset holding the distinct ids that survived the filter
+	 */
+	static Dataset<Row> validIds(SparkSession spark, String graphBasePath) {
+		// only the fields needed by the filter are parsed from the json records
+		final StructType schema = StructType
+			.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");
+
+		// start from an empty dataset so that each available entity folder can be appended to it
+		Dataset<Row> collected = spark.emptyDataset(RowEncoder.apply(schema));
+
+		for (EntityType type : ModelSupport.entityTypes.keySet()) {
+			final String path = graphBasePath + '/' + type.name();
+			final boolean available = HdfsSupport.exists(path, spark.sparkContext().hadoopConfiguration());
+			if (!available) {
+				continue;
+			}
+			collected = collected.union(spark.read().schema(schema).json(path));
+		}
+
+		final Dataset<Row> usable = collected
+			.filter("dataInfo.deletedbyinference != true AND dataInfo.invisible != true");
+
+		return usable.select("id").distinct();
 	}
 }
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java
@ -67,8 +67,6 @@ public class SparkWhitelistSimRels extends AbstractSparkAction {
 		log.info("workingPath:   '{}'", workingPath);
 		log.info("whiteListPath: '{}'", whiteListPath);

-		JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
 		// file format: source####target
 		Dataset<Row> whiteListRels = spark
 			.read()
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json
@ -1,20 +0,0 @@
-[
-  {
-    "paramName": "i",
-    "paramLongName": "graphBasePath",
-    "paramDescription": "the base path of raw graph",
-    "paramRequired": true
-  },
-  {
-    "paramName": "w",
-    "paramLongName": "inputPath",
-    "paramDescription": "the path to the input relation to cleanup",
-    "paramRequired": true
-  },
-  {
-    "paramName": "o",
-    "paramLongName": "outputPath",
-    "paramDescription": "the path of the output relation cleaned",
-    "paramRequired": true
-  }
-]
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
@ -16,6 +16,10 @@
            <name>filterInvisible</name>
            <description>whether filter out invisible entities after merge</description>
        </property>
+        <property>
+            <name>isLookupUrl</name>
+            <description>the URL address of the lookUp service</description>
+        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>heap memory for driver process</description>
@ -100,35 +104,9 @@
                --conf spark.sql.shuffle.partitions=15000
            </spark-opts>
            <arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
-            <arg>--graphOutputPath</arg><arg>${workingPath}/propagaterelation/</arg>
+            <arg>--graphOutputPath</arg><arg>${graphOutputPath}</arg>
            <arg>--workingPath</arg><arg>${workingPath}</arg>
        </spark>
-        <ok to="CleanRelation"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="CleanRelation">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Clean Relations</name>
-            <class>eu.dnetlib.dhp.oa.dedup.SparkCleanRelation</class>
-            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=15000
-            </spark-opts>
-            <arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
-            <arg>--inputPath</arg><arg>${workingPath}/propagaterelation/relation</arg>
-            <arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg>
-        </spark>
        <ok to="group_entities"/>
        <error to="Kill"/>
    </action>
@ -152,32 +130,9 @@
                --conf spark.sql.shuffle.partitions=15000
            </spark-opts>
            <arg>--graphInputPath</arg><arg>${graphBasePath}</arg>
-            <arg>--outputPath</arg><arg>${workingPath}/grouped_entities</arg>
-        </spark>
-        <ok to="dispatch_entities"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="dispatch_entities">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Dispatch grouped entitities</name>
-            <class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
-            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=7680
-            </spark-opts>
-            <arg>--inputPath</arg><arg>${workingPath}/grouped_entities</arg>
+            <arg>--checkpointPath</arg><arg>${workingPath}/grouped_entities</arg>
            <arg>--outputPath</arg><arg>${graphOutputPath}</arg>
+            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
            <arg>--filterInvisible</arg><arg>${filterInvisible}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
@ -3,7 +3,6 @@ package eu.dnetlib.dhp.oa.dedup;

 import static java.nio.file.Files.createTempDirectory;

-import static org.apache.spark.sql.functions.col;
 import static org.apache.spark.sql.functions.count;
 import static org.junit.jupiter.api.Assertions.*;
 import static org.mockito.Mockito.lenient;
@ -23,14 +22,13 @@ import java.util.stream.Collectors;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.api.java.function.PairFunction;
-import org.apache.spark.sql.*;
 import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.*;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.Mock;
@ -46,8 +44,6 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
-import eu.dnetlib.pace.util.MapDocumentUtil;
-import scala.Tuple2;

@ExtendWith(MockitoExtension.class)
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@ -62,6 +58,8 @@ public class SparkDedupTest implements Serializable {
 	private static String testGraphBasePath;
 	private static String testOutputBasePath;
 	private static String testDedupGraphBasePath;
+	private static String testConsistencyGraphBasePath;
+
 	private static final String testActionSetId = "test-orchestrator";
 	private static String whitelistPath;
 	private static List<String> whiteList;
@ -75,6 +73,7 @@ public class SparkDedupTest implements Serializable {
 			.get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI())
 			.toFile()
 			.getAbsolutePath();
+
 		testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-")
 			.toAbsolutePath()
 			.toString();
@ -83,6 +82,10 @@ public class SparkDedupTest implements Serializable {
 			.toAbsolutePath()
 			.toString();

+		testConsistencyGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-")
+			.toAbsolutePath()
+			.toString();
+
 		whitelistPath = Paths
 			.get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI())
 			.toFile()
@ -674,22 +677,45 @@ public class SparkDedupTest implements Serializable {
 		assertEquals(mergedOrp, deletedOrp);
 	}

+	/**
+	 * Runs SparkCopyRelationsNoOpenorgs on the test graph and checks that relations are written
+	 * under testDedupGraphBasePath, the location that propagateRelationTest passes as its input graph.
+	 */
+	@Test
+	@Order(6)
+	void copyRelationsNoOpenorgsTest() throws Exception {
+
+		// same resource loader used by the other tests of this class
+		ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"));
+		parser
+			.parseArgument(
+				new String[] {
+					"-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath
+				});
+
+		new SparkCopyRelationsNoOpenorgs(parser, spark).run(isLookUpService);
+
+		final Dataset<Row> outputRels = spark.read().text(testDedupGraphBasePath + "/relation");
+
+		// TODO pin the exact expected count (the assertEquals(2382, ...) check was left disabled);
+		// until then at least fail when the job writes no relation at all
+		assertTrue(outputRels.count() > 0, "no relations copied to " + testDedupGraphBasePath + "/relation");
+	}
+
 	@Test
 	@Order(7)
 	void propagateRelationTest() throws Exception {

 		ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"));
-		String outputRelPath = testDedupGraphBasePath + "/propagaterelation";
 		parser
 			.parseArgument(
 				new String[] {
-					"-i", testGraphBasePath, "-w", testOutputBasePath, "-o", outputRelPath
+					"-i", testDedupGraphBasePath, "-w", testOutputBasePath, "-o", testConsistencyGraphBasePath
 				});

 		new SparkPropagateRelation(parser, spark).run(isLookUpService);

-		long relations = jsc.textFile(outputRelPath + "/relation").count();
+		long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count();

 //		assertEquals(4860, relations);
 		System.out.println("relations = " + relations);
@ -699,95 +725,52 @@ public class SparkDedupTest implements Serializable {
 			.read()
 			.load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*"))
 			.as(Encoders.bean(Relation.class));
-		final JavaPairRDD<String, String> mergedIds = mergeRels
-			.where("relClass == 'merges'")
-			.select(mergeRels.col("target"))
-			.distinct()
-			.toJavaRDD()
-			.mapToPair(
-				(PairFunction<Row, String, String>) r -> new Tuple2<String, String>(r.getString(0), "d"));

-		JavaRDD<String> toCheck = jsc
-			.textFile(outputRelPath + "/relation")
-			.mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json))
-			.join(mergedIds)
-			.map(t -> t._2()._1())
-			.mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json))
-			.join(mergedIds)
-			.map(t -> t._2()._1());
+		Dataset<Row> inputRels = spark
+			.read()
+			.json(testDedupGraphBasePath + "/relation");

-		long deletedbyinference = toCheck.filter(this::isDeletedByInference).count();
-		long updated = toCheck.count();
+		Dataset<Row> outputRels = spark
+			.read()
+			.json(testConsistencyGraphBasePath + "/relation");

-		assertEquals(updated, deletedbyinference);
+		assertEquals(
+			0, outputRels
+				.filter("dataInfo.deletedbyinference == true OR dataInfo.invisible == true")
+				.count());
+
+		assertEquals(
+			5, outputRels
+				.filter("relClass NOT IN ('merges', 'isMergedIn')")
+				.count());
+
+		assertEquals(5 + mergeRels.count(), outputRels.count());
 	}

 	@Test
 	@Order(8)
-	void testCleanBaseRelations() throws Exception {
-		ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json"));
-
-		// append dangling relations to be cleaned up
+	void testCleanedPropagatedRelations() throws Exception {
 		Dataset<Row> df_before = spark
 			.read()
 			.schema(Encoders.bean(Relation.class).schema())
-			.json(testGraphBasePath + "/relation");
-		Dataset<Row> df_input = df_before
-			.unionByName(df_before.drop("source").withColumn("source", functions.lit("n/a")))
-			.unionByName(df_before.drop("target").withColumn("target", functions.lit("n/a")));
-		df_input.write().mode(SaveMode.Overwrite).json(testOutputBasePath + "_tmp");
-
-		parser
-			.parseArgument(
-				new String[] {
-					"--graphBasePath", testGraphBasePath,
-					"--inputPath", testGraphBasePath + "/relation",
-					"--outputPath", testDedupGraphBasePath + "/relation"
-				});
-
-		new SparkCleanRelation(parser, spark).run(isLookUpService);
+			.json(testDedupGraphBasePath + "/relation");

 		Dataset<Row> df_after = spark
 			.read()
 			.schema(Encoders.bean(Relation.class).schema())
-			.json(testDedupGraphBasePath + "/relation");
-
-		assertNotEquals(df_before.count(), df_input.count());
-		assertNotEquals(df_input.count(), df_after.count());
-		assertEquals(5, df_after.count());
-	}
-
-	@Test
-	@Order(9)
-	void testCleanDedupedRelations() throws Exception {
-		ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json"));
-
-		String inputRelPath = testDedupGraphBasePath + "/propagaterelation/relation";
-
-		// append dangling relations to be cleaned up
-		Dataset<Row> df_before = spark.read().schema(Encoders.bean(Relation.class).schema()).json(inputRelPath);
-
-		df_before.filter(col("dataInfo.deletedbyinference").notEqual(true)).show(50, false);
-
-		parser
-			.parseArgument(
-				new String[] {
-					"--graphBasePath", testGraphBasePath,
-					"--inputPath", inputRelPath,
-					"--outputPath", testDedupGraphBasePath + "/relation"
-				});
-
-		new SparkCleanRelation(parser, spark).run(isLookUpService);
-
-		Dataset<Row> df_after = spark
-			.read()
-			.schema(Encoders.bean(Relation.class).schema())
-			.json(testDedupGraphBasePath + "/relation");
+			.json(testConsistencyGraphBasePath + "/relation");

 		assertNotEquals(df_before.count(), df_after.count());
-		assertEquals(0, df_after.count());
+
+		assertEquals(
+			0, df_after
+				.filter("dataInfo.deletedbyinference == true OR dataInfo.invisible == true")
+				.count());
+
+		assertEquals(
+			5, df_after
+				.filter("relClass NOT IN ('merges', 'isMergedIn')")
+				.count());
 	}

 	@Test
@ -813,6 +796,7 @@ public class SparkDedupTest implements Serializable {
 	public static void finalCleanUp() throws IOException {
 		FileUtils.deleteDirectory(new File(testOutputBasePath));
 		FileUtils.deleteDirectory(new File(testDedupGraphBasePath));
+		FileUtils.deleteDirectory(new File(testConsistencyGraphBasePath));
 	}

 	public boolean isDeletedByInference(String s) {
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.dedup;

 import static java.nio.file.Files.createTempDirectory;

+import static org.apache.spark.sql.functions.col;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.mockito.Mockito.lenient;

@ -15,10 +16,6 @@ import java.nio.file.Paths;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.PairFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.Row;
@ -33,8 +30,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
-import eu.dnetlib.pace.util.MapDocumentUtil;
-import scala.Tuple2;

@ExtendWith(MockitoExtension.class)
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@ -44,11 +39,11 @@ public class SparkOpenorgsProvisionTest implements Serializable {
 	ISLookUpService isLookUpService;

 	private static SparkSession spark;
-	private static JavaSparkContext jsc;

 	private static String testGraphBasePath;
 	private static String testOutputBasePath;
 	private static String testDedupGraphBasePath;
+	private static String testConsistencyGraphBasePath;
 	private static final String testActionSetId = "test-orchestrator";

 	@BeforeAll
@ -64,6 +59,9 @@ public class SparkOpenorgsProvisionTest implements Serializable {
 		testDedupGraphBasePath = createTempDirectory(SparkOpenorgsProvisionTest.class.getSimpleName() + "-")
 			.toAbsolutePath()
 			.toString();
+		testConsistencyGraphBasePath = createTempDirectory(SparkOpenorgsProvisionTest.class.getSimpleName() + "-")
+			.toAbsolutePath()
+			.toString();

 		FileUtils.deleteDirectory(new File(testOutputBasePath));
 		FileUtils.deleteDirectory(new File(testDedupGraphBasePath));
@ -76,8 +74,13 @@ public class SparkOpenorgsProvisionTest implements Serializable {
 			.master("local[*]")
 			.config(conf)
 			.getOrCreate();
+	}

-		jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+	/**
+	 * Deletes the directories referenced by the three test path fields once all tests have run.
+	 *
+	 * @throws IOException if one of the directories cannot be deleted
+	 */
+	@AfterAll
+	public static void finalCleanUp() throws IOException {
+		final String[] testDirs = {
+			testOutputBasePath, testDedupGraphBasePath, testConsistencyGraphBasePath
+		};
+		for (String dir : testDirs) {
+			FileUtils.deleteDirectory(new File(dir));
+		}
 	}

 	@BeforeEach
@ -186,26 +189,21 @@ public class SparkOpenorgsProvisionTest implements Serializable {

 		new SparkUpdateEntity(parser, spark).run(isLookUpService);

-		long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count();
+		Dataset<Row> organizations = spark.read().json(testDedupGraphBasePath + "/organization");

-		long mergedOrgs = spark
+		Dataset<Row> mergedOrgs = spark
 			.read()
 			.load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")
-			.as(Encoders.bean(Relation.class))
 			.where("relClass=='merges'")
-			.javaRDD()
-			.map(Relation::getTarget)
-			.distinct()
-			.count();
+			.select("target")
+			.distinct();

-		assertEquals(80, organizations);
+		assertEquals(80, organizations.count());

-		long deletedOrgs = jsc
-			.textFile(testDedupGraphBasePath + "/organization")
-			.filter(this::isDeletedByInference)
-			.count();
+		Dataset<Row> deletedOrgs = organizations
+			.filter("dataInfo.deletedbyinference = TRUE");

-		assertEquals(mergedOrgs, deletedOrgs);
+		assertEquals(mergedOrgs.count(), deletedOrgs.count());
 	}

 	@Test
@ -226,10 +224,9 @@ public class SparkOpenorgsProvisionTest implements Serializable {

 		new SparkCopyRelationsNoOpenorgs(parser, spark).run(isLookUpService);

-		final JavaRDD<String> rels = jsc.textFile(testDedupGraphBasePath + "/relation");
-
-		assertEquals(2382, rels.count());
+		final Dataset<Row> outputRels = spark.read().text(testDedupGraphBasePath + "/relation");

+		assertEquals(2382, outputRels.count());
 	}

 	@Test
@ -244,51 +241,41 @@ public class SparkOpenorgsProvisionTest implements Serializable {
 		parser
 			.parseArgument(
 				new String[] {
-					"-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath
+					"-i", testDedupGraphBasePath, "-w", testOutputBasePath, "-o", testConsistencyGraphBasePath
 				});

 		new SparkPropagateRelation(parser, spark).run(isLookUpService);

-		long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count();
-
-		assertEquals(4896, relations);
-
-		// check deletedbyinference
 		final Dataset<Relation> mergeRels = spark
 			.read()
 			.load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*"))
 			.as(Encoders.bean(Relation.class));
-		final JavaPairRDD<String, String> mergedIds = mergeRels
+
+		Dataset<Row> inputRels = spark
+			.read()
+			.json(testDedupGraphBasePath + "/relation");
+
+		Dataset<Row> outputRels = spark
+			.read()
+			.json(testConsistencyGraphBasePath + "/relation");
+
+		final Dataset<Row> mergedIds = mergeRels
 			.where("relClass == 'merges'")
-			.select(mergeRels.col("target"))
-			.distinct()
-			.toJavaRDD()
-			.mapToPair(
-				(PairFunction<Row, String, String>) r -> new Tuple2<String, String>(r.getString(0), "d"));
+			.select(col("target").as("id"))
+			.distinct();

-		JavaRDD<String> toCheck = jsc
-			.textFile(testDedupGraphBasePath + "/relation")
-			.mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json))
-			.join(mergedIds)
-			.map(t -> t._2()._1())
-			.mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json))
-			.join(mergedIds)
-			.map(t -> t._2()._1());
+		Dataset<Row> toUpdateRels = inputRels
+			.as("rel")
+			.join(mergedIds.as("s"), col("rel.source").equalTo(col("s.id")), "left_outer")
+			.join(mergedIds.as("t"), col("rel.target").equalTo(col("t.id")), "left_outer")
+			.filter("s.id IS NOT NULL OR t.id IS NOT NULL")
+			.distinct();

-		long deletedbyinference = toCheck.filter(this::isDeletedByInference).count();
-		long updated = toCheck.count();
+		Dataset<Row> updatedRels = inputRels
+			.select("source", "target", "relClass")
+			.except(outputRels.select("source", "target", "relClass"));

-		assertEquals(updated, deletedbyinference);
+		assertEquals(toUpdateRels.count(), updatedRels.count());
+		assertEquals(140, outputRels.count());
 	}
-
-	@AfterAll
-	public static void finalCleanUp() throws IOException {
-		FileUtils.deleteDirectory(new File(testOutputBasePath));
-		FileUtils.deleteDirectory(new File(testDedupGraphBasePath));
-	}
-
-	public boolean isDeletedByInference(String s) {
-		return s.contains("\"deletedbyinference\":true");
-	}
-
 }
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
@ -0,0 +1,940 @@
+[
+  {
+    "id": "100007630",
+    "uri": "http://dx.doi.org/10.13039/100007630",
+    "name": "College of Engineering and Informatics, National University of Ireland, Galway",
+    "synonym": []
+  },
+  {
+    "id": "100007731",
+    "uri": "http://dx.doi.org/10.13039/100007731",
+    "name": "Endo International",
+    "synonym": []
+  },
+  {
+    "id": "100008099",
+    "uri": "http://dx.doi.org/10.13039/100008099",
+    "name": "Food Safety Authority of Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100008124",
+    "uri": "http://dx.doi.org/10.13039/100008124",
+    "name": "Department of Jobs, Enterprise and Innovation",
+    "synonym": []
+  },
+  {
+    "id": "100009098",
+    "uri": "http://dx.doi.org/10.13039/100009098",
+    "name": "Department of Foreign Affairs and Trade, Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100009099",
+    "uri": "http://dx.doi.org/10.13039/100009099",
+    "name": "Irish Aid",
+    "synonym": []
+  },
+  {
+    "id": "100009770",
+    "uri": "http://dx.doi.org/10.13039/100009770",
+    "name": "National University of Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100009985",
+    "uri": "http://dx.doi.org/10.13039/100009985",
+    "name": "Parkinson's Association of Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100010399",
+    "uri": "http://dx.doi.org/10.13039/100010399",
+    "name": "European Society of Cataract and Refractive Surgeons",
+    "synonym": []
+  },
+  {
+    "id": "100010414",
+    "uri": "http://dx.doi.org/10.13039/100010414",
+    "name": "Health Research Board",
+    "synonym": [
+      "501100001590"
+    ]
+  },
+  {
+    "id": "100010546",
+    "uri": "http://dx.doi.org/10.13039/100010546",
+    "name": "Deparment of Children and Youth Affairs, Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100010993",
+    "uri": "http://dx.doi.org/10.13039/100010993",
+    "name": "Irish Nephrology Society",
+    "synonym": []
+  },
+  {
+    "id": "100011062",
+    "uri": "http://dx.doi.org/10.13039/100011062",
+    "name": "Asian Spinal Cord Network",
+    "synonym": []
+  },
+  {
+    "id": "100011096",
+    "uri": "http://dx.doi.org/10.13039/100011096",
+    "name": "Jazz Pharmaceuticals",
+    "synonym": []
+  },
+  {
+    "id": "100011396",
+    "uri": "http://dx.doi.org/10.13039/100011396",
+    "name": "Irish College of General Practitioners",
+    "synonym": []
+  },
+  {
+    "id": "100012734",
+    "uri": "http://dx.doi.org/10.13039/100012734",
+    "name": "Department for Culture, Heritage and the Gaeltacht, Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100012754",
+    "uri": "http://dx.doi.org/10.13039/100012754",
+    "name": "Horizon Pharma",
+    "synonym": []
+  },
+  {
+    "id": "100012891",
+    "uri": "http://dx.doi.org/10.13039/100012891",
+    "name": "Medical Research Charities Group",
+    "synonym": []
+  },
+  {
+    "id": "100012919",
+    "uri": "http://dx.doi.org/10.13039/100012919",
+    "name": "Epilepsy Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100012920",
+    "uri": "http://dx.doi.org/10.13039/100012920",
+    "name": "GLEN",
+    "synonym": []
+  },
+  {
+    "id": "100012921",
+    "uri": "http://dx.doi.org/10.13039/100012921",
+    "name": "Royal College of Surgeons in Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100013029",
+    "uri": "http://dx.doi.org/10.13039/100013029",
+    "name": "Iris O'Brien Foundation",
+    "synonym": []
+  },
+  {
+    "id": "100013206",
+    "uri": "http://dx.doi.org/10.13039/100013206",
+    "name": "Food Institutional Research Measure",
+    "synonym": []
+  },
+  {
+    "id": "100013381",
+    "uri": "http://dx.doi.org/10.13039/100013381",
+    "name": "Irish Phytochemical Food Network",
+    "synonym": []
+  },
+  {
+    "id": "100013433",
+    "uri": "http://dx.doi.org/10.13039/100013433",
+    "name": "Transport Infrastructure Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100013461",
+    "uri": "http://dx.doi.org/10.13039/100013461",
+    "name": "Arts and Disability Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100013548",
+    "uri": "http://dx.doi.org/10.13039/100013548",
+    "name": "Filmbase",
+    "synonym": []
+  },
+  {
+    "id": "100013917",
+    "uri": "http://dx.doi.org/10.13039/100013917",
+    "name": "Society for Musicology in Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100014251",
+    "uri": "http://dx.doi.org/10.13039/100014251",
+    "name": "Humanities in the European Research Area",
+    "synonym": []
+  },
+  {
+    "id": "100014364",
+    "uri": "http://dx.doi.org/10.13039/100014364",
+    "name": "National Children's Research Centre",
+    "synonym": []
+  },
+  {
+    "id": "100014384",
+    "uri": "http://dx.doi.org/10.13039/100014384",
+    "name": "Amarin Corporation",
+    "synonym": []
+  },
+  {
+    "id": "100014902",
+    "uri": "http://dx.doi.org/10.13039/100014902",
+    "name": "Irish Association for Cancer Research",
+    "synonym": []
+  },
+  {
+    "id": "100015023",
+    "uri": "http://dx.doi.org/10.13039/100015023",
+    "name": "Ireland Funds",
+    "synonym": []
+  },
+  {
+    "id": "100015037",
+    "uri": "http://dx.doi.org/10.13039/100015037",
+    "name": "Simon Cumbers Media Fund",
+    "synonym": []
+  },
+  {
+    "id": "100015319",
+    "uri": "http://dx.doi.org/10.13039/100015319",
+    "name": "Sport Ireland Institute",
+    "synonym": []
+  },
+  {
+    "id": "100015320",
+    "uri": "http://dx.doi.org/10.13039/100015320",
+    "name": "Paralympics Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100015442",
+    "uri": "http://dx.doi.org/10.13039/100015442",
+    "name": "Global Brain Health Institute",
+    "synonym": []
+  },
+  {
+    "id": "100015776",
+    "uri": "http://dx.doi.org/10.13039/100015776",
+    "name": "Health and Social Care Board",
+    "synonym": []
+  },
+  {
+    "id": "100015992",
+    "uri": "http://dx.doi.org/10.13039/100015992",
+    "name": "St. Luke's Institute of Cancer Research",
+    "synonym": []
+  },
+  {
+    "id": "100017897",
+    "uri": "http://dx.doi.org/10.13039/100017897",
+    "name": "Friedreich\u2019s Ataxia Research Alliance Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100018064",
+    "uri": "http://dx.doi.org/10.13039/100018064",
+    "name": "Department of Tourism, Culture, Arts, Gaeltacht, Sport and Media",
+    "synonym": []
+  },
+  {
+    "id": "100018172",
+    "uri": "http://dx.doi.org/10.13039/100018172",
+    "name": "Department of the Environment, Climate and Communications",
+    "synonym": []
+  },
+  {
+    "id": "100018175",
+    "uri": "http://dx.doi.org/10.13039/100018175",
+    "name": "Dairy Processing Technology Centre",
+    "synonym": []
+  },
+  {
+    "id": "100018270",
+    "uri": "http://dx.doi.org/10.13039/100018270",
+    "name": "Health Service Executive",
+    "synonym": []
+  },
+  {
+    "id": "100018529",
+    "uri": "http://dx.doi.org/10.13039/100018529",
+    "name": "Alkermes",
+    "synonym": []
+  },
+  {
+    "id": "100018542",
+    "uri": "http://dx.doi.org/10.13039/100018542",
+    "name": "Irish Endocrine Society",
+    "synonym": []
+  },
+  {
+    "id": "100018754",
+    "uri": "http://dx.doi.org/10.13039/100018754",
+    "name": "An Roinn Sl\u00e1inte",
+    "synonym": []
+  },
+  {
+    "id": "100018998",
+    "uri": "http://dx.doi.org/10.13039/100018998",
+    "name": "Irish Research eLibrary",
+    "synonym": []
+  },
+  {
+    "id": "100019428",
+    "uri": "http://dx.doi.org/10.13039/100019428",
+    "name": "Nabriva Therapeutics",
+    "synonym": []
+  },
+  {
+    "id": "100019637",
+    "uri": "http://dx.doi.org/10.13039/100019637",
+    "name": "Horizon Therapeutics",
+    "synonym": []
+  },
+  {
+    "id": "100020174",
+    "uri": "http://dx.doi.org/10.13039/100020174",
+    "name": "Health Research Charities Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100020202",
+    "uri": "http://dx.doi.org/10.13039/100020202",
+    "name": "UCD Foundation",
+    "synonym": []
+  },
+  {
+    "id": "100020233",
+    "uri": "http://dx.doi.org/10.13039/100020233",
+    "name": "Ireland Canada University Foundation",
+    "synonym": []
+  },
+  {
+    "id": "100022943",
+    "uri": "http://dx.doi.org/10.13039/100022943",
+    "name": "National Cancer Registry Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100001581",
+    "uri": "http://dx.doi.org/10.13039/501100001581",
+    "name": "Arts Council of Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100001582",
+    "uri": "http://dx.doi.org/10.13039/501100001582",
+    "name": "Centre for Ageing Research and Development in Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100001583",
+    "uri": "http://dx.doi.org/10.13039/501100001583",
+    "name": "Cystinosis Foundation Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100001584",
+    "uri": "http://dx.doi.org/10.13039/501100001584",
+    "name": "Department of Agriculture, Food and the Marine, Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100001586",
+    "uri": "http://dx.doi.org/10.13039/501100001586",
+    "name": "Department of Education and Skills, Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100001587",
+    "uri": "http://dx.doi.org/10.13039/501100001587",
+    "name": "Economic and Social Research Institute",
+    "synonym": []
+  },
+  {
+    "id": "501100001588",
+    "uri": "http://dx.doi.org/10.13039/501100001588",
+    "name": "Enterprise Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100001589",
+    "uri": "http://dx.doi.org/10.13039/501100001589",
+    "name": "Environmental Protection Agency",
+    "synonym": []
+  },
+  {
+    "id": "501100001591",
+    "uri": "http://dx.doi.org/10.13039/501100001591",
+    "name": "Heritage Council",
+    "synonym": []
+  },
+  {
+    "id": "501100001592",
+    "uri": "http://dx.doi.org/10.13039/501100001592",
+    "name": "Higher Education Authority",
+    "synonym": []
+  },
+  {
+    "id": "501100001593",
+    "uri": "http://dx.doi.org/10.13039/501100001593",
+    "name": "Irish Cancer Society",
+    "synonym": []
+  },
+  {
+    "id": "501100001594",
+    "uri": "http://dx.doi.org/10.13039/501100001594",
+    "name": "Irish Heart Foundation",
+    "synonym": []
+  },
+  {
+    "id": "501100001595",
+    "uri": "http://dx.doi.org/10.13039/501100001595",
+    "name": "Irish Hospice Foundation",
+    "synonym": []
+  },
+  {
+    "id": "501100001596",
+    "uri": "http://dx.doi.org/10.13039/501100001596",
+    "name": "Irish Research Council for Science, Engineering and Technology",
+    "synonym": []
+  },
+  {
+    "id": "501100001597",
+    "uri": "http://dx.doi.org/10.13039/501100001597",
+    "name": "Irish Research Council for the Humanities and Social Sciences",
+    "synonym": []
+  },
+  {
+    "id": "501100001598",
+    "uri": "http://dx.doi.org/10.13039/501100001598",
+    "name": "Mental Health Commission",
+    "synonym": []
+  },
+  {
+    "id": "501100001600",
+    "uri": "http://dx.doi.org/10.13039/501100001600",
+    "name": "Research and Education Foundation, Sligo General Hospital",
+    "synonym": []
+  },
+  {
+    "id": "501100001601",
+    "uri": "http://dx.doi.org/10.13039/501100001601",
+    "name": "Royal Irish Academy",
+    "synonym": []
+  },
+  {
+    "id": "501100001603",
+    "uri": "http://dx.doi.org/10.13039/501100001603",
+    "name": "Sustainable Energy Authority of Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100001604",
+    "uri": "http://dx.doi.org/10.13039/501100001604",
+    "name": "Teagasc",
+    "synonym": []
+  },
+  {
+    "id": "501100001627",
+    "uri": "http://dx.doi.org/10.13039/501100001627",
+    "name": "Marine Institute",
+    "synonym": []
+  },
+  {
+    "id": "501100001628",
+    "uri": "http://dx.doi.org/10.13039/501100001628",
+    "name": "Central Remedial Clinic",
+    "synonym": []
+  },
+  {
+    "id": "501100001629",
+    "uri": "http://dx.doi.org/10.13039/501100001629",
+    "name": "Royal Dublin Society",
+    "synonym": []
+  },
+  {
+    "id": "501100001630",
+    "uri": "http://dx.doi.org/10.13039/501100001630",
+    "name": "Dublin Institute for Advanced Studies",
+    "synonym": []
+  },
+  {
+    "id": "501100001631",
+    "uri": "http://dx.doi.org/10.13039/501100001631",
+    "name": "University College Dublin",
+    "synonym": []
+  },
+  {
+    "id": "501100001633",
+    "uri": "http://dx.doi.org/10.13039/501100001633",
+    "name": "National University of Ireland, Maynooth",
+    "synonym": []
+  },
+  {
+    "id": "501100001634",
+    "uri": "http://dx.doi.org/10.13039/501100001634",
+    "name": "University of Galway",
+    "synonym": []
+  },
+  {
+    "id": "501100001635",
+    "uri": "http://dx.doi.org/10.13039/501100001635",
+    "name": "University of Limerick",
+    "synonym": []
+  },
+  {
+    "id": "501100001636",
+    "uri": "http://dx.doi.org/10.13039/501100001636",
+    "name": "University College Cork",
+    "synonym": []
+  },
+  {
+    "id": "501100001637",
+    "uri": "http://dx.doi.org/10.13039/501100001637",
+    "name": "Trinity College Dublin",
+    "synonym": []
+  },
+  {
+    "id": "501100001638",
+    "uri": "http://dx.doi.org/10.13039/501100001638",
+    "name": "Dublin City University",
+    "synonym": []
+  },
+  {
+    "id": "501100002081",
+    "uri": "http://dx.doi.org/10.13039/501100002081",
+    "name": "Irish Research Council",
+    "synonym": []
+  },
+  {
+    "id": "501100002736",
+    "uri": "http://dx.doi.org/10.13039/501100002736",
+    "name": "Covidien",
+    "synonym": []
+  },
+  {
+    "id": "501100002755",
+    "uri": "http://dx.doi.org/10.13039/501100002755",
+    "name": "Brennan and Company",
+    "synonym": []
+  },
+  {
+    "id": "501100002919",
+    "uri": "http://dx.doi.org/10.13039/501100002919",
+    "name": "Cork Institute of Technology",
+    "synonym": []
+  },
+  {
+    "id": "501100002959",
+    "uri": "http://dx.doi.org/10.13039/501100002959",
+    "name": "Dublin City Council",
+    "synonym": []
+  },
+  {
+    "id": "501100003036",
+    "uri": "http://dx.doi.org/10.13039/501100003036",
+    "name": "Perrigo Company Charitable Foundation",
+    "synonym": []
+  },
+  {
+    "id": "501100003037",
+    "uri": "http://dx.doi.org/10.13039/501100003037",
+    "name": "Elan",
+    "synonym": []
+  },
+  {
+    "id": "501100003496",
+    "uri": "http://dx.doi.org/10.13039/501100003496",
+    "name": "HeyStaks Technologies",
+    "synonym": []
+  },
+  {
+    "id": "501100003553",
+    "uri": "http://dx.doi.org/10.13039/501100003553",
+    "name": "Gaelic Athletic Association",
+    "synonym": []
+  },
+  {
+    "id": "501100003840",
+    "uri": "http://dx.doi.org/10.13039/501100003840",
+    "name": "Irish Institute of Clinical Neuroscience",
+    "synonym": []
+  },
+  {
+    "id": "501100003956",
+    "uri": "http://dx.doi.org/10.13039/501100003956",
+    "name": "Aspect Medical Systems",
+    "synonym": []
+  },
+  {
+    "id": "501100004162",
+    "uri": "http://dx.doi.org/10.13039/501100004162",
+    "name": "Meath Foundation",
+    "synonym": []
+  },
+  {
+    "id": "501100004210",
+    "uri": "http://dx.doi.org/10.13039/501100004210",
+    "name": "Our Lady's Children's Hospital, Crumlin",
+    "synonym": []
+  },
+  {
+    "id": "501100004321",
+    "uri": "http://dx.doi.org/10.13039/501100004321",
+    "name": "Shire",
+    "synonym": []
+  },
+  {
+    "id": "501100004981",
+    "uri": "http://dx.doi.org/10.13039/501100004981",
+    "name": "Athlone Institute of Technology",
+    "synonym": []
+  },
+  {
+    "id": "501100006518",
+    "uri": "http://dx.doi.org/10.13039/501100006518",
+    "name": "Department of Communications, Energy and Natural Resources, Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100006553",
+    "uri": "http://dx.doi.org/10.13039/501100006553",
+    "name": "Collaborative Centre for Applied Nanotechnology",
+    "synonym": []
+  },
+  {
+    "id": "501100006759",
+    "uri": "http://dx.doi.org/10.13039/501100006759",
+    "name": "CLARITY Centre for Sensor Web Technologies",
+    "synonym": []
+  },
+  {
+    "id": "501100009246",
+    "uri": "http://dx.doi.org/10.13039/501100009246",
+    "name": "Technological University Dublin",
+    "synonym": []
+  },
+  {
+    "id": "501100009269",
+    "uri": "http://dx.doi.org/10.13039/501100009269",
+    "name": "Programme of Competitive Forestry Research for Development",
+    "synonym": []
+  },
+  {
+    "id": "501100009315",
+    "uri": "http://dx.doi.org/10.13039/501100009315",
+    "name": "Cystinosis Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100010808",
+    "uri": "http://dx.doi.org/10.13039/501100010808",
+    "name": "Geological Survey of Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100011030",
+    "uri": "http://dx.doi.org/10.13039/501100011030",
+    "name": "Alimentary Glycoscience Research Cluster",
+    "synonym": []
+  },
+  {
+    "id": "501100011031",
+    "uri": "http://dx.doi.org/10.13039/501100011031",
+    "name": "Alimentary Health",
+    "synonym": []
+  },
+  {
+    "id": "501100011103",
+    "uri": "http://dx.doi.org/10.13039/501100011103",
+    "name": "Rann\u00eds",
+    "synonym": []
+  },
+  {
+    "id": "501100012354",
+    "uri": "http://dx.doi.org/10.13039/501100012354",
+    "name": "Inland Fisheries Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100014384",
+    "uri": "http://dx.doi.org/10.13039/501100014384",
+    "name": "X-Bolt Orthopaedics",
+    "synonym": []
+  },
+  {
+    "id": "501100014710",
+    "uri": "http://dx.doi.org/10.13039/501100014710",
+    "name": "PrecisionBiotics Group",
+    "synonym": []
+  },
+  {
+    "id": "501100014827",
+    "uri": "http://dx.doi.org/10.13039/501100014827",
+    "name": "Dormant Accounts Fund",
+    "synonym": []
+  },
+  {
+    "id": "501100016041",
+    "uri": "http://dx.doi.org/10.13039/501100016041",
+    "name": "St Vincents Anaesthesia Foundation",
+    "synonym": []
+  },
+  {
+    "id": "501100017501",
+    "uri": "http://dx.doi.org/10.13039/501100017501",
+    "name": "FotoNation",
+    "synonym": []
+  },
+  {
+    "id": "501100018641",
+    "uri": "http://dx.doi.org/10.13039/501100018641",
+    "name": "Dairy Research Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100018839",
+    "uri": "http://dx.doi.org/10.13039/501100018839",
+    "name": "Irish Centre for High-End Computing",
+    "synonym": []
+  },
+  {
+    "id": "501100019905",
+    "uri": "http://dx.doi.org/10.13039/501100019905",
+    "name": "Galway University Foundation",
+    "synonym": []
+  },
+  {
+    "id": "501100020036",
+    "uri": "http://dx.doi.org/10.13039/501100020036",
+    "name": "Dystonia Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100020221",
+    "uri": "http://dx.doi.org/10.13039/501100020221",
+    "name": "Irish Motor Neurone Disease Association",
+    "synonym": []
+  },
+  {
+    "id": "501100020270",
+    "uri": "http://dx.doi.org/10.13039/501100020270",
+    "name": "Advanced Materials and Bioengineering Research",
+    "synonym": []
+  },
+  {
+    "id": "501100020403",
+    "uri": "http://dx.doi.org/10.13039/501100020403",
+    "name": "Irish Composites Centre",
+    "synonym": []
+  },
+  {
+    "id": "501100020425",
+    "uri": "http://dx.doi.org/10.13039/501100020425",
+    "name": "Irish Thoracic Society",
+    "synonym": []
+  },
+  {
+    "id": "501100021102",
+    "uri": "http://dx.doi.org/10.13039/501100021102",
+    "name": "Waterford Institute of Technology",
+    "synonym": []
+  },
+  {
+    "id": "501100021110",
+    "uri": "http://dx.doi.org/10.13039/501100021110",
+    "name": "Irish MPS Society",
+    "synonym": []
+  },
+  {
+    "id": "501100021525",
+    "uri": "http://dx.doi.org/10.13039/501100021525",
+    "name": "Insight SFI Research Centre for Data Analytics",
+    "synonym": []
+  },
+  {
+    "id": "501100021694",
+    "uri": "http://dx.doi.org/10.13039/501100021694",
+    "name": "Elan Pharma International",
+    "synonym": []
+  },
+  {
+    "id": "501100021838",
+    "uri": "http://dx.doi.org/10.13039/501100021838",
+    "name": "Royal College of Physicians of Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100022542",
+    "uri": "http://dx.doi.org/10.13039/501100022542",
+    "name": "Breakthrough Cancer Research",
+    "synonym": []
+  },
+  {
+    "id": "501100022610",
+    "uri": "http://dx.doi.org/10.13039/501100022610",
+    "name": "Breast Cancer Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100022728",
+    "uri": "http://dx.doi.org/10.13039/501100022728",
+    "name": "Munster Technological University",
+    "synonym": []
+  },
+  {
+    "id": "501100022729",
+    "uri": "http://dx.doi.org/10.13039/501100022729",
+    "name": "Institute of Technology, Tralee",
+    "synonym": []
+  },
+  {
+    "id": "501100023273",
+    "uri": "http://dx.doi.org/10.13039/501100023273",
+    "name": "HRB Clinical Research Facility Galway",
+    "synonym": []
+  },
+  {
+    "id": "501100023378",
+    "uri": "http://dx.doi.org/10.13039/501100023378",
+    "name": "Lauritzson Foundation",
+    "synonym": []
+  },
+  {
+    "id": "501100023551",
+    "uri": "http://dx.doi.org/10.13039/501100023551",
+    "name": "Cystic Fibrosis Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100023970",
+    "uri": "http://dx.doi.org/10.13039/501100023970",
+    "name": "Tyndall National Institute",
+    "synonym": []
+  },
+  {
+    "id": "501100024094",
+    "uri": "http://dx.doi.org/10.13039/501100024094",
+    "name": "Raidi\u00f3 Teilif\u00eds \u00c9ireann",
+    "synonym": []
+  },
+  {
+    "id": "501100024242",
+    "uri": "http://dx.doi.org/10.13039/501100024242",
+    "name": "Synthesis and Solid State Pharmaceutical Centre",
+    "synonym": []
+  },
+  {
+    "id": "501100024313",
+    "uri": "http://dx.doi.org/10.13039/501100024313",
+    "name": "Irish Rugby Football Union",
+    "synonym": []
+  },
+  {
+    "id": "100007490",
+    "uri": "http://dx.doi.org/10.13039/100007490",
+    "name": "Bausch and Lomb Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100007819",
+    "uri": "http://dx.doi.org/10.13039/100007819",
+    "name": "Allergan",
+    "synonym": []
+  },
+  {
+    "id": "100010547",
+    "uri": "http://dx.doi.org/10.13039/100010547",
+    "name": "Irish Youth Justice Service",
+    "synonym": []
+  },
+  {
+    "id": "100012733",
+    "uri": "http://dx.doi.org/10.13039/100012733",
+    "name": "National Parks and Wildlife Service",
+    "synonym": []
+  },
+  {
+    "id": "100015278",
+    "uri": "http://dx.doi.org/10.13039/100015278",
+    "name": "Pfizer Healthcare Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100017144",
+    "uri": "http://dx.doi.org/10.13039/100017144",
+    "name": "Shell E and P Ireland",
+    "synonym": []
+  },
+  {
+    "id": "100022895",
+    "uri": "http://dx.doi.org/10.13039/100022895",
+    "name": "Health Research Institute, University of Limerick",
+    "synonym": []
+  },
+  {
+    "id": "501100001599",
+    "uri": "http://dx.doi.org/10.13039/501100001599",
+    "name": "National Council for Forest Research and Development",
+    "synonym": []
+  },
+  {
+    "id": "501100006554",
+    "uri": "http://dx.doi.org/10.13039/501100006554",
+    "name": "IDA Ireland",
+    "synonym": []
+  },
+  {
+    "id": "501100011626",
+    "uri": "http://dx.doi.org/10.13039/501100011626",
+    "name": "Energy Policy Research Centre, Economic and Social Research Institute",
+    "synonym": []
+  },
+  {
+    "id": "501100014531",
+    "uri": "http://dx.doi.org/10.13039/501100014531",
+    "name": "Physical Education and Sport Sciences Department, University of Limerick",
+    "synonym": []
+  },
+  {
+    "id": "501100014745",
+    "uri": "http://dx.doi.org/10.13039/501100014745",
+    "name": "APC Microbiome Institute",
+    "synonym": []
+  },
+  {
+    "id": "501100014826",
+    "uri": "http://dx.doi.org/10.13039/501100014826",
+    "name": "ADAPT - Centre for Digital Content Technology",
+    "synonym": []
+  },
+  {
+    "id": "501100020570",
+    "uri": "http://dx.doi.org/10.13039/501100020570",
+    "name": "College of Medicine, Nursing and Health Sciences, National University of Ireland, Galway",
+    "synonym": []
+  },
+  {
+    "id": "501100020871",
+    "uri": "http://dx.doi.org/10.13039/501100020871",
+    "name": "Bernal Institute, University of Limerick",
+    "synonym": []
+  },
+  {
+    "id": "501100023852",
+    "uri": "http://dx.doi.org/10.13039/501100023852",
+    "name": "Moore Institute for Research in the Humanities and Social Studies, University of Galway",
+    "synonym": []
+  }
+]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
@ -133,32 +133,6 @@
            <arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
-        <ok to="PreProcessORCID"/>
-        <error to="Kill"/>
-    </action>
-
-    <!--  ORCID  SECTION -->
-    <action name="PreProcessORCID">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn-cluster</master>
-            <mode>cluster</mode>
-            <name>Convert ORCID to Dataset</name>
-            <class>eu.dnetlib.doiboost.orcid.SparkPreprocessORCID</class>
-            <jar>dhp-doiboost-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.sql.shuffle.partitions=3840
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
-            <arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
-            <arg>--master</arg><arg>yarn-cluster</arg>
-        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml
@ -59,10 +59,10 @@
        </property>

        <!--    ORCID Parameters    -->
-        <property>
-            <name>workingPathOrcid</name>
-            <description>the ORCID working path</description>
-        </property>
+<!--        <property>-->
+<!--            <name>workingPathOrcid</name>-->
+<!--            <description>the ORCID working path</description>-->
+<!--        </property>-->

    </parameters>

@ -84,7 +84,6 @@
            <case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case>
            <case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case>
            <case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
-            <case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
            <case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
            <case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>
            <default to="ConvertCrossrefToOAF"/>
@ -170,32 +169,6 @@
            <arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
-        <ok to="ProcessORCID"/>
-        <error to="Kill"/>
-    </action>
-
-    <!--  ORCID  SECTION -->
-    <action name="ProcessORCID">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn-cluster</master>
-            <mode>cluster</mode>
-            <name>Convert ORCID to Dataset</name>
-            <class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
-            <jar>dhp-doiboost-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.sql.shuffle.partitions=3840
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
-            <arg>--targetPath</arg><arg>${workingPath}/orcidPublication</arg>
-            <arg>--master</arg><arg>yarn-cluster</arg>
-        </spark>
        <ok to="CreateDOIBoost"/>
        <error to="Kill"/>
    </action>
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
@ -66,7 +66,7 @@ object SparkGenerateDoiBoost {
      Encoders.tuple(Encoders.STRING, mapEncoderPub)
    implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]

-    logger.info("Phase 2) Join Crossref with UnpayWall")
+    logger.info("Phase 1) Join Crossref with UnpayWall")

    val crossrefPublication: Dataset[(String, Publication)] =
      spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
@ -91,20 +91,10 @@ object SparkGenerateDoiBoost {
      .write
      .mode(SaveMode.Overwrite)
      .save(s"$workingDirPath/firstJoin")
-    logger.info("Phase 3) Join Result with ORCID")
-    val fj: Dataset[(String, Publication)] =
-      spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
-    val orcidPublication: Dataset[(String, Publication)] =
-      spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
-    fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left")
-      .map(applyMerge)
-      .write
-      .mode(SaveMode.Overwrite)
-      .save(s"$workingDirPath/secondJoin")

-    logger.info("Phase 4) Join Result with MAG")
+    logger.info("Phase 2) Join Result with MAG")
    val sj: Dataset[(String, Publication)] =
-      spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))
+      spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))

    val magPublication: Dataset[(String, Publication)] =
      spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
--- a/Show More
+++ b/Show More