Fill the new mergedIds field when generating dedup records

Filter out dedup records composed of invisible records only. Filter out mergerels that have not been used when creating the dedup record (ungrouping of cliques).
Merge pull request '#9839 : include claimed affiliation relationships' (#476 ) from claim-orgs into beta
2024-10-28 13:31:01 +01:00 · 2024-10-25 10:12:59 +02:00 · 2024-10-25 10:12:06 +02:00 · 2024-10-25 10:10:44 +02:00 · 2024-10-25 10:09:56 +02:00 · 2024-10-25 09:13:54 +02:00
131 changed files with 3116 additions and 4087 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
@ -7,12 +7,12 @@ import java.sql.*;
 import java.util.function.Consumer;

 import org.apache.commons.lang3.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;

 public class DbClient implements Closeable {

-	private static final Logger log = LoggerFactory.getLogger(DbClient.class);
+	private static final Log log = LogFactory.getLog(DbClient.class);

 	private final Connection connection;

@ -37,8 +37,6 @@ public class DbClient implements Closeable {
 		try (final Statement stmt = connection.createStatement()) {
 			stmt.setFetchSize(100);

-			log.info("running SQL:\n\n{}\n\n", sql);
-
 			try (final ResultSet rs = stmt.executeQuery(sql)) {
 				while (rs.next()) {
 					consumer.accept(rs);
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
@ -0,0 +1,53 @@
+
+package eu.dnetlib.dhp.common.api;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import okhttp3.MediaType;
+import okhttp3.RequestBody;
+import okhttp3.internal.Util;
+import okio.BufferedSink;
+import okio.Okio;
+import okio.Source;
+
+/**
+ * {@link RequestBody} whose content is streamed from an {@link InputStream} and whose length is declared by the
+ * caller.
+ */
+public class InputStreamRequestBody extends RequestBody {
+
+	private final MediaType mediaType;
+	private final InputStream inputStream;
+	private final long length;
+
+	/**
+	 * Factory method, the only way to obtain an instance.
+	 *
+	 * @param mediaType the value returned by {@link #contentType()}
+	 * @param inputStream the stream copied to the sink by {@link #writeTo(BufferedSink)}
+	 * @param len the value returned by {@link #contentLength()}
+	 * @return the request body wrapping the given stream
+	 */
+	public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
+		return new InputStreamRequestBody(inputStream, mediaType, len);
+	}
+
+	private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
+		this.mediaType = mediaType;
+		this.inputStream = inputStream;
+		this.length = len;
+	}
+
+	@Override
+	public MediaType contentType() {
+		return mediaType;
+	}
+
+	@Override
+	public long contentLength() {
+		return length;
+	}
+
+	/**
+	 * Copies the whole input stream into the sink.
+	 *
+	 * @param sink the destination of the bytes
+	 * @throws IOException if the copy fails
+	 */
+	@Override
+	public void writeTo(BufferedSink sink) throws IOException {
+		final Source source = Okio.source(inputStream);
+		try {
+			sink.writeAll(source);
+		} finally {
+			// the source is closed whether or not the copy succeeded
+			Util.closeQuietly(source);
+		}
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
@ -0,0 +1,8 @@
+
+package eu.dnetlib.dhp.common.api;
+
+/**
+ * Checked exception raised by ZenodoAPIClient when the requested concept record id cannot be found in the list of
+ * depositions.
+ *
+ * It extends {@link Exception} (and not {@link Throwable} directly) so that it is handled like every other
+ * application-level checked exception.
+ */
+public class MissingConceptDoiException extends Exception {
+	/**
+	 * @param message the description of the failure
+	 */
+	public MissingConceptDoiException(String message) {
+		super(message);
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
@ -0,0 +1,363 @@
+
+package eu.dnetlib.dhp.common.api;
+
+import java.io.*;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.http.HttpHeaders;
+import org.apache.http.entity.ContentType;
+import org.jetbrains.annotations.NotNull;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
+import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
+import okhttp3.*;
+
+public class ZenodoAPIClient implements Serializable {
+
+	String urlString;
+	String bucket;
+
+	String deposition_id;
+	String access_token;
+
+	public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
+
+	private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
+
+	/**
+	 * @return the URL of the depositions endpoint; the other calls append the deposition id and the action to it
+	 */
+	public String getUrlString() {
+		return urlString;
+	}
+
+	/**
+	 * @param urlString the URL of the depositions endpoint
+	 */
+	public void setUrlString(String urlString) {
+		this.urlString = urlString;
+	}
+
+	/**
+	 * @return the bucket URL used by uploadIS as prefix of the upload target
+	 */
+	public String getBucket() {
+		return bucket;
+	}
+
+	/**
+	 * @param bucket the bucket URL used by uploadIS as prefix of the upload target
+	 */
+	public void setBucket(String bucket) {
+		this.bucket = bucket;
+	}
+
+	/**
+	 * @param deposition_id the id of the deposition the next calls will operate on
+	 */
+	public void setDeposition_id(String deposition_id) {
+		this.deposition_id = deposition_id;
+	}
+
+	/**
+	 * @param urlString the URL of the depositions endpoint
+	 * @param access_token the token sent as Bearer credential in the Authorization header of every request
+	 */
+	public ZenodoAPIClient(String urlString, String access_token) {
+
+		this.urlString = urlString;
+		this.access_token = access_token;
+	}
+
+	/**
+	 * Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
+	 *
+	 * @return response code
+	 * @throws IOException if the request fails or the response code is not accepted by checkOKStatus
+	 */
+	public int newDeposition() throws IOException {
+		String json = "{}";
+
+		URL url = new URL(urlString);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setRequestMethod("POST");
+			conn.setDoOutput(true);
+			try (OutputStream os = conn.getOutputStream()) {
+				byte[] input = json.getBytes("utf-8");
+				os.write(input, 0, input.length);
+			}
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when sending the request or reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		// the response describes the deposition just created: keep its id and its bucket for the next calls
+		ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
+		this.bucket = newSubmission.getLinks().getBucket();
+		this.deposition_id = newSubmission.getId();
+
+		return responseCode;
+	}
+
+	/**
+	 * Upload files in Zenodo.
+	 *
+	 * The stream is read up to its end but it is not closed here: closing it is left to the caller.
+	 *
+	 * @param is the inputStream for the file to upload
+	 * @param file_name the name of the file as it will appear on Zenodo
+	 * @return the response code
+	 * @throws IOException if the transfer fails or the response code is not accepted by checkOKStatus
+	 */
+	public int uploadIS(InputStream is, String file_name) throws IOException {
+
+		URL url = new URL(bucket + "/" + file_name);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setDoOutput(true);
+			conn.setRequestMethod("PUT");
+
+			byte[] buf = new byte[8192];
+			int length;
+			try (OutputStream os = conn.getOutputStream()) {
+				while ((length = is.read(buf)) != -1) {
+					os.write(buf, 0, length);
+				}
+
+			}
+			int responseCode = conn.getResponseCode();
+			if (!checkOKStatus(responseCode)) {
+				throw new IOException("Unexpected code " + responseCode + getBody(conn));
+			}
+
+			return responseCode;
+		} finally {
+			// as the other calls of this client do, release the connection once the exchange is over
+			conn.disconnect();
+		}
+	}
+
+	/**
+	 * Reads the whole response of the connection as UTF-8 text, trimming each line and joining the lines without
+	 * separator.
+	 *
+	 * @param conn the connection to read the response from
+	 * @return the response text
+	 * @throws IOException if the response cannot be read
+	 */
+	@NotNull
+	private String getBody(HttpURLConnection conn) throws IOException {
+		final StringBuilder content = new StringBuilder();
+		try (BufferedReader reader = new BufferedReader(
+			new InputStreamReader(conn.getInputStream(), "utf-8"))) {
+			for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+				content.append(line.trim());
+			}
+		}
+		return content.toString();
+	}
+
+	/**
+	 * Associates metadata information to the current deposition
+	 *
+	 * @param metadata the metadata, sent as the UTF-8 encoded body of the request
+	 * @return response code
+	 * @throws IOException if the request fails or the response code is not accepted by checkOKStatus
+	 */
+	public int sendMretadata(String metadata) throws IOException {
+
+		URL url = new URL(urlString + "/" + deposition_id);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setDoOutput(true);
+			conn.setRequestMethod("PUT");
+
+			try (OutputStream os = conn.getOutputStream()) {
+				byte[] input = metadata.getBytes("utf-8");
+				os.write(input, 0, input.length);
+
+			}
+
+			final int responseCode = conn.getResponseCode();
+			// the body of a rejected response must be read while the connection is still open
+			if (!checkOKStatus(responseCode))
+				throw new IOException("Unexpected code " + responseCode + getBody(conn));
+
+			return responseCode;
+		} finally {
+			conn.disconnect();
+		}
+
+	}
+
+	/**
+	 * Tells whether the response code is one of those accepted by this client.
+	 *
+	 * @param responseCode the HTTP response code
+	 * @return true only for 200 (OK) and 201 (Created)
+	 */
+	private boolean checkOKStatus(int responseCode) {
+
+		// "code != 200 || code != 201" holds for every code: the two accepted codes have to be tested for equality
+		return HttpURLConnection.HTTP_OK == responseCode ||
+			HttpURLConnection.HTTP_CREATED == responseCode;
+	}
+
+	/**
+	 * Publishes the current deposition, be it a brand new deposition or a new version of an existing one.
+	 *
+	 * @return response code
+	 * @throws IOException if the call fails or the response is not successful
+	 */
+	@Deprecated
+	public int publish() throws IOException {
+
+		final OkHttpClient client = new OkHttpClient.Builder()
+			.connectTimeout(600, TimeUnit.SECONDS)
+			.build();
+
+		final RequestBody emptyJson = RequestBody.create("{}", MEDIA_TYPE_JSON);
+		final String publishUrl = urlString + "/" + deposition_id + "/actions/publish";
+
+		final Request publishRequest = new Request.Builder()
+			.url(publishUrl)
+			.addHeader("Authorization", "Bearer " + access_token)
+			.post(emptyJson)
+			.build();
+
+		try (Response outcome = client.newCall(publishRequest).execute()) {
+			if (outcome.isSuccessful()) {
+				return outcome.code();
+			}
+			throw new IOException("Unexpected code " + outcome + outcome.body().string());
+		}
+	}
+
+	/**
+	 * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
+	 * for the new version.
+	 *
+	 * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
+	 *            part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
+	 *            concept_rec_id = 656930
+	 * @return response code
+	 * @throws IOException if a request fails or the response code is not accepted by checkOKStatus
+	 * @throws MissingConceptDoiException if no deposition with the given concept record id is found
+	 */
+	public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
+		// resolves the concept record id to the id of the deposition the new version starts from
+		setDepositionId(concept_rec_id, 1);
+		String json = "{}";
+
+		URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setDoOutput(true);
+			conn.setRequestMethod("POST");
+
+			try (OutputStream os = conn.getOutputStream()) {
+				byte[] input = json.getBytes("utf-8");
+				os.write(input, 0, input.length);
+
+			}
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when sending the request or reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		// the id of the new version is the last path segment of the latest draft link
+		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
+		String latest_draft = zenodoModel.getLinks().getLatest_draft();
+		deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
+		bucket = getBucket(latest_draft);
+
+		return responseCode;
+
+	}
+
+	/**
+	 * To finish uploading a version or new deposition not published
+	 * It sets the deposition_id and the bucket to be used
+	 *
+	 * @param deposition_id the deposition id of the not yet published upload
+	 * @return response code
+	 * @throws IOException if the request fails or the response code is not accepted by checkOKStatus
+	 * @throws MissingConceptDoiException declared for the callers, not raised by this implementation
+	 */
+	public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
+
+		this.deposition_id = deposition_id;
+
+		String json = "{}";
+
+		URL url = new URL(urlString + "/" + deposition_id);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setRequestMethod("POST");
+			conn.setDoOutput(true);
+			try (OutputStream os = conn.getOutputStream()) {
+				byte[] input = json.getBytes("utf-8");
+				os.write(input, 0, input.length);
+			}
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when sending the request or reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
+		bucket = zenodoModel.getLinks().getBucket();
+
+		return responseCode;
+
+	}
+
+	/**
+	 * Scans the previous depositions page by page, starting from the given page, and sets deposition_id to the id of
+	 * the first deposition having the given concept record id.
+	 *
+	 * @param concept_rec_id the concept record id to look for
+	 * @param page the first page to be inspected
+	 * @throws IOException if a page cannot be retrieved
+	 * @throws MissingConceptDoiException if an empty page is reached without finding the concept record id
+	 */
+	private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
+
+		// plain loop over the pages: one stack frame per page is not needed
+		Integer currentPage = page;
+		while (true) {
+			ZenodoModelList zenodoModelList = new Gson()
+				.fromJson(getPrevDepositions(String.valueOf(currentPage)), ZenodoModelList.class);
+
+			for (ZenodoModel zm : zenodoModelList) {
+				// depositions without concept record id are skipped instead of raising a NullPointerException
+				if (zm.getConceptrecid() != null && zm.getConceptrecid().equals(concept_rec_id)) {
+					deposition_id = zm.getId();
+					return;
+				}
+			}
+			// an empty page marks the end of the list
+			if (zenodoModelList.size() == 0)
+				throw new MissingConceptDoiException(
+					"The concept record id specified was missing in the list of depositions");
+			currentPage = currentPage + 1;
+		}
+
+	}
+
+	/**
+	 * Retrieves one page of the list of depositions.
+	 *
+	 * @param page the value of the "page" query parameter
+	 * @return the body of the response
+	 * @throws IOException if the request fails or the response code is not accepted by checkOKStatus
+	 */
+	private String getPrevDepositions(String page) throws IOException {
+
+		HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
+		urlBuilder.addQueryParameter("page", page);
+
+		URL url = new URL(urlBuilder.build().toString());
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setDoOutput(true);
+			conn.setRequestMethod("GET");
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		return body;
+
+	}
+
+	/**
+	 * Retrieves the deposition available at the given URL and extracts its bucket link.
+	 *
+	 * @param inputUurl the URL of the deposition
+	 * @return the bucket link of the deposition
+	 * @throws IOException if the request fails or the response code is not accepted by checkOKStatus
+	 */
+	private String getBucket(String inputUurl) throws IOException {
+
+		URL url = new URL(inputUurl);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setDoOutput(true);
+			conn.setRequestMethod("GET");
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
+
+		return zenodoModel.getLinks().getBucket();
+
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
@ -0,0 +1,14 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+/**
+ * Community entry of the deposition metadata, identified by its identifier.
+ *
+ * It is serializable because the serializable Metadata bean keeps a list of communities.
+ */
+public class Community implements java.io.Serializable {
+	private String identifier;
+
+	public String getIdentifier() {
+		return identifier;
+	}
+
+	public void setIdentifier(String identifier) {
+		this.identifier = identifier;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
@ -0,0 +1,47 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+/**
+ * Creator entry of the deposition metadata: name, affiliation and ORCID.
+ *
+ * It is serializable because the serializable Metadata bean keeps a list of creators.
+ */
+public class Creator implements java.io.Serializable {
+	private String affiliation;
+	private String name;
+	private String orcid;
+
+	public String getAffiliation() {
+		return affiliation;
+	}
+
+	public void setAffiliation(String affiliation) {
+		this.affiliation = affiliation;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	public void setName(String name) {
+		this.name = name;
+	}
+
+	public String getOrcid() {
+		return orcid;
+	}
+
+	public void setOrcid(String orcid) {
+		this.orcid = orcid;
+	}
+
+	/**
+	 * Creates a creator with the given values; the fields whose value is null are left unset.
+	 *
+	 * @param name the name of the creator
+	 * @param affiliation the affiliation of the creator
+	 * @param orcid the ORCID of the creator
+	 * @return the new creator
+	 */
+	public static Creator newInstance(String name, String affiliation, String orcid) {
+		Creator c = new Creator();
+		if (name != null) {
+			c.name = name;
+		}
+		if (affiliation != null) {
+			c.affiliation = affiliation;
+		}
+		if (orcid != null) {
+			c.orcid = orcid;
+		}
+
+		return c;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
@ -0,0 +1,44 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * Bean describing one file of a deposition (checksum, filename, filesize, id); ZenodoModel keeps a list of them.
+ */
+public class File implements Serializable {
+	private String checksum;
+	private String filename;
+	private long filesize;
+	private String id;
+
+	public String getChecksum() {
+		return checksum;
+	}
+
+	public void setChecksum(String checksum) {
+		this.checksum = checksum;
+	}
+
+	public String getFilename() {
+		return filename;
+	}
+
+	public void setFilename(String filename) {
+		this.filename = filename;
+	}
+
+	public long getFilesize() {
+		return filesize;
+	}
+
+	public void setFilesize(long filesize) {
+		this.filesize = filesize;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
@ -0,0 +1,23 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * Grant entry of the deposition metadata, made of the sole grant id.
+ */
+public class Grant implements Serializable {
+	private String id;
+
+	/**
+	 * Creates a grant with the given id.
+	 *
+	 * @param id the id of the grant
+	 * @return the new grant
+	 */
+	public static Grant newInstance(String id) {
+		final Grant grant = new Grant();
+		grant.setId(id);
+		return grant;
+	}
+
+	public String getId() {
+		return this.id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
@ -0,0 +1,92 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * Bean with the links attached to a deposition. ZenodoAPIClient reads the bucket link (prefix of the upload URL)
+ * and the latest_draft link (whose last path segment is used as deposition id).
+ * NOTE(review): the field names presumably mirror the keys of the JSON returned by the API - confirm.
+ */
+public class Links implements Serializable {
+
+	private String bucket;
+
+	private String discard;
+
+	private String edit;
+	private String files;
+	private String html;
+	private String latest_draft;
+	private String latest_draft_html;
+	private String publish;
+
+	private String self;
+
+	public String getBucket() {
+		return bucket;
+	}
+
+	public void setBucket(String bucket) {
+		this.bucket = bucket;
+	}
+
+	public String getDiscard() {
+		return discard;
+	}
+
+	public void setDiscard(String discard) {
+		this.discard = discard;
+	}
+
+	public String getEdit() {
+		return edit;
+	}
+
+	public void setEdit(String edit) {
+		this.edit = edit;
+	}
+
+	public String getFiles() {
+		return files;
+	}
+
+	public void setFiles(String files) {
+		this.files = files;
+	}
+
+	public String getHtml() {
+		return html;
+	}
+
+	public void setHtml(String html) {
+		this.html = html;
+	}
+
+	public String getLatest_draft() {
+		return latest_draft;
+	}
+
+	public void setLatest_draft(String latest_draft) {
+		this.latest_draft = latest_draft;
+	}
+
+	public String getLatest_draft_html() {
+		return latest_draft_html;
+	}
+
+	public void setLatest_draft_html(String latest_draft_html) {
+		this.latest_draft_html = latest_draft_html;
+	}
+
+	public String getPublish() {
+		return publish;
+	}
+
+	public void setPublish(String publish) {
+		this.publish = publish;
+	}
+
+	public String getSelf() {
+		return self;
+	}
+
+	public void setSelf(String self) {
+		this.self = self;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
@ -0,0 +1,153 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Bean with the descriptive metadata of a deposition (title, creators, communities, grants, license, ...); it is
+ * the type of the metadata property of ZenodoModel.
+ * NOTE(review): the field names presumably mirror the keys of the JSON exchanged with the API - confirm.
+ */
+public class Metadata implements Serializable {
+
+	private String access_right;
+	private List<Community> communities;
+	private List<Creator> creators;
+	private String description;
+	private String doi;
+	private List<Grant> grants;
+	private List<String> keywords;
+	private String language;
+	private String license;
+	private PrereserveDoi prereserve_doi;
+	private String publication_date;
+	private List<String> references;
+	private List<RelatedIdentifier> related_identifiers;
+	private String title;
+	private String upload_type;
+	private String version;
+
+	public String getUpload_type() {
+		return upload_type;
+	}
+
+	public void setUpload_type(String upload_type) {
+		this.upload_type = upload_type;
+	}
+
+	public String getVersion() {
+		return version;
+	}
+
+	public void setVersion(String version) {
+		this.version = version;
+	}
+
+	public String getAccess_right() {
+		return access_right;
+	}
+
+	public void setAccess_right(String access_right) {
+		this.access_right = access_right;
+	}
+
+	public List<Community> getCommunities() {
+		return communities;
+	}
+
+	public void setCommunities(List<Community> communities) {
+		this.communities = communities;
+	}
+
+	public List<Creator> getCreators() {
+		return creators;
+	}
+
+	public void setCreators(List<Creator> creators) {
+		this.creators = creators;
+	}
+
+	public String getDescription() {
+		return description;
+	}
+
+	public void setDescription(String description) {
+		this.description = description;
+	}
+
+	public String getDoi() {
+		return doi;
+	}
+
+	public void setDoi(String doi) {
+		this.doi = doi;
+	}
+
+	public List<Grant> getGrants() {
+		return grants;
+	}
+
+	public void setGrants(List<Grant> grants) {
+		this.grants = grants;
+	}
+
+	public List<String> getKeywords() {
+		return keywords;
+	}
+
+	public void setKeywords(List<String> keywords) {
+		this.keywords = keywords;
+	}
+
+	public String getLanguage() {
+		return language;
+	}
+
+	public void setLanguage(String language) {
+		this.language = language;
+	}
+
+	public String getLicense() {
+		return license;
+	}
+
+	public void setLicense(String license) {
+		this.license = license;
+	}
+
+	public PrereserveDoi getPrereserve_doi() {
+		return prereserve_doi;
+	}
+
+	public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
+		this.prereserve_doi = prereserve_doi;
+	}
+
+	public String getPublication_date() {
+		return publication_date;
+	}
+
+	public void setPublication_date(String publication_date) {
+		this.publication_date = publication_date;
+	}
+
+	public List<String> getReferences() {
+		return references;
+	}
+
+	public void setReferences(List<String> references) {
+		this.references = references;
+	}
+
+	public List<RelatedIdentifier> getRelated_identifiers() {
+		return related_identifiers;
+	}
+
+	public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
+		this.related_identifiers = related_identifiers;
+	}
+
+	public String getTitle() {
+		return title;
+	}
+
+	public void setTitle(String title) {
+		this.title = title;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
@ -0,0 +1,25 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * Bean pairing a doi with a recid; it is the type of the prereserve_doi property of Metadata.
+ */
+public class PrereserveDoi implements Serializable {
+	private String doi;
+	private String recid;
+
+	public String getDoi() {
+		return doi;
+	}
+
+	public void setDoi(String doi) {
+		this.doi = doi;
+	}
+
+	public String getRecid() {
+		return recid;
+	}
+
+	public void setRecid(String recid) {
+		this.recid = recid;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
@ -0,0 +1,43 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * Bean describing an identifier related to a deposition (identifier, relation, resource_type, scheme); Metadata
+ * keeps a list of them in its related_identifiers property.
+ */
+public class RelatedIdentifier implements Serializable {
+	private String identifier;
+	private String relation;
+	private String resource_type;
+	private String scheme;
+
+	public String getIdentifier() {
+		return identifier;
+	}
+
+	public void setIdentifier(String identifier) {
+		this.identifier = identifier;
+	}
+
+	public String getRelation() {
+		return relation;
+	}
+
+	public void setRelation(String relation) {
+		this.relation = relation;
+	}
+
+	public String getResource_type() {
+		return resource_type;
+	}
+
+	public void setResource_type(String resource_type) {
+		this.resource_type = resource_type;
+	}
+
+	public String getScheme() {
+		return scheme;
+	}
+
+	public void setScheme(String scheme) {
+		this.scheme = scheme;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
@ -0,0 +1,118 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Bean describing a deposition. ZenodoAPIClient obtains it by parsing the JSON responses with Gson and reads its
+ * id, its conceptrecid and its links.
+ */
+public class ZenodoModel implements Serializable {
+
+	private String conceptrecid;
+	private String created;
+
+	private List<File> files;
+	private String id;
+	private Links links;
+	private Metadata metadata;
+	private String modified;
+	private String owner;
+	private String record_id;
+	private String state;
+	private boolean submitted;
+	private String title;
+
+	public String getConceptrecid() {
+		return conceptrecid;
+	}
+
+	public void setConceptrecid(String conceptrecid) {
+		this.conceptrecid = conceptrecid;
+	}
+
+	public String getCreated() {
+		return created;
+	}
+
+	public void setCreated(String created) {
+		this.created = created;
+	}
+
+	public List<File> getFiles() {
+		return files;
+	}
+
+	public void setFiles(List<File> files) {
+		this.files = files;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public Links getLinks() {
+		return links;
+	}
+
+	public void setLinks(Links links) {
+		this.links = links;
+	}
+
+	public Metadata getMetadata() {
+		return metadata;
+	}
+
+	public void setMetadata(Metadata metadata) {
+		this.metadata = metadata;
+	}
+
+	public String getModified() {
+		return modified;
+	}
+
+	public void setModified(String modified) {
+		this.modified = modified;
+	}
+
+	public String getOwner() {
+		return owner;
+	}
+
+	public void setOwner(String owner) {
+		this.owner = owner;
+	}
+
+	public String getRecord_id() {
+		return record_id;
+	}
+
+	public void setRecord_id(String record_id) {
+		this.record_id = record_id;
+	}
+
+	public String getState() {
+		return state;
+	}
+
+	public void setState(String state) {
+		this.state = state;
+	}
+
+	public boolean isSubmitted() {
+		return submitted;
+	}
+
+	public void setSubmitted(boolean submitted) {
+		this.submitted = submitted;
+	}
+
+	public String getTitle() {
+		return title;
+	}
+
+	public void setTitle(String title) {
+		this.title = title;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
@ -0,0 +1,7 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.util.ArrayList;
+
+/**
+ * List of depositions. The dedicated type lets ZenodoAPIClient parse a JSON page of depositions with
+ * Gson.fromJson(json, ZenodoModelList.class).
+ */
+public class ZenodoModelList extends ArrayList<ZenodoModel> {
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java
@ -65,7 +65,13 @@ public class RunSQLSparkJob {
 				for (String statement : sql.split(";\\s*/\\*\\s*EOS\\s*\\*/\\s*")) {
 					log.info("executing: {}", statement);
 					long startTime = System.currentTimeMillis();
+					try {
 						spark.sql(statement).show();
+					} catch (Exception e) {
+						log.error("Error executing statement: {}", statement, e);
+						System.err.println("Error executing statement: " + statement + "\n" + e);
+						throw e;
+					}
 					log
 						.info(
 							"executed in {}",
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@ -119,7 +119,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 							.getContext()
 							.stream()
 							.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
-							.collect(Collectors.toCollection(ArrayList::new)));
+							.collect(Collectors.toList()));
 			}
 			return (T) res;
 		} else {
@ -1015,41 +1015,4 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 			.orElse(null);
 	}

-	/**
-	 * Implements bad and ugly things that we should get rid of ASAP.
-	 *
-	 * @param value
-	 * @return
-	 * @param <T>
-	 */
-	public static <T extends Oaf> T dedicatedUglyHacks(T value) {
-		if (value instanceof OafEntity) {
-			if (value instanceof Result) {
-				final Result r = (Result) value;
-
-				// Fix for AMS Acta
-				Optional
-					.ofNullable(r.getInstance())
-					.map(
-						instance -> instance
-							.stream()
-							.filter(
-								i -> Optional
-									.ofNullable(i.getHostedby())
-									.map(KeyValue::getKey)
-									.map(dsId -> dsId.equals("10|re3data_____::4cc76bed7ce2fb95fd8e7a2dfde16016"))
-									.orElse(false)))
-					.ifPresent(instance -> instance.forEach(i -> {
-						if (Optional
-							.ofNullable(i.getPid())
-							.map(pid -> pid.stream().noneMatch(p -> p.getValue().startsWith("10.6092/unibo/amsacta")))
-							.orElse(false)) {
-							i.setHostedby(UNKNOWN_REPOSITORY);
-						}
-					}));
-			}
-		}
-		return value;
-	}
-
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
@ -433,10 +433,7 @@ public class MergeUtils {

 		// merge datainfo for same context id
 		merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> {
-			ArrayList<DataInfo> di = new ArrayList<>();
-			di.addAll(r.getDataInfo());
-			di.addAll(l.getDataInfo());
-			r.setDataInfo(di);
+			r.getDataInfo().addAll(l.getDataInfo());
 			return r;
 		}));

--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
@ -0,0 +1,109 @@
+
+package eu.dnetlib.dhp.common.api;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests of ZenodoAPIClient against the Zenodo sandbox. They are disabled because they need the network and a valid
+ * access token.
+ */
+@Disabled
+class ZenodoAPIClientTest {
+
+	private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
+	private final String ACCESS_TOKEN = "";
+
+	private final String CONCEPT_REC_ID = "657113";
+
+	private final String depositionId = "674915";
+
+	@Test
+	void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
+		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+			ACCESS_TOKEN);
+		Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
+
+		File file = new File(getClass()
+			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
+			.getPath());
+
+		// uploadIS does not close the stream: it is closed here
+		try (InputStream is = new FileInputStream(file)) {
+			Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
+		}
+
+		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
+
+		Assertions.assertEquals(200, client.sendMretadata(metadata));
+
+		Assertions.assertEquals(202, client.publish());
+
+	}
+
+	@Test
+	void testNewDeposition() throws IOException {
+
+		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+			ACCESS_TOKEN);
+		Assertions.assertEquals(201, client.newDeposition());
+
+		File file = new File(getClass()
+			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
+			.getPath());
+
+		// uploadIS does not close the stream: it is closed here
+		try (InputStream is = new FileInputStream(file)) {
+			Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
+		}
+
+		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
+
+		Assertions.assertEquals(200, client.sendMretadata(metadata));
+
+		Assertions.assertEquals(202, client.publish());
+
+	}
+
+	@Test
+	void testNewVersionNewName() throws IOException, MissingConceptDoiException {
+
+		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+			ACCESS_TOKEN);
+
+		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
+
+		File file = new File(getClass()
+			.getResource("/eu/dnetlib/dhp/common/api/newVersion")
+			.getPath());
+
+		// uploadIS does not close the stream: it is closed here
+		try (InputStream is = new FileInputStream(file)) {
+			Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
+		}
+
+		Assertions.assertEquals(202, client.publish());
+
+	}
+
+	@Test
+	void testNewVersionOldName() throws IOException, MissingConceptDoiException {
+
+		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+			ACCESS_TOKEN);
+
+		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
+
+		File file = new File(getClass()
+			.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
+			.getPath());
+
+		// uploadIS does not close the stream: it is closed here
+		try (InputStream is = new FileInputStream(file)) {
+			Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
+		}
+
+		Assertions.assertEquals(202, client.publish());
+
+	}
+
+}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
@ -177,9 +177,9 @@ class OafMapperUtilsTest {
 		assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));

 		assertEquals(
-			ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
+			ModelConstants.DATASET_RESULTTYPE_CLASSID,
 			((Result) MergeUtils
-				.merge(p2, d1))
+				.merge(p2, d1, true))
 					.getResulttype()
 					.getClassid());
 	}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
@ -90,7 +90,7 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 		inferFrom = normalize(inferFrom);
 		inferFrom = filterAllStopWords(inferFrom);
 		Set<String> cities = getCities(inferFrom, 4);
-		return citiesToCountry(cities).stream().findFirst().orElse("UNKNOWN");
+		return citiesToCountry(cities).stream().filter(Objects::nonNull).findFirst().orElse("UNKNOWN");
 	}

 	public static String cityInference(String original) {
--- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/common/PaceFunctionTest.java
+++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/common/PaceFunctionTest.java
@ -1,8 +1,7 @@

 package eu.dnetlib.pace.common;

-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.*;

 import org.junit.jupiter.api.*;

@ -54,8 +53,17 @@ public class PaceFunctionTest extends AbstractPaceFunctions {
 		System.out.println("Fixed aliases  : " + fixAliases(TEST_STRING));
 	}

+	/**
+	 * Checks that {@code countryInference} throws a {@link NullPointerException} when its second
+	 * argument is {@code null}.
+	 */
+	@Test
+	public void countryInferenceTest_NPE() {
+		assertThrows(NullPointerException.class, () -> {
+			countryInference("UNKNOWN", null);
+		}, "Expected countryInference() to throw an NPE");
+	}
+
 	@Test
 	public void countryInferenceTest() {
+		assertEquals("UNKNOWN", countryInference("UNKNOWN", ""));
 		assertEquals("IT", countryInference("UNKNOWN", "Università di Bologna"));
 		assertEquals("UK", countryInference("UK", "Università di Bologna"));
 		assertEquals("IT", countryInference("UNKNOWN", "Universiteé de Naples"));
--- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
+++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
@ -11,7 +11,6 @@ import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;

 import eu.dnetlib.pace.model.Person;
-import jdk.nashorn.internal.ir.annotations.Ignore;

 public class UtilTest {

--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml
@ -135,10 +135,21 @@
            <arg>--outputPath</arg><arg>${workingDir}/action_payload_by_type</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
        </spark>
-        <ok to="PromoteActionPayloadForDatasetTable"/>
+        <ok to="ForkPromote"/>
        <error to="Kill"/>
    </action>

+    <!-- Starts the eight PromoteActionPayloadFor*Table actions as separate fork paths;
+         the promote actions transition to the JoinPromote node on success. -->
+    <fork name="ForkPromote">
+        <path start="PromoteActionPayloadForDatasetTable"/>
+        <path start="PromoteActionPayloadForDatasourceTable"/>
+        <path start="PromoteActionPayloadForOrganizationTable"/>
+        <path start="PromoteActionPayloadForOtherResearchProductTable"/>
+        <path start="PromoteActionPayloadForProjectTable"/>
+        <path start="PromoteActionPayloadForPublicationTable"/>
+        <path start="PromoteActionPayloadForRelationTable"/>
+        <path start="PromoteActionPayloadForSoftwareTable"/>
+    </fork>
+
    <action name="PromoteActionPayloadForDatasetTable">
        <sub-workflow>
            <app-path>${wf:appPath()}/promote_action_payload_for_dataset_table</app-path>
@ -150,7 +161,7 @@
                </property>
            </configuration>
        </sub-workflow>
-        <ok to="PromoteActionPayloadForDatasourceTable"/>
+        <ok to="JoinPromote"/>
        <error to="Kill"/>
    </action>

@ -165,7 +176,7 @@
                </property>
            </configuration>
        </sub-workflow>
-        <ok to="PromoteActionPayloadForOrganizationTable"/>
+        <ok to="JoinPromote"/>
        <error to="Kill"/>
    </action>

@ -180,7 +191,7 @@
                </property>
            </configuration>
        </sub-workflow>
-        <ok to="PromoteActionPayloadForOtherResearchProductTable"/>
+        <ok to="JoinPromote"/>
        <error to="Kill"/>
    </action>

@ -195,7 +206,7 @@
                </property>
            </configuration>
        </sub-workflow>
-        <ok to="PromoteActionPayloadForProjectTable"/>
+        <ok to="JoinPromote"/>
        <error to="Kill"/>
    </action>

@ -210,7 +221,7 @@
                </property>
            </configuration>
        </sub-workflow>
-        <ok to="PromoteActionPayloadForPublicationTable"/>
+        <ok to="JoinPromote"/>
        <error to="Kill"/>
    </action>

@ -225,7 +236,7 @@
                </property>
            </configuration>
        </sub-workflow>
-        <ok to="PromoteActionPayloadForRelationTable"/>
+        <ok to="JoinPromote"/>
        <error to="Kill"/>
    </action>

@ -240,7 +251,7 @@
                </property>
            </configuration>
        </sub-workflow>
-        <ok to="PromoteActionPayloadForSoftwareTable"/>
+        <ok to="JoinPromote"/>
        <error to="Kill"/>
    </action>

@ -255,9 +266,11 @@
                </property>
            </configuration>
        </sub-workflow>
-        <ok to="End"/>
+        <ok to="JoinPromote"/>
        <error to="Kill"/>
    </action>

+    <!-- Join node for the paths started by ForkPromote; once they have all arrived the workflow moves on to End. -->
+    <join name="JoinPromote" to="End"/>
+
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
@ -46,6 +46,9 @@ public class GetOpenCitationsRefs implements Serializable {
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath {}", outputPath);

+		final String backupPath = parser.get("backupPath");
+		log.info("backupPath {}", backupPath);
+
 		Configuration conf = new Configuration();
 		conf.set("fs.defaultFS", hdfsNameNode);

@ -53,11 +56,11 @@ public class GetOpenCitationsRefs implements Serializable {

 		GetOpenCitationsRefs ocr = new GetOpenCitationsRefs();

-		ocr.doExtract(inputPath, outputPath, fileSystem);
+		ocr.doExtract(inputPath, outputPath, backupPath, fileSystem);

 	}

-	private void doExtract(String inputPath, String outputPath, FileSystem fileSystem)
+	private void doExtract(String inputPath, String outputPath, String backupPath, FileSystem fileSystem)
 		throws IOException {

 		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
@ -89,6 +92,7 @@ public class GetOpenCitationsRefs implements Serializable {
 				}

 			}
+			fileSystem.rename(fileStatus.getPath(), new Path(backupPath));
 		}

 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
@ -49,9 +49,6 @@ public class ReadCOCI implements Serializable {
 		final String workingPath = parser.get("inputPath");
 		log.info("workingPath {}", workingPath);

-		final String backupPath = parser.get("backupPath");
-		log.info("backupPath {}", backupPath);
-
 		SparkConf sconf = new SparkConf();

 		Configuration conf = new Configuration();
@ -71,14 +68,12 @@ public class ReadCOCI implements Serializable {
 					workingPath,
 					fileSystem,
 					outputPath,
-					backupPath,
 					delimiter);
 			});
 	}

 	private static void doRead(SparkSession spark, String workingPath, FileSystem fileSystem,
 		String outputPath,
-		String backupPath,
 		String delimiter) throws IOException {
 		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
 			.listFiles(
@ -113,7 +108,7 @@ public class ReadCOCI implements Serializable {
 				.option("compression", "gzip")
 				.json(outputPath);

-			fileSystem.rename(fileStatus.getPath(), new Path(backupPath));
+			fileSystem.delete(fileStatus.getPath());
 		}

 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java
@ -11,6 +11,7 @@ import java.util.stream.Collectors;

 import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.BZip2Codec;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
@ -20,7 +21,6 @@ import org.apache.spark.sql.*;
 import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.spark_project.jetty.util.StringUtil;

 import com.fasterxml.jackson.databind.ObjectMapper;

@ -297,7 +297,7 @@ public class ExtractPerson implements Serializable {
 	}

 	private static Relation getAffiliationRelation(Employment row) {
-		String source = PERSON_PREFIX + IdentifierFactory.md5(row.getOrcid());
+		String source = PERSON_PREFIX + "::" + IdentifierFactory.md5(row.getOrcid());
 		String target = ROR_PREFIX
 			+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAffiliationId().getValue()));
 		List<KeyValue> properties = new ArrayList<>();
@ -317,13 +317,13 @@ public class ExtractPerson implements Serializable {
 						"0.91"),
 				null);

-		if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtil.isNotBlank(row.getStartDate())) {
+		if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtils.isNotBlank(row.getStartDate())) {
 			KeyValue kv = new KeyValue();
 			kv.setKey("startDate");
 			kv.setValue(row.getStartDate());
 			properties.add(kv);
 		}
-		if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtil.isNotBlank(row.getEndDate())) {
+		if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtils.isNotBlank(row.getEndDate())) {
 			KeyValue kv = new KeyValue();
 			kv.setKey("endDate");
 			kv.setValue(row.getEndDate());
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
@ -35,6 +35,5 @@ crossrefInputPath=/data/bip-affiliations/crossref-data.json
 pubmedInputPath=/data/bip-affiliations/pubmed-data.json
 openapcInputPath=/data/bip-affiliations/openapc-data.json
 dataciteInputPath=/data/bip-affiliations/datacite-data.json
-webCrawlInputPath=/data/bip-affiliations/webCrawl/

 outputPath=/tmp/crossref-affiliations-output-v5
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
@ -21,10 +21,6 @@
            <name>webCrawlInputPath</name>
            <description>the path where to find the inferred affiliation relations from webCrawl</description>
        </property>
-        <property>
-            <name>publisherInputPath</name>
-            <description>the path where to find the inferred affiliation relations from publisher websites</description>
-        </property>
        <property>
            <name>outputPath</name>
            <description>the path where to store the actionset</description>
@ -121,7 +117,6 @@
            <arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg>
            <arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg>
            <arg>--webCrawlInputPath</arg><arg>${webCrawlInputPath}</arg>
-            <arg>--publisherInputPath</arg><arg>${publisherInputPath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json
@ -16,5 +16,11 @@
    "paramLongName": "hdfsNameNode",
    "paramDescription": "the hdfs name node",
    "paramRequired": true
+  },
+  {
+    "paramName": "bp",
+    "paramLongName": "backupPath",
+    "paramDescription": "the hdfs path to move the OC data after the extraction",
+    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json
@ -30,12 +30,6 @@
    "paramLongName": "hdfsNameNode",
    "paramDescription": "the hdfs name node",
    "paramRequired": true
-  },
-  {
-    "paramName": "bp",
-    "paramLongName": "backupPath",
-    "paramDescription": "the hdfs path to move the OC data after the extraction",
-    "paramRequired": true
  }
 ]

--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml
@ -94,17 +94,7 @@
            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
            <arg>--inputPath</arg><arg>${inputPath}/Original</arg>
            <arg>--outputPath</arg><arg>${inputPath}/Extracted</arg>
-        </java>
-        <ok to="read"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="extract_correspondence">
-        <java>
-            <main-class>eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs</main-class>
-            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
-            <arg>--inputPath</arg><arg>${inputPath}/correspondence</arg>
-            <arg>--outputPath</arg><arg>${inputPath}/correspondence_extracted</arg>
+            <arg>--backupPath</arg><arg>${inputPath}/backup</arg>
        </java>
        <ok to="read"/>
        <error to="Kill"/>
@ -129,7 +119,6 @@
            </spark-opts>
            <arg>--inputPath</arg><arg>${inputPath}/Extracted</arg>
            <arg>--outputPath</arg><arg>${inputPath}/JSON</arg>
-            <arg>--backupPath</arg><arg>${inputPath}/backup</arg>
            <arg>--delimiter</arg><arg>${delimiter}</arg>
            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
        </spark>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json
@ -16,11 +16,10 @@
    "paramLongName": "isSparkSessionManged",
    "paramDescription": "the hdfs name node",
    "paramRequired": false
-  },
-  {
+  },{
  "paramName": "nn",
  "paramLongName": "nameNode",
  "paramDescription": "the hdfs name node",
  "paramRequired": true
-  }
+}
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/workflow.xml
@ -24,7 +24,7 @@

    <decision name="resume_from">
        <switch>
-            <case to="reset_workingDir">${wf:conf('resumeFrom') eq 'DownloadDump'}</case>
+            <case to="download">${wf:conf('resumeFrom') eq 'DownloadDump'}</case>
            <default to="create_actionset"/> <!-- first action to be done when downloadDump is to be performed -->
        </switch>
    </decision>
@ -33,14 +33,6 @@
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <action name="reset_workingDir">
-        <fs>
-            <delete path="${workingDir}"/>
-            <mkdir path="${workingDir}"/>
-        </fs>
-        <ok to="download"/>
-        <error to="Kill"/>
-    </action>
    <action name="download">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/xml/base2odf.transformationRule.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/xml/base2odf.transformationRule.xml
@ -1,51 +1,41 @@
 <RESOURCE_PROFILE>
 	<HEADER>
-        <RESOURCE_IDENTIFIER
-            value="2ad0cdd9-c96c-484c-8b0e-ed56d86891fe_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
-        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
-        <RESOURCE_KIND value="TransformationRuleDSResources"/>
-        <RESOURCE_URI value=""/>
-        <DATE_OF_CREATION value="2024-03-05T11:23:00+00:00"/>
+		<RESOURCE_IDENTIFIER value="2ad0cdd9-c96c-484c-8b0e-ed56d86891fe_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU=" />
+		<RESOURCE_TYPE value="TransformationRuleDSResourceType" />
+		<RESOURCE_KIND value="TransformationRuleDSResources" />
+		<RESOURCE_URI value="" />
+		<DATE_OF_CREATION value="2024-03-05T11:23:00+00:00" />
 	</HEADER>
 	<BODY>
 		<CONFIGURATION>
-            <SOURCE_METADATA_FORMAT interpretation="cleaned" layout="store" name="dc"/>
-            <SINK_METADATA_FORMAT name="odf_hbase"/>
-            <IMPORTED/>
+			<SOURCE_METADATA_FORMAT interpretation="cleaned" layout="store" name="dc" />
+			<SINK_METADATA_FORMAT name="odf_hbase" />
+			<IMPORTED />
 			<SCRIPT>
 				<TITLE>xslt_base2odf_hadoop</TITLE>
 				<CODE>
-                    <xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
-                        xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
-                        xmlns:base_dc="http://oai.base-search.net/base_dc/"
-                        xmlns:datacite="http://datacite.org/schema/kernel-4"
-                        xmlns:dr="http://www.driver-repository.eu/namespace/dr"
-                        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-                        xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-                        xmlns:vocabulary="http://eu/dnetlib/transform/clean"
-                        xmlns:oaf="http://namespace.openaire.eu/oaf"
-                        xmlns:oai="http://www.openarchives.org/OAI/2.0/"
-                        xmlns:dri="http://www.driver-repository.eu/namespace/dri"
-                        xmlns:xs="http://www.w3.org/2001/XMLSchema"
-                        xmlns:dc="http://purl.org/dc/elements/1.1/"
+					<xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO" xmlns:base_dc="http://oai.base-search.net/base_dc/"
+						xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+						xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:oaf="http://namespace.openaire.eu/oaf"
+						xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/"
 						exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0">
-                        <xsl:param name="varOfficialName"/>
-                        <xsl:param name="varDataSourceId"/>
-                        <xsl:param name="varFP7" select="'corda_______::'"/>
-                        <xsl:param name="varH2020" select="'corda__h2020::'"/>
-                        <xsl:param name="repoCode"
-                            select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
-                        <xsl:param name="index" select="0"/>
-                        <xsl:param name="transDate" select="current-dateTime()"/>
+						<xsl:param name="varOfficialName" />
+						<xsl:param name="varDataSourceId" />
+						<xsl:param name="varFP7" select="'corda_______::'" />
+						<xsl:param name="varH2020" select="'corda__h2020::'" />
+						<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
+						<xsl:param name="index" select="0" />
+						<xsl:param name="transDate" select="current-dateTime()" />

 						<xsl:template name="terminate">
-                            <xsl:message terminate="yes"> record is not compliant, transformation is
-                                interrupted. </xsl:message>
+							<xsl:message terminate="yes">
+								record is not compliant, transformation is interrupted.
+							</xsl:message>
 						</xsl:template>

 						<xsl:template match="/">
 							<record>
-                                <xsl:apply-templates select="//*[local-name() = 'header']"/>
+								<xsl:apply-templates select="//*[local-name() = 'header']" />


 								<!-- NOT USED 
@ -66,7 +56,7 @@

 										<xsl:for-each select="//base_dc:doi">
 											<datacite:identifier identifierType="DOI">
-                                                <xsl:value-of select="."/>
+												<xsl:value-of select="." />
 											</datacite:identifier>
 										</xsl:for-each>										

@ -74,67 +64,55 @@
 											<xsl:for-each
 												select="distinct-values(//dc:identifier[starts-with(., 'http') and (not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
 												<datacite:identifier alternateIdentifierType="url">
-                                                  <xsl:value-of select="."/>
+													<xsl:value-of select="." />
 												</datacite:identifier>
 											</xsl:for-each>

-                                            <xsl:for-each
-                                                select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
-                                                <datacite:identifier
-                                                  alternateIdentifierType="handle">
-                                                  <xsl:value-of select="."/>
+											<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
+												<datacite:identifier alternateIdentifierType="handle">
+													<xsl:value-of select="." />
 												</datacite:identifier>
 											</xsl:for-each>

-                                            <xsl:for-each
-                                                select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
-                                                <datacite:identifier alternateIdentifierType="urn">
-                                                  <xsl:value-of select="."/>
+											<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
+												<datacite:identifier alternateIdentifierType='urn'>
+													<xsl:value-of select="." />
 												</datacite:identifier>
 											</xsl:for-each>

-                                            <datacite:identifier
-                                                alternateIdentifierType="oai-original">
-                                                <xsl:value-of select="//oai:header/oai:identifier"/>
+											<datacite:identifier alternateIdentifierType="oai-original">
+												<xsl:value-of
+													select="//oai:header/oai:identifier" />
 											</datacite:identifier>
 											
 										</datacite:alternateIdentifiers>

-                                        <datacite:relatedIdentifiers/>
+										<datacite:relatedIdentifiers />


 										<xsl:for-each select="//base_dc:typenorm">
-                                            <datacite:resourceType>
-                                                <xsl:value-of
-                                                  select="vocabulary:clean(., 'base:normalized_types')"
-                                                />
-                                            </datacite:resourceType>
+											<datacite:resourceType><xsl:value-of select="vocabulary:clean(., 'base:normalized_types')" /></datacite:resourceType>
 										</xsl:for-each>

 										<datacite:titles>
 											<xsl:for-each select="//dc:title">
 												<datacite:title>
-                                                  <xsl:value-of select="normalize-space(.)"/>
+													<xsl:value-of select="normalize-space(.)" />
 												</datacite:title>
 											</xsl:for-each>
 										</datacite:titles>

 										<datacite:creators>
 											<xsl:for-each select="//dc:creator">
-                                                <xsl:variable name="author"
-                                                  select="normalize-space(.)"/>
+												<xsl:variable name="author" select="normalize-space(.)" />
 												<datacite:creator>
 													<datacite:creatorName>
-                                                  <xsl:value-of select="$author"/>
+														<xsl:value-of select="$author" />
 													</datacite:creatorName>
-                                                  <xsl:for-each
-                                                  select="//base_dc:authod_id[normalize-space(./base_dc:creator_name) = $author]/base_dc:creator_id ">
+													<xsl:for-each select="//base_dc:authod_id[normalize-space(./base_dc:creator_name) = $author]/base_dc:creator_id ">
 														<xsl:if test="contains(.,'https://orcid.org/')">
-                                                  <nameIdentifier schemeURI="https://orcid.org/"
-                                                  nameIdentifierScheme="ORCID">
-                                                  <xsl:value-of
-                                                  select="substring-after(., 'https://orcid.org/')"
-                                                  />
+															<nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">
+																<xsl:value-of select="substring-after(., 'https://orcid.org/')" />
 															</nameIdentifier>
 														</xsl:if>
 													</xsl:for-each>
@ -146,7 +124,7 @@
 											<xsl:for-each select="//dc:contributor">
 												<datacite:contributor>
 													<datacite:contributorName>
-                                                  <xsl:value-of select="normalize-space(.)"/>
+														<xsl:value-of select="normalize-space(.)" />
 													</datacite:contributorName>
 												</datacite:contributor>
 											</xsl:for-each>
@ -155,7 +133,7 @@
 										<datacite:descriptions>
 											<xsl:for-each select="//dc:description">
 												<datacite:description descriptionType="Abstract">
-                                                  <xsl:value-of select="normalize-space(.)"/>
+													<xsl:value-of select="normalize-space(.)" />
 												</datacite:description>
 											</xsl:for-each>
 										</datacite:descriptions>
@ -163,47 +141,43 @@
 										<datacite:subjects>
 											<xsl:for-each select="//dc:subject">
 												<datacite:subject>
-                                                  <xsl:value-of select="normalize-space(.)"/>
+													<xsl:value-of select="normalize-space(.)" />
 												</datacite:subject>
 											</xsl:for-each>
 											
-                                            <xsl:for-each
-                                                select="//base_dc:classcode|//base_dc:autoclasscode">
-                                                <datacite:subject subjectScheme="{@type}"
-                                                  classificationCode="{normalize-space(.)}">
+											<xsl:for-each select="//base_dc:classcode|//base_dc:autoclasscode">
+												<datacite:subject subjectScheme="{@type}" classificationCode="{normalize-space(.)}">
 													<!-- TODO the value should be obtained by the Code -->
-                                                  <xsl:value-of select="normalize-space(.)"/>
+													<xsl:value-of select="normalize-space(.)" />
 												</datacite:subject>
 											</xsl:for-each>
 										</datacite:subjects>
 										
 										<xsl:for-each select="//dc:publisher">
 											<datacite:publisher>
-                                                <xsl:value-of select="normalize-space(.)"/>
+												<xsl:value-of select="normalize-space(.)" />
 											</datacite:publisher>
 										</xsl:for-each>
 										
 										<xsl:for-each select="//base_dc:year">
 											<datacite:publicationYear>
-                                                <xsl:value-of select="normalize-space(.)"/>
+												<xsl:value-of select="normalize-space(.)" />
 											</datacite:publicationYear>
 										</xsl:for-each>
 																				
 										<datacite:formats>
 											<xsl:for-each select="//dc:format">
 												<datacite:format>
-                                                  <xsl:value-of select="normalize-space(.)"/>
+													<xsl:value-of select="normalize-space(.)" />
 												</datacite:format>
 											</xsl:for-each>
 										</datacite:formats>
 										
 										<datacite:language>
-                                            <xsl:value-of
-                                                select="vocabulary:clean( //base_dc:lang, 'dnet:languages')"
-                                            />
+											<xsl:value-of select="vocabulary:clean( //base_dc:lang, 'dnet:languages')" />
 										</datacite:language>

-                                    	<!--<datacite:rightsList>
+										<oaf:accessrights>
 											<xsl:if test="//base_dc:oa[.='0']">
 												<datacite:rights rightsURI="http://purl.org/coar/access_right/c_16ec">restricted access</datacite:rights>
 											</xsl:if>
@ -211,29 +185,21 @@
 												<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
 											</xsl:if>
 											<xsl:for-each select="//dc:rights|//base_dc:rightsnorm">
-                                                <datacite:rights>
-                                                    <xsl:value-of select="vocabulary:clean(., 'dnet:access_modes')"/>
-                                                </datacite:rights>
+												<datacite:rights><xsl:value-of select="vocabulary:clean(., 'dnet:access_modes')" /></datacite:rights>	
 											</xsl:for-each>
-                                        </datacite:rightsList>-->
+										</oaf:accessrights>

 									</datacite:resource>

 									<xsl:for-each select="//dc:relation">
-                                        <xsl:if
-                                            test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
+										<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
 											<oaf:projectid>
-                                                <xsl:value-of
-                                                  select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))"
-                                                />
+												<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
 											</oaf:projectid>
 										</xsl:if>
-                                        <xsl:if
-                                            test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
+										<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
 											<oaf:projectid>
-                                                <xsl:value-of
-                                                  select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))"
-                                                />
+												<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
 											</oaf:projectid>
 										</xsl:if>
 									</xsl:for-each>
@ -243,81 +209,68 @@
 										
 										<!-- Book part -->
 										<xsl:when test="//base_dc:typenorm = '111'">
-                                            <dr:CobjCategory type="publication"
-                                                >0013</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0013</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Book -->
 										<xsl:when test="//base_dc:typenorm = '11'">
-                                            <dr:CobjCategory type="publication"
-                                                >0002</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0002</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Article contribution -->
 										<xsl:when test="//base_dc:typenorm = '121'">
-                                            <dr:CobjCategory type="publication"
-                                                >0001</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0001</dr:CobjCategory>
 										</xsl:when>
 										
 																				
 										<!-- Journal/Newspaper -->
 										<xsl:when test="//base_dc:typenorm = '12'">
-                                            <dr:CobjCategory type="publication"
-                                                >0043</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0043</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Report -->
 										<xsl:when test="//base_dc:typenorm = '14'">
-                                            <dr:CobjCategory type="publication"
-                                                >0017</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0017</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Review -->
 										<xsl:when test="//base_dc:typenorm = '15'">
-                                            <dr:CobjCategory type="publication"
-                                                >0015</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0015</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Lecture -->
 										<xsl:when test="//base_dc:typenorm = '17'">
-                                            <dr:CobjCategory type="publication"
-                                                >0010</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0010</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Bachelor's thesis -->
 										<xsl:when test="//base_dc:typenorm = '181'">
-                                            <dr:CobjCategory type="publication"
-                                                >0008</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0008</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Master's thesis -->
 										<xsl:when test="//base_dc:typenorm = '182'">
-                                            <dr:CobjCategory type="publication"
-                                                >0007</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0007</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Doctoral and postdoctoral thesis -->
 										<xsl:when test="//base_dc:typenorm = '183'">
-                                            <dr:CobjCategory type="publication"
-                                                >0006</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0006</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Thesis -->
 										<xsl:when test="//base_dc:typenorm = '18'">
-                                            <dr:CobjCategory type="publication"
-                                                >0044</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0044</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Patent -->
 										<xsl:when test="//base_dc:typenorm = '1A'">
-                                            <dr:CobjCategory type="publication"
-                                                >0019</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0019</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Text -->
 										<xsl:when test="//base_dc:typenorm = '1'">
-                                            <dr:CobjCategory type="publication"
-                                                >0001</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0001</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Software -->
@ -362,26 +315,22 @@
 										
 										<!-- Other non-article -->
 										<xsl:when test="//base_dc:typenorm = '122'">
-                                            <dr:CobjCategory type="publication"
-                                                >0038</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0038</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Course material -->
 										<xsl:when test="//base_dc:typenorm = '16'">
-                                            <dr:CobjCategory type="publication"
-                                                >0038</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0038</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Manuscript -->
 										<xsl:when test="//base_dc:typenorm = '19'">
-                                            <dr:CobjCategory type="publication"
-                                                >0038</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0038</dr:CobjCategory>
 										</xsl:when>
 										
 										<!-- Conference object -->
 										<xsl:when test="//base_dc:typenorm = '13'">
-                                            <dr:CobjCategory type="publication"
-                                                >0004</dr:CobjCategory>
+											<dr:CobjCategory type="publication">0004</dr:CobjCategory>
 										</xsl:when>

 										<!-- Unknown -->
@ -399,100 +348,83 @@
 											<xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when>
 											<xsl:when test="//base_dc:oa[.='2']">UNKNOWN</xsl:when>
 											<xsl:when test="//base_dc:rightsnorm">
-                                                <xsl:value-of
-                                                  select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')"
-                                                />
+												<xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')" />
 											</xsl:when>
 											<xsl:when test="//dc:rights">
-                                                <xsl:value-of
-                                                  select="vocabulary:clean( //dc:rights, 'dnet:access_modes')"
-                                                />
+												<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
 											</xsl:when>
 											<xsl:otherwise>UNKNOWN</xsl:otherwise>
 										</xsl:choose>
 									</oaf:accessrights>

-                                	<xsl:if test="//base_dc:rightsnorm and not(contains(//base_dc:rightsnorm, ';'))">
-                                		<oaf:license><xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:licenses')" /></oaf:license>
-                                	</xsl:if>
-                                	
 									<xsl:for-each select="//base_dc:doi">
 										<oaf:identifier identifierType="doi">
-                                            <xsl:value-of select="."/>
+											<xsl:value-of select="." />
 										</oaf:identifier>
 									</xsl:for-each>

 									<xsl:for-each
 										select="distinct-values(//dc:identifier[starts-with(., 'http') and ( not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
 										<oaf:identifier identifierType="url">
-                                            <xsl:value-of select="."/>
+											<xsl:value-of select="." />
 										</oaf:identifier>
 									</xsl:for-each>

-                                    <xsl:for-each
-                                        select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
+									<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
 										<oaf:identifier identifierType="handle">
-                                            <xsl:value-of select="."/>
+											<xsl:value-of select="." />
 										</oaf:identifier>
 									</xsl:for-each>

-                                    <xsl:for-each
-                                        select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
-                                        <oaf:identifier identifierType="urn">
-                                            <xsl:value-of select="."/>
+									<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
+										<oaf:identifier identifierType='urn'>
+											<xsl:value-of select="." />
 										</oaf:identifier>
 									</xsl:for-each>

 									<oaf:identifier identifierType="oai-original">
-                                        <xsl:value-of select="//oai:header/oai:identifier"/>
+										<xsl:value-of
+											select="//oai:header/oai:identifier" />
 									</oaf:identifier>

 									<oaf:hostedBy>
 										<xsl:attribute name="name">
-                                            <xsl:value-of select="//base_dc:collname"/>
+											<xsl:value-of select="//base_dc:collname" />
 										</xsl:attribute>
 										<xsl:attribute name="id">
-                                            <xsl:value-of
-                                                select="concat('opendoar____::', //base_dc:collection/@opendoar_id)"
-                                            />
+											<xsl:value-of select="concat('opendoar____::', //base_dc:collection/@opendoar_id)" />
 										</xsl:attribute>
 									</oaf:hostedBy>

 									<oaf:collectedFrom>
 										<xsl:attribute name="name">
-                                            <xsl:value-of select="$varOfficialName"/>
+											<xsl:value-of select="$varOfficialName" />
 										</xsl:attribute>
 										<xsl:attribute name="id">
-                                            <xsl:value-of select="$varDataSourceId"/>
+											<xsl:value-of select="$varDataSourceId" />
 										</xsl:attribute>
 									</oaf:collectedFrom>

 									<oaf:dateAccepted>
-                                        <xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )"/>
+										<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
 									</oaf:dateAccepted>

 									<xsl:if test="//base_dc:oa[.='1']">
 										<xsl:for-each select="//dc:relation[starts-with(., 'http')]">
 											<oaf:fulltext>
-                                                <xsl:value-of select="normalize-space(.)"/>
+												<xsl:value-of select="normalize-space(.)" />
 											</oaf:fulltext>
 										</xsl:for-each>
 									</xsl:if>

 									<xsl:for-each select="//base_dc:collection/@ror_id">
-                                        <oaf:relation relType="resultOrganization"
-                                            subRelType="affiliation" relClass="hasAuthorInstitution"
-                                            targetType="organization">
+										<oaf:relation relType="resultOrganization" subRelType="affiliation" relClass="hasAuthorInstitution" targetType="organization">
 											<xsl:choose>
 												<xsl:when test="contains(.,'https://ror.org/')">
-                                                  <xsl:value-of
-                                                  select="concat('ror_________::', normalize-space(.))"
-                                                  />
+													<xsl:value-of select="concat('ror_________::', normalize-space(.))" />
 												</xsl:when>
 												<xsl:otherwise>
-                                                  <xsl:value-of
-                                                  select="concat('ror_________::https://ror.org/', normalize-space(.))"
-                                                  />
+													<xsl:value-of select="concat('ror_________::https://ror.org/', normalize-space(.))" />
 												</xsl:otherwise>
 											</xsl:choose>
 										</oaf:relation>
@ -503,39 +435,38 @@
 										<oaf:deletedbyinference>false</oaf:deletedbyinference>
 										<oaf:trust>0.89</oaf:trust>
 										<oaf:inferenceprovenance/>
-                                        <oaf:provenanceaction
-                                            classid="sysimport:crosswalk:aggregator"
+										<oaf:provenanceaction classid="sysimport:crosswalk:aggregator"
 											classname="sysimport:crosswalk:aggregator"
 											schemeid="dnet:provenanceActions"
 											schemename="dnet:provenanceActions"/>
 									</oaf:datainfo>
 								</metadata>
-                                <xsl:copy-of select="//*[local-name() = 'about']"/>
+								<xsl:copy-of select="//*[local-name() = 'about']" />
 							</record>
 						</xsl:template>

 						<xsl:template match="//*[local-name() = 'header']">
 							<xsl:if test="//oai:header/@status='deleted'">
-                                <xsl:call-template name="terminate"/>
+								<xsl:call-template name="terminate" />
 							</xsl:if>
 							<xsl:copy>
-                                <xsl:apply-templates select="node()|@*"/>
+								<xsl:apply-templates select="node()|@*" />
 								<xsl:element name="dr:dateOfTransformation">
-                                    <xsl:value-of select="$transDate"/>
+									<xsl:value-of select="$transDate" />
 								</xsl:element>
 							</xsl:copy>
 						</xsl:template>

 						<xsl:template match="node()|@*">
 							<xsl:copy>
-                                <xsl:apply-templates select="node()|@*"/>
+								<xsl:apply-templates select="node()|@*" />
 							</xsl:copy>
 						</xsl:template>
 					</xsl:stylesheet>
 				</CODE>
 			</SCRIPT>
 		</CONFIGURATION>
-        <STATUS/>
-        <SECURITY_PARAMETERS/>
+		<STATUS />
+		<SECURITY_PARAMETERS />
 	</BODY>
 </RESOURCE_PROFILE>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml
@ -1,4 +1,4 @@
-<workflow-app name="Transform_BioEntity_Workflow" xmlns="uri:oozie:workflow:0.5">
+    <workflow-app name="Transform_BioEntity_Workflow" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
@ -8,40 +8,19 @@
            <name>database</name>
            <description>the PDB Database Working Path</description>
        </property>
+
        <property>
-            <name>mdStoreOutputId</name>
-            <description>the identifier of the cleaned MDStore</description>
-        </property>
-        <property>
-            <name>mdStoreManagerURI</name>
-            <description>the path of the cleaned mdstore</description>
+            <name>targetPath</name>
+            <description>the Target Working dir path</description>
        </property>
    </parameters>

-    <start to="StartTransaction"/>
-
+    <start to="ConvertDB"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <action name="StartTransaction">
-        <java>
-            <configuration>
-                <property>
-                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                    <value>true</value>
-                </property>
-            </configuration>
-            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-            <arg>--action</arg><arg>NEW_VERSION</arg>
-            <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
-            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-            <capture-output/>
-        </java>
-        <ok to="ConvertDB"/>
-        <error to="RollBack"/>
-    </action>
    <action name="ConvertDB">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
@ -62,48 +41,11 @@
            <arg>--master</arg><arg>yarn</arg>
            <arg>--dbPath</arg><arg>${sourcePath}</arg>
            <arg>--database</arg><arg>${database}</arg>
-            <arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--targetPath</arg><arg>${targetPath}</arg>
        </spark>
-        <ok to="CommitVersion"/>
-        <error to="RollBack"/>
-
-    </action>
-        <action name="CommitVersion">
-            <java>
-                <configuration>
-                    <property>
-                        <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                        <value>true</value>
-                    </property>
-                </configuration>
-                <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-                <arg>--action</arg><arg>COMMIT</arg>
-                <arg>--namenode</arg><arg>${nameNode}</arg>
-                <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
-                <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-            </java>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
-
-        <action name="RollBack">
-            <java>
-                <configuration>
-                    <property>
-                        <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                        <value>true</value>
-                    </property>
-                </configuration>
-                <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-                <arg>--action</arg><arg>ROLLBACK</arg>
-                <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
-                <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-            </java>
-            <ok to="Kill"/>
-            <error to="Kill"/>
-        </action>
-
-
    <end name="End"/>

 </workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json
@ -2,5 +2,5 @@
  {"paramName":"mt",  "paramLongName":"master",       "paramDescription": "should be local or yarn",                  "paramRequired": true},
  {"paramName":"db",  "paramLongName":"database",     "paramDescription": "should be PDB or UNIPROT",                 "paramRequired": true},
  {"paramName":"p",   "paramLongName":"dbPath",       "paramDescription": "the path of the database to transform",    "paramRequired": true},
-  {"paramName":"mo",   "paramLongName":"mdstoreOutputVersion",     "paramDescription": "the oaf path ",                "paramRequired": true}
+  {"paramName":"t",   "paramLongName":"targetPath",   "paramDescription": "the OAF target path ",                     "paramRequired": true}
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json
@ -1,20 +1,5 @@
 [
-  {
-    "paramName": "mt",
-    "paramLongName": "master",
-    "paramDescription": "should be local or yarn",
-    "paramRequired": true
-  },
-  {
-    "paramName": "s",
-    "paramLongName": "sourcePath",
-    "paramDescription": "the source Path",
-    "paramRequired": true
-  },
-  {
-    "paramName": "mo",
-    "paramLongName": "mdstoreOutputVersion",
-    "paramDescription": "the oaf path ",
-    "paramRequired": true
-  }
+  {"paramName":"mt",  "paramLongName":"master",     "paramDescription": "should be local or yarn",                  "paramRequired": true},
+  {"paramName":"s",   "paramLongName":"sourcePath","paramDescription": "the source Path",                              "paramRequired": true},
+  {"paramName":"t",   "paramLongName":"targetPath","paramDescription": "the OAF target path",  "paramRequired": true}
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml
@ -9,26 +9,34 @@
            <description>the Working Path</description>
        </property>
        <property>
-            <name>mdStoreOutputId</name>
-            <description>the identifier of the cleaned MDStore</description>
+            <name>targetPath</name>
+            <description>the OAF MDStore Path</description>
        </property>
        <property>
-            <name>mdStoreManagerURI</name>
-            <description>the path of the cleaned mdstore</description>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>resumeFrom</name>
-            <value>CreateEBIDataSet</value>
+            <value>DownloadEBILinks</value>
            <description>node to start</description>
        </property>
    </parameters>

-    <start to="StartTransaction"/>
+    <start to="resume_from"/>

    <decision name="resume_from">
        <switch>
            <case to="DownloadEBILinks">${wf:conf('resumeFrom') eq 'DownloadEBILinks'}</case>
-            <case to="StartTransaction">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
+            <case to="CreateEBIDataSet">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
            <default to="DownloadEBILinks"/>
        </switch>
    </decision>
@ -69,29 +77,9 @@
            <move source="${sourcePath}/ebi_links_dataset" target="${sourcePath}/ebi_links_dataset_old"/>
            <move source="${workingPath}/links_final" target="${sourcePath}/ebi_links_dataset"/>
        </fs>
-        <ok to="StartTransaction"/>
+        <ok to="CreateEBIDataSet"/>
        <error to="Kill"/>
    </action>
-
-    <action name="StartTransaction">
-        <java>
-            <configuration>
-                <property>
-                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                    <value>true</value>
-                </property>
-            </configuration>
-            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-            <arg>--action</arg><arg>NEW_VERSION</arg>
-            <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
-            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-            <capture-output/>
-        </java>
-        <ok to="CreateEBIDataSet"/>
-        <error to="RollBack"/>
-    </action>
-
-
    <action name="CreateEBIDataSet">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
@ -107,49 +95,11 @@
                ${sparkExtraOPT}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/ebi_links_dataset</arg>
-            <arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--targetPath</arg><arg>${targetPath}</arg>
            <arg>--master</arg><arg>yarn</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
-
-
-    <action name="CommitVersion">
-        <java>
-            <configuration>
-                <property>
-                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                    <value>true</value>
-                </property>
-            </configuration>
-            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-            <arg>--action</arg><arg>COMMIT</arg>
-            <arg>--namenode</arg><arg>${nameNode}</arg>
-            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
-            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-        </java>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="RollBack">
-        <java>
-            <configuration>
-                <property>
-                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                    <value>true</value>
-                </property>
-            </configuration>
-            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-            <arg>--action</arg><arg>ROLLBACK</arg>
-            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
-            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-        </java>
-        <ok to="Kill"/>
-        <error to="Kill"/>
-    </action>
-
    <end name="End"/>
-
 </workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala
@ -14,7 +14,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.{
  PidType
 }
 import eu.dnetlib.dhp.utils.DHPUtils
-import org.apache.commons.lang.StringUtils
+import org.apache.commons.lang3.StringUtils
 import org.apache.spark.sql.Row
 import org.json4s
 import org.json4s.DefaultFormats
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
@ -407,10 +407,9 @@ object DataciteToOAFTransformation {
        )
      }
      if (c.affiliation.isDefined)
-        a.setAffiliation(
+        a.setRawAffiliationString(
          c.affiliation.get
            .filter(af => af.nonEmpty)
-            .map(af => OafMapperUtils.field(af, dataInfo))
            .asJava
        )
      a.setRank(idx + 1)
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
@ -231,7 +231,7 @@ object BioDBToOAF {
  def uniprotToOAF(input: String): List[Oaf] = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json = parse(input)
-    val pid = (json \ "pid").extract[String].trim()
+    val pid = (json \ "pid").extract[String]

    val d = new Dataset

--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala
@ -2,15 +2,12 @@ package eu.dnetlib.dhp.sx.bio

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.collection.CollectionUtils
-import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
-import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
 import eu.dnetlib.dhp.schema.oaf.Oaf
 import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
-import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}

 object SparkTransformBioDatabaseToOAF {

@ -28,13 +25,8 @@ object SparkTransformBioDatabaseToOAF {

    val dbPath: String = parser.get("dbPath")
    log.info("dbPath: {}", database)
-
-    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
-    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
-
-    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
-    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
-    log.info("outputBasePath: {}", outputBasePath)
+    val targetPath: String = parser.get("targetPath") // destination passed to CollectionUtils.saveDataset below
+    log.info("targetPath: {}", targetPath)

    val spark: SparkSession =
      SparkSession
@ -51,28 +43,24 @@ object SparkTransformBioDatabaseToOAF {
      case "UNIPROT" =>
        CollectionUtils.saveDataset(
          spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))),
-          s"$outputBasePath/$MDSTORE_DATA_PATH"
+          targetPath
        )
      case "PDB" =>
        CollectionUtils.saveDataset(
          spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))),
-          s"$outputBasePath/$MDSTORE_DATA_PATH"
+          targetPath
        )
      case "SCHOLIX" =>
        CollectionUtils.saveDataset(
          spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)),
-          s"$outputBasePath/$MDSTORE_DATA_PATH"
+          targetPath
        )
      case "CROSSREF_LINKS" =>
        CollectionUtils.saveDataset(
          spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))),
-          s"$outputBasePath/$MDSTORE_DATA_PATH"
+          targetPath
        )
    }
-
-    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
-    val mdStoreSize = df.count
-    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
  }

 }
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
@ -9,9 +9,6 @@ import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql._
 import org.slf4j.{Logger, LoggerFactory}
-import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
-import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
-import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}

 object SparkEBILinksToOaf {

@ -35,13 +32,8 @@ object SparkEBILinksToOaf {
    import spark.implicits._
    val sourcePath = parser.get("sourcePath")
    log.info(s"sourcePath  -> $sourcePath")
-    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
-    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
-
-    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
-    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
-    log.info("outputBasePath: {}", outputBasePath)
-
+    val targetPath = parser.get("targetPath")
+    log.info(s"targetPath  -> $targetPath")
    implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])

    val ebLinks: Dataset[EBILinkItem] = spark.read
@ -54,10 +46,7 @@ object SparkEBILinksToOaf {
        .flatMap(j => BioDBToOAF.parse_ebi_links(j.links))
        .filter(p => BioDBToOAF.EBITargetLinksFilter(p))
        .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)),
-      s"$outputBasePath/$MDSTORE_DATA_PATH"
+      targetPath
    )
-    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
-    val mdStoreSize = df.count
-    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
@ -28,7 +28,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
 import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;

@ -40,8 +39,7 @@ public class PrepareAffiliationRelationsTest {

 	private static Path workingDir;
 	private static final String ID_PREFIX = "50|doi_________::";
-	private static final Logger log = LoggerFactory
-		.getLogger(PrepareAffiliationRelationsTest.class);
+	private static final Logger log = LoggerFactory.getLogger(PrepareAffiliationRelationsTest.class);

 	@BeforeAll
 	public static void beforeAll() throws IOException {
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java
@ -77,13 +77,13 @@ public class RemapTest {
 		MapOCIdsInPids
 			.main(
 				new String[] {
-					"--isSparkSessionManged",
+					"-isSparkSessionManged",
 					Boolean.FALSE.toString(),
-					"--inputPath",
+					"-inputPath",
 					inputPath,
-					"--outputPath",
+					"-outputPath",
 					workingDir.toString() + "/out/",
-					"--nameNode", "hdfs://localhost"
+					"-nameNode", "input1;input2;input3;input4;input5"
 				});

 	}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump
@ -1,44 +1,15 @@
-{"classification": "Signaling protein", "pdb": "5NM4", "deposition_date": "2017-04-05", "title": "A2a adenosine receptor room-temperature structure determined by serial Femtosecond crystallography", "Keywords": ["Oom-temperature", " serial crystallography", " signaling protein"], "authors": ["T.weinert", "R.cheng", "D.james", "D.gashi", "P.nogly", "K.jaeger", "M.hennig", "", "J.standfuss"], "pmid": "28912485", "doi": "10.1038/S41467-017-00630-4"}
-{"classification": "Oxidoreductase/oxidoreductase inhibitor", "pdb": "4KN3", "deposition_date": "2013-05-08", "title": "Structure of the y34ns91g double mutant of dehaloperoxidase from Amphitrite ornata with 2,4,6-trichlorophenol", "Keywords": ["Lobin", " oxygen storage", " peroxidase", " oxidoreductase", " oxidoreductase-", "Oxidoreductase inhibitor complex"], "authors": ["C.wang", "L.lovelace", "L.lebioda"], "pmid": "23952341", "doi": "10.1021/BI400627W"}
-{"classification": "Transport protein", "pdb": "8HKM", "deposition_date": "2022-11-27", "title": "Ion channel", "Keywords": ["On channel", " transport protein"], "authors": ["D.h.jiang", "J.t.zhang"], "pmid": "37494189", "doi": "10.1016/J.CELREP.2023.112858"}
-{"classification": "Signaling protein", "pdb": "6JT1", "deposition_date": "2019-04-08", "title": "Structure of human soluble guanylate cyclase in the heme oxidised State", "Keywords": ["Oluble guanylate cyclase", " signaling protein"], "authors": ["L.chen", "Y.kang", "R.liu", "J.-x.wu"], "pmid": "31514202", "doi": "10.1038/S41586-019-1584-6"}
-{"classification": "Immune system", "pdb": "7OW6", "deposition_date": "2021-06-16", "title": "Crystal structure of a tcr in complex with hla-a*11:01 bound to kras G12d peptide (vvvgadgvgk)", "Keywords": ["La", " kras", " tcr", " immune system"], "authors": ["V.karuppiah", "R.a.robinson"], "doi": "10.1038/S41467-022-32811-1"}
-{"classification": "Biosynthetic protein", "pdb": "5EQ8", "deposition_date": "2015-11-12", "title": "Crystal structure of medicago truncatula histidinol-phosphate Phosphatase (mthpp) in complex with l-histidinol", "Keywords": ["Istidine biosynthesis", " metabolic pathways", " dimer", " plant", "", "Biosynthetic protein"], "authors": ["M.ruszkowski", "Z.dauter"], "pmid": "26994138", "doi": "10.1074/JBC.M115.708727"}
-{"classification": "De novo protein", "pdb": "8CWA", "deposition_date": "2022-05-18", "title": "Solution nmr structure of 8-residue rosetta-designed cyclic peptide D8.21 in cdcl3 with cis/trans switching (tc conformation, 53%)", "Keywords": ["Yclic peptide", " non natural amino acids", " cis/trans", " switch peptides", "", "De novo design", "Membrane permeability", "De novo protein"], "authors": ["T.a.ramelot", "R.tejero", "G.t.montelione"], "pmid": "36041435", "doi": "10.1016/J.CELL.2022.07.019"}
-{"classification": "Hydrolase", "pdb": "3R6M", "deposition_date": "2011-03-21", "title": "Crystal structure of vibrio parahaemolyticus yeaz", "Keywords": ["Ctin/hsp70 nucleotide-binding fold", " bacterial resuscitation", " viable", "But non-culturable state", "Resuscitation promoting factor", "Ygjd", "", "Yjee", "Vibrio parahaemolyticus", "Hydrolase"], "authors": ["A.roujeinikova", "I.aydin"], "pmid": "21858042", "doi": "10.1371/JOURNAL.PONE.0023245"}
-{"classification": "Hydrolase", "pdb": "2W5J", "deposition_date": "2008-12-10", "title": "Structure of the c14-rotor ring of the proton translocating Chloroplast atp synthase", "Keywords": ["Ydrolase", " chloroplast", " atp synthase", " lipid-binding", " cf(0)", " membrane", "", "Transport", "Formylation", "Energy transduction", "Hydrogen ion transport", "", "Ion transport", "Transmembrane", "Membrane protein"], "authors": ["M.vollmar", "D.schlieper", "M.winn", "C.buechner", "G.groth"], "pmid": "19423706", "doi": "10.1074/JBC.M109.006916"}
-{"classification": "De novo protein", "pdb": "4GLU", "deposition_date": "2012-08-14", "title": "Crystal structure of the mirror image form of vegf-a", "Keywords": ["-protein", " covalent dimer", " cysteine knot protein", " growth factor", " de", "Novo protein"], "authors": ["K.mandal", "M.uppalapati", "D.ault-riche", "J.kenney", "J.lowitz", "S.sidhu", "", "S.b.h.kent"], "pmid": "22927390", "doi": "10.1073/PNAS.1210483109"}
-{"classification": "Hydrolase/hydrolase inhibitor", "pdb": "3WYL", "deposition_date": "2014-09-01", "title": "Crystal structure of the catalytic domain of pde10a complexed with 5- Methoxy-3-(1-phenyl-1h-pyrazol-5-yl)-1-(3-(trifluoromethyl)phenyl) Pyridazin-4(1h)-one", "Keywords": ["Ydrolase-hydrolase inhibitor complex"], "authors": ["H.oki", "Y.hayano"], "pmid": "25384088", "doi": "10.1021/JM5013648"}
-{"classification": "Isomerase", "pdb": "5BOR", "deposition_date": "2015-05-27", "title": "Structure of acetobacter aceti pure-s57c, sulfonate form", "Keywords": ["Cidophile", " pure", " purine biosynthesis", " isomerase"], "authors": ["K.l.sullivan", "T.j.kappock"]}
-{"classification": "Hydrolase", "pdb": "1X0C", "deposition_date": "2005-03-17", "title": "Improved crystal structure of isopullulanase from aspergillus niger Atcc 9642", "Keywords": ["Ullulan", " glycoside hydrolase family 49", " glycoprotein", " hydrolase"], "authors": ["M.mizuno", "T.tonozuka", "A.yamamura", "Y.miyasaka", "H.akeboshi", "S.kamitori", "", "A.nishikawa", "Y.sakano"], "pmid": "18155243", "doi": "10.1016/J.JMB.2007.11.098"}
-{"classification": "Oxidoreductase", "pdb": "7CUP", "deposition_date": "2020-08-23", "title": "Structure of 2,5-dihydroxypridine dioxygenase from pseudomonas putida Kt2440", "Keywords": ["On-heme dioxygenase", " oxidoreductase"], "authors": ["G.q.liu", "H.z.tang"]}
-{"classification": "Ligase", "pdb": "1VCN", "deposition_date": "2004-03-10", "title": "Crystal structure of t.th. hb8 ctp synthetase complex with sulfate Anion", "Keywords": ["Etramer", " riken structural genomics/proteomics initiative", " rsgi", "", "Structural genomics", "Ligase"], "authors": ["M.goto", "Riken structural genomics/proteomics initiative (rsgi)"], "pmid": "15296735", "doi": "10.1016/J.STR.2004.05.013"}
-{"classification": "Transferase/transferase inhibitor", "pdb": "6C9V", "deposition_date": "2018-01-28", "title": "Mycobacterium tuberculosis adenosine kinase bound to (2r,3s,4r,5r)-2- (hydroxymethyl)-5-(6-(4-phenylpiperazin-1-yl)-9h-purin-9-yl) Tetrahydrofuran-3,4-diol", "Keywords": ["Ucleoside analog", " complex", " inhibitor", " structural genomics", " psi-2", "", "Protein structure initiative", "Tb structural genomics consortium", "", "Tbsgc", "Transferase-transferase inhibitor complex"], "authors": ["R.a.crespo", "Tb structural genomics consortium (tbsgc)"], "pmid": "31002508", "doi": "10.1021/ACS.JMEDCHEM.9B00020"}
-{"classification": "De novo protein", "pdb": "4LPY", "deposition_date": "2013-07-16", "title": "Crystal structure of tencon variant g10", "Keywords": ["Ibronectin type iii fold", " alternate scaffold", " de novo protein"], "authors": ["A.teplyakov", "G.obmolova", "G.l.gilliland"], "pmid": "24375666", "doi": "10.1002/PROT.24502"}
-{"classification": "Isomerase", "pdb": "2Y88", "deposition_date": "2011-02-03", "title": "Crystal structure of mycobacterium tuberculosis phosphoribosyl Isomerase (variant d11n) with bound prfar", "Keywords": ["Romatic amino acid biosynthesis", " isomerase", " tim-barrel", " histidine", "Biosynthesis", "Tryptophan biosynthesis"], "authors": ["J.kuper", "A.v.due", "A.geerlof", "M.wilmanns"], "pmid": "21321225", "doi": "10.1073/PNAS.1015996108"}
-{"classification": "Unknown function", "pdb": "1SR0", "deposition_date": "2004-03-22", "title": "Crystal structure of signalling protein from sheep(sps-40) at 3.0a Resolution using crystal grown in the presence of polysaccharides", "Keywords": ["Ignalling protein", " involution", " unknown function"], "authors": ["D.b.srivastava", "A.s.ethayathulla", "N.singh", "J.kumar", "S.sharma", "T.p.singh"]}
-{"classification": "Dna binding protein", "pdb": "3RH2", "deposition_date": "2011-04-11", "title": "Crystal structure of a tetr-like transcriptional regulator (sama_0099) From shewanella amazonensis sb2b at 2.42 a resolution", "Keywords": ["Na/rna-binding 3-helical bundle", " structural genomics", " joint center", "For structural genomics", "Jcsg", "Protein structure initiative", "Psi-", "Biology", "Dna binding protein"], "authors": ["Joint center for structural genomics (jcsg)"]}
-{"classification": "Transferase", "pdb": "2WK5", "deposition_date": "2009-06-05", "title": "Structural features of native human thymidine phosphorylase And in complex with 5-iodouracil", "Keywords": ["Lycosyltransferase", " developmental protein", " angiogenesis", "", "5-iodouracil", "Growth factor", "Enzyme kinetics", "", "Differentiation", "Disease mutation", "Thymidine", "Phosphorylase", "Chemotaxis", "Transferase", "Mutagenesis", "", "Polymorphism"], "authors": ["E.mitsiki", "A.c.papageorgiou", "S.iyer", "N.thiyagarajan", "S.h.prior", "", "D.sleep", "C.finnis", "K.r.acharya"], "pmid": "19555658", "doi": "10.1016/J.BBRC.2009.06.104"}
-{"classification": "Hydrolase", "pdb": "3P9Y", "deposition_date": "2010-10-18", "title": "Crystal structure of the drosophila melanogaster ssu72-pctd complex", "Keywords": ["Hosphatase", " cis proline", " lmw ptp-like fold", " rna polymerase ii ctd", "", "Hydrolase"], "authors": ["J.w.werner-allen", "P.zhou"], "pmid": "21159777", "doi": "10.1074/JBC.M110.197129"}
-{"classification": "Recombination/dna", "pdb": "6OEO", "deposition_date": "2019-03-27", "title": "Cryo-em structure of mouse rag1/2 nfc complex (dna1)", "Keywords": ["(d)j recombination", " dna transposition", " rag", " scid", " recombination", "", "Recombination-dna complex"], "authors": ["X.chen", "Y.cui", "Z.h.zhou", "W.yang", "M.gellert"], "pmid": "32015552", "doi": "10.1038/S41594-019-0363-2"}
-{"classification": "Hydrolase", "pdb": "4ECA", "deposition_date": "1997-02-21", "title": "Asparaginase from e. coli, mutant t89v with covalently bound aspartate", "Keywords": ["Ydrolase", " acyl-enzyme intermediate", " threonine amidohydrolase"], "authors": ["G.j.palm", "J.lubkowski", "A.wlodawer"], "pmid": "8706862", "doi": "10.1016/0014-5793(96)00660-6"}
-{"classification": "Transcription/protein binding", "pdb": "3UVX", "deposition_date": "2011-11-30", "title": "Crystal structure of the first bromodomain of human brd4 in complex With a diacetylated histone 4 peptide (h4k12ack16ac)", "Keywords": ["Romodomain", " bromodomain containing protein 4", " cap", " hunk1", " mcap", "", "Mitotic chromosome associated protein", "Peptide complex", "Structural", "Genomics consortium", "Sgc", "Transcription-protein binding complex"], "authors": ["P.filippakopoulos", "S.picaud", "T.keates", "E.ugochukwu", "F.von delft", "", "C.h.arrowsmith", "A.m.edwards", "J.weigelt", "C.bountra", "S.knapp", "Structural", "Genomics consortium (sgc)"], "pmid": "22464331", "doi": "10.1016/J.CELL.2012.02.013"}
-{"classification": "Membrane protein", "pdb": "1TLZ", "deposition_date": "2004-06-10", "title": "Tsx structure complexed with uridine", "Keywords": ["Ucleoside transporter", " beta barrel", " uridine", " membrane", "Protein"], "authors": ["J.ye", "B.van den berg"], "pmid": "15272310", "doi": "10.1038/SJ.EMBOJ.7600330"}
-{"classification": "Dna binding protein", "pdb": "7AZD", "deposition_date": "2020-11-16", "title": "Dna polymerase sliding clamp from escherichia coli with peptide 20 Bound", "Keywords": ["Ntibacterial drug", " dna binding protein"], "authors": ["C.monsarrat", "G.compain", "C.andre", "I.martiel", "S.engilberge", "V.olieric", "", "P.wolff", "K.brillet", "M.landolfo", "C.silva da veiga", "J.wagner", "G.guichard", "", "D.y.burnouf"], "pmid": "34806883", "doi": "10.1021/ACS.JMEDCHEM.1C00918"}
-{"classification": "Transferase", "pdb": "5N3K", "deposition_date": "2017-02-08", "title": "Camp-dependent protein kinase a from cricetulus griseus in complex With fragment like molecule o-guanidino-l-homoserine", "Keywords": ["Ragment", " complex", " transferase", " serine threonine kinase", " camp", "", "Kinase", "Pka"], "authors": ["C.siefker", "A.heine", "G.klebe"]}
-{"classification": "Biosynthetic protein", "pdb": "8H52", "deposition_date": "2022-10-11", "title": "Crystal structure of helicobacter pylori carboxyspermidine Dehydrogenase in complex with nadp", "Keywords": ["Arboxyspermidine dehydrogenase", " biosynthetic protein"], "authors": ["K.y.ko", "S.c.park", "S.y.cho", "S.i.yoon"], "pmid": "36283333", "doi": "10.1016/J.BBRC.2022.10.049"}
-{"classification": "Metal binding protein", "pdb": "6DYC", "deposition_date": "2018-07-01", "title": "Co(ii)-bound structure of the engineered cyt cb562 variant, ch3", "Keywords": ["Esigned protein", " 4-helix bundle", " electron transport", " metal binding", "Protein"], "authors": ["F.a.tezcan", "J.rittle"], "pmid": "30778140", "doi": "10.1038/S41557-019-0218-9"}
-{"classification": "Protein fibril", "pdb": "6A6B", "deposition_date": "2018-06-27", "title": "Cryo-em structure of alpha-synuclein fiber", "Keywords": ["Lpha-syn fiber", " parkinson disease", " protein fibril"], "authors": ["Y.w.li", "C.y.zhao", "F.luo", "Z.liu", "X.gui", "Z.luo", "X.zhang", "D.li", "C.liu", "X.li"], "pmid": "30065316", "doi": "10.1038/S41422-018-0075-X"}
-{"classification": "Dna", "pdb": "7D5E", "deposition_date": "2020-09-25", "title": "Left-handed g-quadruplex containing two bulges", "Keywords": ["-quadruplex", " bulge", " dna", " left-handed"], "authors": ["P.das", "A.maity", "K.h.ngo", "F.r.winnerdy", "B.bakalar", "Y.mechulam", "E.schmitt", "", "A.t.phan"], "pmid": "33503265", "doi": "10.1093/NAR/GKAA1259"}
-{"classification": "Transferase", "pdb": "3RSY", "deposition_date": "2011-05-02", "title": "Cellobiose phosphorylase from cellulomonas uda in complex with sulfate And glycerol", "Keywords": ["H94", " alpha barrel", " cellobiose phosphorylase", " disaccharide", "Phosphorylase", "Transferase"], "authors": ["A.van hoorebeke", "J.stout", "W.soetaert", "J.van beeumen", "T.desmet", "S.savvides"]}
-{"classification": "Oxidoreductase", "pdb": "7MCI", "deposition_date": "2021-04-02", "title": "Mofe protein from azotobacter vinelandii with a sulfur-replenished Cofactor", "Keywords": ["Zotobacter vinelandii", " mofe-protein", " nitrogenase", " oxidoreductase"], "authors": ["W.kang", "C.lee", "Y.hu", "M.w.ribbe"], "doi": "10.1038/S41929-022-00782-7"}
-{"classification": "Dna", "pdb": "1XUW", "deposition_date": "2004-10-26", "title": "Structural rationalization of a large difference in rna affinity Despite a small difference in chemistry between two 2'-o-modified Nucleic acid analogs", "Keywords": ["Na mimetic methylcarbamate amide analog", " dna"], "authors": ["R.pattanayek", "L.sethaphong", "C.pan", "M.prhavc", "T.p.prakash", "M.manoharan", "", "M.egli"], "pmid": "15547979", "doi": "10.1021/JA044637K"}
-{"classification": "Lyase", "pdb": "7C0D", "deposition_date": "2020-05-01", "title": "Crystal structure of azospirillum brasilense l-2-keto-3-deoxyarabonate Dehydratase (hydroxypyruvate-bound form)", "Keywords": ["-2-keto-3-deoxyarabonate dehydratase", " lyase"], "authors": ["Y.watanabe", "S.watanabe"], "pmid": "32697085", "doi": "10.1021/ACS.BIOCHEM.0C00515"}
-{"classification": "Signaling protein", "pdb": "5LYK", "deposition_date": "2016-09-28", "title": "Crystal structure of intracellular b30.2 domain of btn3a1 bound to Citrate", "Keywords": ["30.2", " butyrophilin", " signaling protein"], "authors": ["F.mohammed", "A.t.baker", "M.salim", "B.e.willcox"], "pmid": "28862425", "doi": "10.1021/ACSCHEMBIO.7B00694"}
-{"classification": "Toxin", "pdb": "4IZL", "deposition_date": "2013-01-30", "title": "Structure of the n248a mutant of the panton-valentine leucocidin s Component from staphylococcus aureus", "Keywords": ["I-component leucotoxin", " staphylococcus aureus", " s component", "Leucocidin", "Beta-barrel pore forming toxin", "Toxin"], "authors": ["L.maveyraud", "B.j.laventie", "G.prevost", "L.mourey"], "pmid": "24643034", "doi": "10.1371/JOURNAL.PONE.0092094"}
-{"classification": "Dna", "pdb": "6F3C", "deposition_date": "2017-11-28", "title": "The cytotoxic [pt(h2bapbpy)] platinum complex interacting with the Cgtacg hexamer", "Keywords": ["Rug-dna complex", " four-way junction", " dna"], "authors": ["M.ferraroni", "C.bazzicalupi", "P.gratteri", "F.papi"], "pmid": "31046177", "doi": "10.1002/ANIE.201814532"}
-{"classification": "Signaling protein/inhibitor", "pdb": "4L5M", "deposition_date": "2013-06-11", "title": "Complexe of arno sec7 domain with the protein-protein interaction Inhibitor n-(4-hydroxy-2,6-dimethylphenyl)benzenesulfonamide at ph6.5", "Keywords": ["Ec-7domain", " signaling protein-inhibitor complex"], "authors": ["F.hoh", "J.rouhana"], "pmid": "24112024", "doi": "10.1021/JM4009357"}
-{"classification": "Signaling protein", "pdb": "5I6J", "deposition_date": "2016-02-16", "title": "Crystal structure of srgap2 f-barx", "Keywords": ["Rgap2", " f-bar", " fx", " signaling protein"], "authors": ["M.sporny", "J.guez-haddad", "M.n.isupov", "Y.opatowsky"], "pmid": "28333212", "doi": "10.1093/MOLBEV/MSX094"}
-{"classification": "Metal binding protein", "pdb": "1Q80", "deposition_date": "2003-08-20", "title": "Solution structure and dynamics of nereis sarcoplasmic calcium binding Protein", "Keywords": ["Ll-alpha", " metal binding protein"], "authors": ["G.rabah", "R.popescu", "J.a.cox", "Y.engelborghs", "C.t.craescu"], "pmid": "15819893", "doi": "10.1111/J.1742-4658.2005.04629.X"}
-{"classification": "Transferase", "pdb": "1TW1", "deposition_date": "2004-06-30", "title": "Beta-1,4-galactosyltransferase mutant met344his (m344h-gal-t1) complex With udp-galactose and magnesium", "Keywords": ["Et344his mutation; closed conformation; mn binding", " transferase"], "authors": ["B.ramakrishnan", "E.boeggeman", "P.k.qasba"], "pmid": "15449940", "doi": "10.1021/BI049007+"}
-{"classification": "Rna", "pdb": "2PN4", "deposition_date": "2007-04-23", "title": "Crystal structure of hepatitis c virus ires subdomain iia", "Keywords": ["Cv", " ires", " subdoamin iia", " rna", " strontium", " hepatitis"], "authors": ["Q.zhao", "Q.han", "C.r.kissinger", "P.a.thompson"], "pmid": "18391410", "doi": "10.1107/S0907444908002011"}
+{"pdb": "1CW0", "title": "crystal structure analysis of very short patch repair (vsr) endonuclease in complex with a duplex dna", "authors": ["S.E.Tsutakawa", "H.Jingami", "K.Morikawa"], "doi": "10.1016/S0092-8674(00)81550-0", "pmid": "10612397"}
+{"pdb": "2CWW", "title": "crystal structure of thermus thermophilus ttha1280, a putative sam- dependent rna methyltransferase, in complex with s-adenosyl-l- homocysteine", "authors": ["A.A.Pioszak", "K.Murayama", "N.Nakagawa", "A.Ebihara", "S.Kuramitsu", "M.Shirouzu", "S.Yokoyama", "Riken Structural Genomics/proteomics Initiative (Rsgi)"], "doi": "10.1107/S1744309105029842", "pmid": "16511182"}
+{"pdb": "6CWE", "title": "structure of alpha-gsa[8,6p] bound by cd1d and in complex with the va14vb8.2 tcr", "authors": ["J.Wang", "D.Zajonc"], "doi": null, "pmid": null}
+{"pdb": "5CWS", "title": "crystal structure of the intact chaetomium thermophilum nsp1-nup49- nup57 channel nucleoporin heterotrimer bound to its nic96 nuclear pore complex attachment site", "authors": ["C.J.Bley", "S.Petrovic", "M.Paduch", "V.Lu", "A.A.Kossiakoff", "A.Hoelz"], "doi": "10.1126/SCIENCE.AAC9176", "pmid": "26316600"}
+{"pdb": "5CWE", "title": "structure of cyp107l2 from streptomyces avermitilis with lauric acid", "authors": ["T.-V.Pham", "S.-H.Han", "J.-H.Kim", "D.-H.Kim", "L.-W.Kang"], "doi": null, "pmid": null}
+{"pdb": "7CW4", "title": "acetyl-coa acetyltransferase from bacillus cereus atcc 14579", "authors": ["J.Hong", "K.J.Kim"], "doi": "10.1016/J.BBRC.2020.09.048", "pmid": "32972748"}
+{"pdb": "2CWP", "title": "crystal structure of metrs related protein from pyrococcus horikoshii", "authors": ["K.Murayama", "M.Kato-Murayama", "M.Shirouzu", "S.Yokoyama", "Riken StructuralGenomics/proteomics Initiative (Rsgi)"], "doi": null, "pmid": null}
+{"pdb": "2CW7", "title": "crystal structure of intein homing endonuclease ii", "authors": ["H.Matsumura", "H.Takahashi", "T.Inoue", "H.Hashimoto", "M.Nishioka", "S.Fujiwara", "M.Takagi", "T.Imanaka", "Y.Kai"], "doi": "10.1002/PROT.20858", "pmid": "16493661"}
+{"pdb": "1CWU", "title": "brassica napus enoyl acp reductase a138g mutant complexed with nad+ and thienodiazaborine", "authors": ["A.Roujeinikova", "J.B.Rafferty", "D.W.Rice"], "doi": "10.1074/JBC.274.43.30811", "pmid": "10521472"}
+{"pdb": "3CWN", "title": "escherichia coli transaldolase b mutant f178y", "authors": ["T.Sandalova", "G.Schneider", "A.Samland"], "doi": "10.1074/JBC.M803184200", "pmid": "18687684"}
+{"pdb": "1CWL", "title": "human cyclophilin a complexed with 4 4-hydroxy-meleu cyclosporin", "authors": ["V.Mikol", "J.Kallen", "P.Taylor", "M.D.Walkinshaw"], "doi": "10.1006/JMBI.1998.2108", "pmid": "9769216"}
+{"pdb": "3CW2", "title": "crystal structure of the intact archaeal translation initiation factor 2 from sulfolobus solfataricus .", "authors": ["E.A.Stolboushkina", "S.V.Nikonov", "A.D.Nikulin", "U.Blaesi", "D.J.Manstein", "R.V.Fedorov", "M.B.Garber", "O.S.Nikonov"], "doi": "10.1016/J.JMB.2008.07.039", "pmid": "18675278"}
+{"pdb": "3CW9", "title": "4-chlorobenzoyl-coa ligase/synthetase in the thioester-forming conformation, bound to 4-chlorophenacyl-coa", "authors": ["A.S.Reger", "J.Cao", "R.Wu", "D.Dunaway-Mariano", "A.M.Gulick"], "doi": "10.1021/BI800696Y", "pmid": "18620418"}
+{"pdb": "3CWU", "title": "crystal structure of an alka host/guest complex 2'-fluoro-2'-deoxy-1, n6-ethenoadenine:thymine base pair", "authors": ["B.R.Bowman", "S.Lee", "S.Wang", "G.L.Verdine"], "doi": "10.1016/J.STR.2008.04.012", "pmid": "18682218"}
+{"pdb": "5CWF", "title": "crystal structure of de novo designed helical repeat protein dhr8", "authors": ["G.Bhabha", "D.C.Ekiert"], "doi": "10.1038/NATURE16162", "pmid": "26675729"}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump
@ -1,36 +1,6 @@
-{"pid": " Q6GZX4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 43"}], "title": "Putative transcription factor 001R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZX3", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 45"}], "title": "Uncharacterized protein 002L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197F8", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-02-23", "date_info": "entry version 29"}], "title": "Uncharacterized protein 002R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q197F7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2020-08-12", "date_info": "entry version 23"}], "title": "Uncharacterized protein 003L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZX2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 3R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZX1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 38"}], "title": "Uncharacterized protein 004R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197F5", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-10-12", "date_info": "entry version 32"}], "title": "Uncharacterized protein 005L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZX0", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 47"}], "title": "Uncharacterized protein 005R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q91G88", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2023-06-28", "date_info": "entry version 53"}], "title": "Putative KilA-N domain-containing protein 006L", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
-{"pid": " Q6GZW9", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 34"}], "title": "Uncharacterized protein 006R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZW8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 32"}], "title": "Uncharacterized protein 007R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197F3", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 28"}], "title": "Uncharacterized protein 007R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q197F2", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-02-23", "date_info": "entry version 22"}], "title": "Uncharacterized protein 008L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZW6", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 67"}], "title": "Putative helicase 009L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q91G85", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 38"}], "title": "Uncharacterized protein 009R", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
-{"pid": " Q6GZW5", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 010R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197E9", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 28"}], "title": "Uncharacterized protein 011L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZW4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 011R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZW3", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 012L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197E7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 37"}], "title": "Uncharacterized protein IIV3-013L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZW2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 30"}], "title": "Uncharacterized protein 013R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZW1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 014R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZW0", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 50"}], "title": "Uncharacterized protein 015R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 017L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV7", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 33"}], "title": "Uncharacterized protein 018L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV6", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 87"}], "title": "Putative serine/threonine-protein kinase 019R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV5", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 40"}], "title": "Uncharacterized protein 020R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 021L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197D8", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-12-14", "date_info": "entry version 35"}], "title": "Transmembrane protein 022L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZV2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 33"}], "title": "Uncharacterized protein 023R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197D7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 25"}], "title": "Uncharacterized protein 023R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZV1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 024R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197D5", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-10-12", "date_info": "entry version 24"}], "title": "Uncharacterized protein 025R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q91G70", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2020-08-12", "date_info": "entry version 32"}], "title": "Uncharacterized protein 026R", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
-{"pid": " Q6GZU9", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 49"}], "title": "Uncharacterized protein 027R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZU8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 55"}], "title": "Uncharacterized protein 028R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": "Q6GZX4", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 41."}], "title": "Putative transcription factor 001R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": "Q6GZX3", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 42."}], "title": "Uncharacterized protein 002L;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": "Q197F8", "dates": [{"date": "16-JUN-2009", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "11-JUL-2006", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 27."}], "title": "Uncharacterized protein 002R;", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus."], "references": [{"PubMed": "16912294"}, {" DOI": "10.1128/jvi.00464-06"}]}
+{"pid": "Q197F7", "dates": [{"date": "16-JUN-2009", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "11-JUL-2006", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 23."}], "title": "Uncharacterized protein 003L;", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus."], "references": [{"PubMed": "16912294"}, {" DOI": "10.1128/jvi.00464-06"}]}
+{"pid": "Q6GZX2", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 36."}], "title": "Uncharacterized protein 3R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": "Q6GZX1", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 34."}], "title": "Uncharacterized protein 004R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
--- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala
+++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala
@ -26,7 +26,7 @@ class MAGMappingTest {
  @Test
  def mappingMagType(): Unit = {

-    checkResult[Publication](MagUtility.createResultFromType(null, null), invisible = true, "Other literature type")
+    checkResult[Publication](MagUtility.createResultFromType(null, null), invisible = false, "Other literature type")
    checkResult[Publication](
      MagUtility.createResultFromType(Some("BookChapter"), null),
      invisible = false,
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java
@ -2,14 +2,13 @@
 package eu.dnetlib.dhp.oa.dedup;

 import java.util.*;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;

 import org.apache.commons.beanutils.BeanUtils;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.api.java.function.FlatMapGroupsFunction;
 import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.api.java.function.ReduceFunction;
 import org.apache.spark.sql.*;

 import eu.dnetlib.dhp.oa.dedup.model.Identifier;
@ -107,6 +106,8 @@ public class DedupRecordFactory {

 					final HashSet<String> acceptanceDate = new HashSet<>();

+					boolean isVisible = false;
+
 					while (it.hasNext()) {
 						Tuple3<String, String, OafEntity> t = it.next();
 						OafEntity entity = t._3();
@ -114,6 +115,7 @@ public class DedupRecordFactory {
 						if (entity == null) {
 							aliases.add(t._2());
 						} else {
+							isVisible = isVisible || !entity.getDataInfo().getInvisible();
 							cliques.add(entity);

 							if (acceptanceDate.size() < MAX_ACCEPTANCE_DATE) {
@ -129,13 +131,20 @@ public class DedupRecordFactory {

 					}

-					if (acceptanceDate.size() >= MAX_ACCEPTANCE_DATE || cliques.isEmpty()) {
+					if (!isVisible || acceptanceDate.size() >= MAX_ACCEPTANCE_DATE || cliques.isEmpty()) {
 						return Collections.emptyIterator();
 					}

 					OafEntity mergedEntity = MergeUtils.mergeGroup(dedupId, cliques.iterator());
 					// dedup records do not have date of transformation attribute
 					mergedEntity.setDateoftransformation(null);
+					mergedEntity
+						.setMergedIds(
+							Stream
+								.concat(cliques.stream().map(OafEntity::getId), aliases.stream())
+								.distinct()
+								.sorted()
+								.collect(Collectors.toList()));

 					return Stream
 						.concat(
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java
@ -5,11 +5,11 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTION
 import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP;

 import java.io.IOException;
+import java.util.Arrays;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
-import org.apache.spark.sql.SaveMode;
-import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.*;
 import org.dom4j.DocumentException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -17,6 +17,7 @@ import org.xml.sax.SAXException;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.EntityType;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.DataInfo;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
@ -25,6 +26,7 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import eu.dnetlib.pace.config.DedupConfig;
+import scala.collection.JavaConverters;

 public class SparkCreateDedupRecord extends AbstractSparkAction {

@ -85,6 +87,36 @@ public class SparkCreateDedupRecord extends AbstractSparkAction {
 				.mode(SaveMode.Overwrite)
 				.option("compression", "gzip")
 				.json(outputPath);
+
+			log.info("Updating mergerels for: '{}'", subEntity);
+			final Dataset<Row> dedupIds = spark
+				.read()
+				.schema("`id` STRING, `mergedIds` ARRAY<STRING>")
+				.json(outputPath)
+				.selectExpr("id as source", "explode(mergedIds) as target");
+			spark
+				.read()
+				.load(mergeRelPath)
+				.where("relClass == 'merges'")
+				.join(dedupIds, JavaConverters.asScalaBuffer(Arrays.asList("source", "target")).toSeq(), "left_semi")
+				.write()
+				.mode(SaveMode.Overwrite)
+				.option("compression", "gzip")
+				.save(workingPath + "/mergerel_filtered");
+
+			final Dataset<Row> validRels = spark.read().load(workingPath + "/mergerel_filtered");
+
+			final Dataset<Row> filteredMergeRels = validRels
+				.union(
+					validRels
+						.withColumnRenamed("source", "source_tmp")
+						.withColumnRenamed("target", "target_tmp")
+						.withColumn("relClass", functions.lit(ModelConstants.IS_MERGED_IN))
+						.withColumnRenamed("target_tmp", "source")
+						.withColumnRenamed("source_tmp", "target"));
+
+			saveParquet(filteredMergeRels, mergeRelPath, SaveMode.Overwrite);
+			removeOutputDir(spark, workingPath + "/mergerel_filtered");
 		}
 	}

--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java
@ -17,6 +17,45 @@ import eu.dnetlib.pace.tree.support.TreeStats;

 class DecisionTreeTest {

+	@Test // Converts a sample organization JSON into a Row via the dedup model and checks its identifier is not blank.
+	void testJPath() throws IOException {
+
+		DedupConfig conf = DedupConfig
+			.load(IOUtils.toString(getClass().getResourceAsStream("dedup_conf_organization.json"))); // configuration is read from a classpath resource resolved relative to this test class
+
+		final String org = IOUtils.toString(getClass().getResourceAsStream("organization.json")); // raw JSON of the organization under test
+
+		Row row = SparkModel.apply(conf).rowFromJson(org); // build the Row from the JSON using the model derived from conf
+
+		System.out.println("row = " + row);
+		Assertions.assertNotNull(row);
+		Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier"))); // the "identifier" column must be populated
+
+		System.out.println("row = " + row.getAs("countrytitle")); // printed for manual inspection only; "countrytitle" is not asserted
+	}
+
+	@Test // Checks that converting the same organization JSON twice yields equal Rows with a non-blank identifier.
+	void jsonToModelTest() throws IOException {
+		DedupConfig conf = DedupConfig
+			.load(
+				IOUtils
+					.toString(
+						SparkOpenorgsDedupTest.class
+							.getResourceAsStream(
+								"/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); // absolute classpath path, so the anchor class does not affect resolution
+
+		final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json")); // resolved relative to this test class
+
+		Row row = SparkModel.apply(conf).rowFromJson(org);
+		// parse the same JSON again with a freshly built model: both conversions must produce equal rows
+		Row row1 = SparkModel.apply(conf).rowFromJson(org);
+
+		Assertions.assertEquals(row, row1);
+		System.out.println("row = " + row);
+		Assertions.assertNotNull(row); // NOTE(review): this null check runs after assertEquals above
+		Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier"))); // the "identifier" column must be populated
+	}
+
 	@Test
 	void organizationDecisionTreeTest() throws Exception {
 		DedupConfig conf = DedupConfig
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
@ -452,18 +452,18 @@ public class SparkDedupTest implements Serializable {
 			assertEquals(ModelConstants.RESULT_RESULT, r.getRelType());
 			assertEquals(ModelConstants.DEDUP, r.getSubRelType());
 			assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass());
-			assertFalse(dups.contains(r.getTarget()));
+			assertTrue(dups.contains(r.getTarget()));
 		});

 		final List<Relation> mergedIn = pubs
 			.filter("target == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'")
 			.collectAsList();
-		assertEquals(1, mergedIn.size());
+		assertEquals(3, mergedIn.size());
 		mergedIn.forEach(r -> {
 			assertEquals(ModelConstants.RESULT_RESULT, r.getRelType());
 			assertEquals(ModelConstants.DEDUP, r.getSubRelType());
-			assertEquals(ModelConstants.MERGES, r.getRelClass());
-			assertFalse(dups.contains(r.getSource()));
+			assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass());
+			assertTrue(dups.contains(r.getSource()));
 		});

 		System.out.println("orgs_mergerel = " + orgs_mergerel);
@ -473,8 +473,8 @@ public class SparkDedupTest implements Serializable {
 		System.out.println("orp_mergerel = " + orp_mergerel);

 		if (CHECK_CARDINALITIES) {
-			assertEquals(1278, orgs_mergerel);
-			assertEquals(1158, pubs.count());
+			assertEquals(1268, orgs_mergerel);
+			assertEquals(1156, pubs.count());
 			assertEquals(292, sw_mergerel);
 			assertEquals(476, ds_mergerel);
 			assertEquals(742, orp_mergerel);
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java
@ -241,6 +241,7 @@ public class SparkPublicationRootsTest implements Serializable {

 		verifyRoot_case_1(roots, pubs);
 		verifyRoot_case_2(roots, pubs);
+		verifyRoot_case_3(roots, pubs);
 	}

 	private static void verifyRoot_case_1(Dataset<Publication> roots, Dataset<Publication> pubs) {
@ -321,6 +322,34 @@ public class SparkPublicationRootsTest implements Serializable {
 		assertTrue(Sets.difference(root_cf, dups_cf).isEmpty());
 	}

+	/**
+	 * Case 3: looks up the root {@code 50|dedup_wf_001::31ca734cc22181b704c4aa8fd050062a} and the publication
+	 * {@code 50|od_______166::31ca734cc22181b704c4aa8fd050062a}, then checks that the root carries the same
+	 * publisher value and that every collectedfrom value of the root also occurs among the given publications.
+	 *
+	 * Declared static, like verifyRoot_case_1, because it reads no instance state.
+	 *
+	 * @param roots dataset searched for the root record
+	 * @param pubs dataset searched for the duplicate; also the source of the admissible collectedfrom values
+	 */
+	private static void verifyRoot_case_3(Dataset<Publication> roots, Dataset<Publication> pubs) {
+		Publication root = roots
+			.filter("id = '50|dedup_wf_001::31ca734cc22181b704c4aa8fd050062a'")
+			.first();
+		assertNotNull(root);
+
+		Publication pivot_duplicate = pubs
+			.filter("id = '50|od_______166::31ca734cc22181b704c4aa8fd050062a'")
+			.first();
+
+		assertEquals(pivot_duplicate.getPublisher().getValue(), root.getPublisher().getValue());
+
+		// collectedfrom values seen on any of the input publications
+		Set<String> dups_cf = pubs
+			.collectAsList()
+			.stream()
+			.flatMap(p -> p.getCollectedfrom().stream())
+			.map(KeyValue::getValue)
+			.collect(Collectors.toCollection(HashSet::new));
+
+		// collectedfrom values carried by the root
+		Set<String> root_cf = root
+			.getCollectedfrom()
+			.stream()
+			.map(KeyValue::getValue)
+			.collect(Collectors.toCollection(HashSet::new));
+
+		// the root must not introduce a collectedfrom value that is absent from the inputs
+		assertTrue(Sets.difference(root_cf, dups_cf).isEmpty());
+	}
+
 	@Test
 	@Order(6)
 	void updateEntityTest() throws Exception {
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java
@ -143,9 +143,7 @@ public class SparkPublicationRootsTest2 implements Serializable {
 				"--graphBasePath", graphInputPath,
 				"--actionSetId", testActionSetId,
 				"--isLookUpUrl", "lookupurl",
-				"--workingPath", workingPath,
-				"--hiveMetastoreUris", "none",
-				"--pivotHistoryDatabase", ""
+				"--workingPath", workingPath
 			}), spark)
 				.run(isLookUpService);

@ -155,7 +153,7 @@ public class SparkPublicationRootsTest2 implements Serializable {
 			.as(Encoders.bean(Relation.class));

 		assertEquals(
-			4, merges
+			3, merges
 				.filter("relclass == 'isMergedIn'")
 				.map((MapFunction<Relation, String>) Relation::getTarget, Encoders.STRING())
 				.distinct()
@ -180,7 +178,7 @@ public class SparkPublicationRootsTest2 implements Serializable {
 			.textFile(workingPath + "/" + testActionSetId + "/publication_deduprecord")
 			.map(asEntity(Publication.class), Encoders.bean(Publication.class));

-		assertEquals(4, roots.count());
+		assertEquals(3, roots.count());

 		final Dataset<Publication> pubs = spark
 			.read()
@ -197,7 +195,7 @@ public class SparkPublicationRootsTest2 implements Serializable {
 			.collectAsList()
 			.get(0);

-		assertEquals("2022-01-01", root.getDateofacceptance().getValue());
+		assertEquals(crossref_duplicate.getDateofacceptance().getValue(), root.getDateofacceptance().getValue());
 		assertEquals(crossref_duplicate.getJournal().getName(), root.getJournal().getName());
 		assertEquals(crossref_duplicate.getJournal().getIssnPrinted(), root.getJournal().getIssnPrinted());
 		assertEquals(crossref_duplicate.getPublisher().getValue(), root.getPublisher().getValue());
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java
@ -168,7 +168,7 @@ public class SparkStatsTest implements Serializable {
 			.load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats")
 			.count();

-		assertEquals(412, orgs_blocks);
+		assertEquals(414, orgs_blocks);
 		assertEquals(221, pubs_blocks);
 		assertEquals(134, sw_blocks);
 		assertEquals(196, ds_blocks);
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
@ -73,6 +73,12 @@
    "name": "Irish Nephrology Society",
    "synonym": []
  },
+  {
+    "id": "100011062",
+    "uri": "http://dx.doi.org/10.13039/100011062",
+    "name": "Asian Spinal Cord Network",
+    "synonym": []
+  },
  {
    "id": "100011096",
    "uri": "http://dx.doi.org/10.13039/100011096",
@ -217,6 +223,12 @@
    "name": "Global Brain Health Institute",
    "synonym": []
  },
+  {
+    "id": "100015776",
+    "uri": "http://dx.doi.org/10.13039/100015776",
+    "name": "Health and Social Care Board",
+    "synonym": []
+  },
  {
    "id": "100015992",
    "uri": "http://dx.doi.org/10.13039/100015992",
@ -391,6 +403,18 @@
    "name": "Irish Hospice Foundation",
    "synonym": []
  },
+  {
+    "id": "501100001596",
+    "uri": "http://dx.doi.org/10.13039/501100001596",
+    "name": "Irish Research Council for Science, Engineering and Technology",
+    "synonym": []
+  },
+  {
+    "id": "501100001597",
+    "uri": "http://dx.doi.org/10.13039/501100001597",
+    "name": "Irish Research Council for the Humanities and Social Sciences",
+    "synonym": []
+  },
  {
    "id": "501100001598",
    "uri": "http://dx.doi.org/10.13039/501100001598",
@ -491,7 +515,7 @@
    "id": "501100002081",
    "uri": "http://dx.doi.org/10.13039/501100002081",
    "name": "Irish Research Council",
-    "synonym": ["501100001596", "501100001597"]
+    "synonym": []
  },
  {
    "id": "501100002736",
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -7,7 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactor
 import eu.dnetlib.dhp.utils.DHPUtils
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import eu.dnetlib.doiboost.DoiBoostMappingUtil._
-import org.apache.commons.lang.StringUtils
+import org.apache.commons.lang3.StringUtils
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.JsonAST._
@ -560,15 +560,11 @@ case object Crossref2Oaf {
                "10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" |
                "10.13039/501100013589" | "10.13039/501100000271" =>
              generateSimpleRelationFromAward(funder, "ukri________", a => a)
-            //HFRI
-            case "10.13039/501100013209" =>
-              generateSimpleRelationFromAward(funder, "hfri________", a => a)
-              val targetId = getProjectId("hfri________", "1e5e62235d094afd01cd56e65112fc63")
+            //DFG
+            case "10.13039/501100001659" =>
+              val targetId = getProjectId("dfgf________", "1e5e62235d094afd01cd56e65112fc63")
              queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
              queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
-            //ERASMUS+
-            case "10.13039/501100010790" =>
-              generateSimpleRelationFromAward(funder, "erasmusplus_", a => a)
            case _ => logger.debug("no match for " + funder.DOI.get)

          }
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala
@ -313,7 +313,7 @@ case object ConversionUtil {
      if (f.author.DisplayName.isDefined)
        a.setFullname(f.author.DisplayName.get)
      if (f.affiliation != null)
-        a.setAffiliation(List(asField(f.affiliation)).asJava)
+        a.setRawAffiliationString(List(f.affiliation).asJava)
      a.setPid(
        List(
          createSP(
@ -386,7 +386,7 @@ case object ConversionUtil {
      a.setFullname(f.author.DisplayName.get)

      if (f.affiliation != null)
-        a.setAffiliation(List(asField(f.affiliation)).asJava)
+        a.setRawAffiliationString(List(f.affiliation).asJava)

      a.setPid(
        List(
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
 import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo}
-import org.apache.commons.lang.StringUtils
+import org.apache.commons.lang3.StringUtils
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.JsonAST._
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
@ -6,11 +6,11 @@ import java.io.Serializable;
 import java.util.*;
 import java.util.stream.Collectors;

+import org.apache.commons.lang3.StringUtils;
 import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import com.amazonaws.util.StringUtils;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.Maps;

@ -81,7 +81,7 @@ public class Utils implements Serializable {
 		Community c = new Community();
 		c.setId(cm.getId());
 		c.setZenodoCommunities(cm.getOtherZenodoCommunities());
-		if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
+		if (StringUtils.isNotBlank(cm.getZenodoCommunity()))
 			c.getZenodoCommunities().add(cm.getZenodoCommunity());
 		c.setSubjects(cm.getSubjects());
 		c.getSubjects().addAll(cm.getFos());
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java
@ -13,13 +13,13 @@ public class CommunityContentprovider {
 	private String openaireId;
 	private SelectionConstraints selectioncriteria;

-	private Boolean enabled;
+	private String enabled;

-	public Boolean getEnabled() {
+	public String getEnabled() {
 		return enabled;
 	}

-	public void setEnabled(Boolean enabled) {
+	public void setEnabled(String enabled) {
 		this.enabled = enabled;
 	}

--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java
@ -4,7 +4,7 @@ package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import java.lang.reflect.InvocationTargetException;

-import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonIgnore;

 import eu.dnetlib.dhp.bulktag.criteria.Selection;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
@ -53,8 +53,6 @@ public class Constraints implements Serializable {

 		for (Constraint sc : constraint) {
 			boolean verified = false;
-			if (!param.containsKey(sc.getField()))
-				return false;
 			for (String value : param.get(sc.getField())) {
 				if (sc.verifyCriteria(value.trim())) {
 					verified = true;
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
@ -130,7 +130,6 @@ public class ResultTagger implements Serializable {
 					// log.info("Remove constraints for " + communityId);
 					if (conf.getRemoveConstraintsMap().keySet().contains(communityId) &&
 						conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
-							!conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() &&
 						conf
 							.getRemoveConstraintsMap()
 							.get(communityId)
@ -162,30 +161,29 @@ public class ResultTagger implements Serializable {

 		// Tagging for datasource
 		final Set<String> datasources = new HashSet<>();
-		final Set<String> cfhb = new HashSet<>();
+		final Set<String> collfrom = new HashSet<>();
 		final Set<String> hostdby = new HashSet<>();

 		if (Objects.nonNull(result.getInstance())) {
 			for (Instance i : result.getInstance()) {
 				if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) {
-					cfhb.add(i.getCollectedfrom().getKey());
+					collfrom.add(i.getCollectedfrom().getKey());
 				}
 				if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) {
-					cfhb.add(i.getHostedby().getKey());
 					hostdby.add(i.getHostedby().getKey());
 				}

 			}

-			cfhb
+			collfrom
 				.forEach(
 					dsId -> datasources
 						.addAll(
 							conf.getCommunityForDatasource(dsId, param)));
 			hostdby.forEach(dsId -> {
-//				datasources
-//					.addAll(
-//						conf.getCommunityForDatasource(dsId, param));
+				datasources
+					.addAll(
+						conf.getCommunityForDatasource(dsId, param));
 				if (conf.isEoscDatasource(dsId)) {
 					datasources.add("eosc");
 				}
@ -228,7 +226,6 @@ public class ResultTagger implements Serializable {
 			.forEach(communityId -> {
 				if (!removeCommunities.contains(communityId) &&
 					conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
-						!conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() &&
 					conf
 						.getSelectionConstraintsMap()
 						.get(communityId)
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java
@ -33,8 +33,6 @@ public class SelectionConstraints implements Serializable {

 	// Constraints in or
 	public boolean verifyCriteria(final Map<String, List<String>> param) {
-		if (criteria.isEmpty())
-			return true;
 		for (Constraints selc : criteria) {
 			if (selc.verifyCriteria(param)) {
 				return true;
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
@ -14,7 +14,6 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
@ -85,12 +84,11 @@ public class SparkCountryPropagationJob {
 		Dataset<R> res = readPath(spark, sourcePath, resultClazz);

 		log.info("Reading prepared info: {}", preparedInfoPath);
-		final Dataset<Row> preparedInfoRaw = spark
+		Dataset<ResultCountrySet> prepared = spark
 			.read()
-			.json(preparedInfoPath);
+			.json(preparedInfoPath)
+			.as(Encoders.bean(ResultCountrySet.class));

-		if (!preparedInfoRaw.isEmpty()) {
-			final Dataset<ResultCountrySet> prepared = preparedInfoRaw.as(Encoders.bean(ResultCountrySet.class));
 		res
 			.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
 			.map(getCountryMergeFn(), Encoders.bean(resultClazz))
@ -98,13 +96,7 @@ public class SparkCountryPropagationJob {
 			.option("compression", "gzip")
 			.mode(SaveMode.Overwrite)
 			.json(outputPath);
-		} else {
-			res
-				.write()
-				.option("compression", "gzip")
-				.mode(SaveMode.Overwrite)
-				.json(outputPath);
-		}
+
 	}

 	private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf_remove.xml
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf_remove.xml
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/dataset/dataset
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/dataset/dataset
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct/otherresearchproduct
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct/otherresearchproduct
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct~HEAD
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct~HEAD
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/publication/publication
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/publication/publication
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/software/software
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/software/software
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
@ -147,7 +147,6 @@ public class CleanGraphSparkJob {
 			.map((MapFunction<T, T>) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz))
 			.map((MapFunction<T, T>) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz))
 			.map((MapFunction<T, T>) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz))
-			.map((MapFunction<T, T>) GraphCleaningFunctions::dedicatedUglyHacks, Encoders.bean(clazz))
 			.filter((FilterFunction<T>) GraphCleaningFunctions::filter);

 		// read the master-duplicate tuples
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java
@ -9,10 +9,7 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SaveMode;
-import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@ -25,8 +22,6 @@ public class GraphHiveTableImporterJob {

 	private static final Logger log = LoggerFactory.getLogger(GraphHiveTableImporterJob.class);

-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
 	public static void main(String[] args) throws Exception {

 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -74,7 +69,12 @@ public class GraphHiveTableImporterJob {
 	private static <T extends Oaf> void loadGraphTable(SparkSession spark, String inputPath, String hiveDbName,
 		Class<T> clazz, int numPartitions) {

-		Dataset<String> dataset = spark.read().textFile(inputPath);
+		final Encoder<T> clazzEncoder = Encoders.bean(clazz);
+
+		Dataset<Row> dataset = spark
+			.read()
+			.schema(clazzEncoder.schema())
+			.json(inputPath);

 		if (numPartitions > 0) {
 			log.info("repartitioning {} to {} partitions", clazz.getSimpleName(), numPartitions);
@ -82,7 +82,6 @@ public class GraphHiveTableImporterJob {
 		}

 		dataset
-			.map((MapFunction<String, T>) s -> OBJECT_MAPPER.readValue(s, clazz), Encoders.bean(clazz))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.saveAsTable(tableIdentifier(hiveDbName, clazz));
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
@ -519,6 +519,28 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
 						r1 = setRelationSemantic(r1, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
 						r2 = setRelationSemantic(r2, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
 						break;
+					case "resultOrganization_affiliation_isAuthorInstitutionOf": // claimed affiliation: the claim source must be an organization
+						if (!"organization".equals(sourceType)) {
+							throw new IllegalStateException( // reject the claim, reporting both ids and the semantics
+									String
+											.format(
+													"invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId,
+													semantics));
+						}
+						r1 = setRelationSemantic(r1, RESULT_ORGANIZATION, AFFILIATION, IS_AUTHOR_INSTITUTION_OF); // r1 and r2 get opposite relation classes
+						r2 = setRelationSemantic(r2, RESULT_ORGANIZATION, AFFILIATION, HAS_AUTHOR_INSTITUTION);
+						break;
+					case "resultOrganization_affiliation_hasAuthorInstitution": // claimed affiliation: the claim target must be an organization
+						if (!"organization".equals(targetType)) {
+							throw new IllegalStateException( // reject the claim, reporting both ids and the semantics
+									String
+											.format(
+													"invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId,
+													semantics));
+						}
+						r1 = setRelationSemantic(r1, RESULT_ORGANIZATION, AFFILIATION, HAS_AUTHOR_INSTITUTION); // relation classes swapped with respect to the previous case
+						r2 = setRelationSemantic(r2, RESULT_ORGANIZATION, AFFILIATION, IS_AUTHOR_INSTITUTION_OF);
+						break;
 					default:
 						throw new IllegalArgumentException("claim semantics not managed: " + semantics);
 				}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
@ -94,7 +94,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 					author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
 				}

-				author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info));
+				author.setRawAffiliationString(prepareListString(n, "./*[local-name()='affiliation']"));
 				author.setPid(preparePids(n, info));
 				author.setRank(pos++);
 				res.add(author);
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json
@ -22,5 +22,11 @@
    "paramLongName": "targetPath",
    "paramDescription": "the output path of the graph enriched",
    "paramRequired": true
+  },
+  {
+    "paramName": "wp",
+    "paramLongName": "workingDir",
+    "paramDescription": "the working dir",
+    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
@ -223,13 +223,11 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-                --conf spark.sql.shuffle.partitions=15000
            </spark-opts>
            <arg>--hostedByMapPath</arg><arg>${hostedByMapPath}</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
@ -255,13 +253,11 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-                --conf spark.sql.shuffle.partitions=15000
            </spark-opts>
            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
@ -282,7 +278,6 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
@ -47,13 +47,15 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
    log.info(s"orcidPath is '$orcidPath'")
    val targetPath = parser.get("targetPath")
    log.info(s"targetPath is '$targetPath'")
+    // Location handed to createTemporaryData, analisys and generateGraph below, separate from targetPath.
+    val workingDir = parser.get("workingDir")
+    log.info(s"workingDir is '$workingDir'")

-    createTemporaryData(graphPath, orcidPath, targetPath)
-    analisys(targetPath)
-    generateGraph(graphPath, targetPath)
+    createTemporaryData(graphPath, orcidPath, workingDir)
+    analisys(workingDir)
+    generateGraph(graphPath, workingDir, targetPath)
  }

-  private def generateGraph(graphPath: String, targetPath: String): Unit = {
+  private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = {

    ModelSupport.entityTypes.asScala
      .filter(e => ModelSupport.isResult(e._1))
@ -63,7 +65,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]

        val matched = spark.read
          .schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema)
-          .parquet(s"${targetPath}/${resultType}_matched")
+          .parquet(s"${workingDir}/${resultType}_matched")
          .selectExpr("id", "enriched_author")

        spark.read
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java
@ -73,14 +73,10 @@ public class GraphHiveImporterJobTest {
 		GraphHiveImporterJob
 			.main(
 				new String[] {
-					"-isSparkSessionManaged",
-					Boolean.FALSE.toString(),
-					"-inputPath",
-					getClass().getResource("/eu/dnetlib/dhp/oa/graph/sample").getPath(),
-					"-hiveMetastoreUris",
-					"",
-					"-hiveDbName",
-					dbName
+					"--isSparkSessionManaged", Boolean.FALSE.toString(),
+					"--inputPath", getClass().getResource("/eu/dnetlib/dhp/oa/graph/sample").getPath(),
+					"--hiveMetastoreUris", "",
+					"--hiveDbName", dbName
 				});

 		ModelSupport.oafTypes
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
@ -406,15 +406,15 @@ class MappersTest {
 		assertEquals("Baracchini", author.get().getSurname());
 		assertEquals("Theo", author.get().getName());

-		assertEquals(1, author.get().getAffiliation().size());
-		final Optional<Field<String>> opAff = author
+		assertEquals(1, author.get().getRawAffiliationString().size());
+		final Optional<String> opAff = author
 			.get()
-			.getAffiliation()
+			.getRawAffiliationString()
 			.stream()
 			.findFirst();
 		assertTrue(opAff.isPresent());
-		final Field<String> affiliation = opAff.get();
-		assertEquals("ISTI-CNR", affiliation.getValue());
+		final String affiliation = opAff.get();
+		assertEquals("ISTI-CNR", affiliation);

 		assertFalse(d.getSubject().isEmpty());
 		assertFalse(d.getInstance().isEmpty());
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java
@ -16,6 +16,8 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.stream.Collectors;

+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.common.RelationInverse;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.junit.jupiter.api.BeforeEach;
@ -364,6 +366,39 @@ class MigrateDbEntitiesApplicationTest {
 		assertValidId(r1.getCollectedfrom().get(0).getKey());
 		assertValidId(r2.getCollectedfrom().get(0).getKey());
 	}
+	/**
+	 * Processes the affiliation claim fixture and verifies that exactly two relations are produced, that they
+	 * connect the same pair of identifiers in opposite directions, and that identifiers, data info, trust,
+	 * relation class, relation type and the first collectedfrom key are all populated.
+	 */
+	@Test
+	void testProcessClaims_affiliation() throws Exception {
+		final List<TypedField> mockedFields = prepareMocks("claimsrel_resultset_affiliation.json");
+
+		final List<Oaf> claims = app.processClaims(rs);
+
+		assertEquals(2, claims.size());
+		verifyMocks(mockedFields);
+
+		// both outputs must be relations before they can be cast
+		assertTrue(claims.get(0) instanceof Relation);
+		assertTrue(claims.get(1) instanceof Relation);
+		final Relation first = (Relation) claims.get(0);
+		final Relation second = (Relation) claims.get(1);
+
+		// identifiers
+		assertValidId(first.getSource());
+		assertValidId(first.getTarget());
+		assertValidId(second.getSource());
+		assertValidId(second.getTarget());
+
+		// provenance
+		assertNotNull(first.getDataInfo());
+		assertNotNull(second.getDataInfo());
+		assertNotNull(first.getDataInfo().getTrust());
+		assertNotNull(second.getDataInfo().getTrust());
+
+		// the two relations point in opposite directions
+		assertEquals(first.getSource(), second.getTarget());
+		assertEquals(second.getSource(), first.getTarget());
+
+		// semantics
+		assertTrue(StringUtils.isNotBlank(first.getRelClass()));
+		assertTrue(StringUtils.isNotBlank(second.getRelClass()));
+		assertTrue(StringUtils.isNotBlank(first.getRelType()));
+		assertTrue(StringUtils.isNotBlank(second.getRelType()));
+
+		assertValidId(first.getCollectedfrom().get(0).getKey());
+		assertValidId(second.getCollectedfrom().get(0).getKey());
+	}

 	private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException {
 		final String json = IOUtils.toString(getClass().getResourceAsStream(jsonFile));
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/claimsrel_resultset_affiliation.json
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/claimsrel_resultset_affiliation.json
@ -0,0 +1,27 @@
+[
+	{
+		"field": "source_type",
+		"type": "string",
+		"value": "organization"
+	},
+	{
+		"field": "source_id",
+		"type": "string",
+		"value": "openorgs____::b5ca9d4340e26454e367e2908ef3872f"
+	},
+	{
+		"field": "target_type",
+		"type": "string",
+		"value": "software"
+	},
+	{
+		"field": "target_id",
+		"type": "string",
+		"value": "userclaim___::bde53826d07c8cf47c99222a375cd2e8"
+	},
+	{
+		"field": "semantics",
+		"type": "string",
+		"value": "resultOrganization_affiliation_isAuthorInstitutionOf"
+	}
+]
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/dataset/dataset_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/dataset/dataset_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/otherresearchproduct/otherresearchproduct_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/otherresearchproduct/otherresearchproduct_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/person/person_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/person/person_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/publication/publication_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/publication/publication_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/software/software_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/software/software_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala
@ -31,5 +31,11 @@ class ORCIDAuthorMatchersTest {
    assertTrue(matchOrderedTokenAndAbbreviations("孙林 Sun Lin", "Sun Lin"))
    // assertTrue(AuthorsMatchRevised.compare("孙林 Sun Lin", "孙林")); // not yet implemented
  }
+  // A name written with initials ("C. A.") must match the spelled-out form ("Antony")
+  // when both carry the same hyphenated surname.
+  @Test def testDocumentationNames(): Unit = {
+    assertTrue(matchOrderedTokenAndAbbreviations("James C. A. Miller-Jones", "James Antony Miller-Jones"))
+  }

+  // A name written with initials must still match the spelled-out form when the
+  // surname is hyphenated on one side ("Miller-Jones") and space-separated on the other ("Miller Jones").
+  @Test def testDocumentationNames2(): Unit = {
+    assertTrue(matchOrderedTokenAndAbbreviations("James C. A. Miller-Jones", "James Antony Miller Jones"))
+  }
 }
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml
@ -69,7 +69,7 @@
        </configuration>
    </global>

-    <start to="irish_oaiphm_provision"/>
+    <start to="oaiphm_provision"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java
@ -67,7 +67,7 @@ public class PrepareRelationsJobTest {
 	@Test
 	void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception {

-		final int maxRelations = 5;
+		final int maxRelations = 20;
 		PrepareRelationsJob
 			.main(
 				new String[] {
@ -86,7 +86,7 @@ public class PrepareRelationsJobTest {
 			.as(Encoders.bean(Relation.class))
 			.cache();

-		assertEquals(44, out.count());
+		assertEquals(maxRelations, out.count());

 		Dataset<Row> freq = out
 			.toDF()
@ -101,8 +101,12 @@ public class PrepareRelationsJobTest {
 		long affiliation = getRows(freq, AFFILIATION).get(0).getAs("count");

 		assertEquals(outcome, participation);
-		assertEquals(outcome, affiliation);
-		assertEquals(4, affiliation);
+		assertTrue(outcome > affiliation);
+		assertTrue(participation > affiliation);
+
+		assertEquals(7, outcome);
+		assertEquals(7, participation);
+		assertEquals(6, affiliation);
 	}

 	protected List<Row> getRows(Dataset<Row> freq, String col) {
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java
@ -91,6 +91,9 @@ class SolrRecordDumpJobTest {
 	public void prepareMocks() throws ISLookUpException, IOException {
 		isLookupClient.setIsLookup(isLookUpService);

+		Mockito
+			.when(isLookupClient.getDsId(Mockito.anyString()))
+			.thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl");
 		Mockito
 			.when(isLookupClient.getLayoutSource(Mockito.anyString()))
 			.thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml")));
--- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/get_score_limits.sh
+++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/get_score_limits.sh
@ -1,63 +0,0 @@
-#/usr/bin/bash
-
-# Read log files from ranking scripts and create a two-line file  
-# with score limits for the various measures. To be used by Kleanthis
-
-attrank_file=$(ls *attrank*.log);
-pr_file=$(ls *pagerank*.log)
-ram_file=$(ls *ram*.log);
-cc_file=$(ls *cc*.log);
-impulse_file=$(ls *impulse*.log);
-
-echo
-echo "-----------------------------"
-echo "Attrank file:${attrank_file}";
-echo "PageRank file:${pr_file}";
-echo "RAM file:${ram_file}";
-echo "CC file:${cc_file}";
-echo "Impulse file:${impulse_file}";
-echo "-----------------------------"
-echo
-echo
-
-# output file will be called score_limits.csv
-echo -e "influence_top001\tinfluence_top01\tinfluence_top1\tinfluence_top10\tpopularity_top001\tpopularity_top01\tpopularity_top1\tpopularity_top10\timpulse_top001\timpulse_top01\timpulse_top1\timpulse_top10\tcc_top001\tcc_top01\tcc_top1\tcc_top10" > score_limits.csv
-# ---------------------------------------------------- #
-# Get respective score limits (we don't need RAM)
-inf_001=$(grep "^0.01%" ${pr_file} | cut -f 2);
-inf_01=$(grep "^0.1%" ${pr_file} | cut -f 2);
-inf_1=$(grep "^1%" ${pr_file} | cut -f 2);
-inf_10=$(grep "^10%" ${pr_file} | cut -f 2);
-echo "Influnence limits:"
-echo -e "${inf_001}\t${inf_01}\t${inf_1}\t${inf_10}";
-# ---------------------------------------------------- #
-pop_001=$(grep "^0.01%" ${attrank_file} | cut -f 2);
-pop_01=$(grep "^0.1%" ${attrank_file} | cut -f 2);
-pop_1=$(grep "^1%" ${attrank_file} | cut -f 2);
-pop_10=$(grep "^10%" ${attrank_file} | cut -f 2);
-echo "Popularity limits:";
-echo -e "${pop_001}\t${pop_01}\t${pop_1}\t${pop_10}";
-# ---------------------------------------------------- #
-imp_001=$(grep "^0.01%" ${impulse_file} | cut -f 2);
-imp_01=$(grep "^0.1%" ${impulse_file} | cut -f 2);
-imp_1=$(grep "^1%" ${impulse_file} | cut -f 2);
-imp_10=$(grep "^10%" ${impulse_file} | cut -f 2);
-echo "Popularity limits:";
-echo -e "${imp_001}\t${imp_01}\t${imp_1}\t${imp_10}";
-# ---------------------------------------------------- #
-cc_001=$(grep "^0.01%" ${cc_file} | cut -f 2);
-cc_01=$(grep "^0.1%" ${cc_file} | cut -f 2);
-cc_1=$(grep "^1%" ${cc_file} | cut -f 2);
-cc_10=$(grep "^10%" ${cc_file} | cut -f 2);
-echo "Popularity limits:";
-echo -e "${cc_001}\t${cc_01}\t${cc_1}\t${cc_10}";
-# ---------------------------------------------------- #
-
-echo -e "${inf_001}\t${inf_01}\t${inf_1}\t${inf_10}\t${pop_001}\t${pop_01}\t${pop_1}\t${pop_10}\t${imp_001}\t${imp_01}\t${imp_1}\t${imp_10}\t${cc_001}\t${cc_01}\t${cc_1}\t${cc_10}" >> score_limits.csv
-
-echo
-echo "score_limits.csv contents:"
-cat score_limits.csv
-
-echo;
-echo;
--- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/map_openaire_ids_to_dois.py
+++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/map_openaire_ids_to_dois.py
@ -1,60 +0,0 @@
-import json
-import sys
-from pyspark.sql import SparkSession
-from pyspark import SparkConf, SparkContext
-
-if len(sys.argv) != 3:
-    print("Usage: map_openaire_ids_to_dois.py <hdfs_src_dir> <hdfs_output_dir>")
-    sys.exit(-1)
-
-conf = SparkConf().setAppName('BIP!: Map OpenAIRE IDs to DOIs')
-sc = SparkContext(conf = conf)
-spark = SparkSession.builder.appName('BIP!: Map OpenAIRE IDs to DOIs').getOrCreate()
-sc.setLogLevel('OFF')
-
-src_dir = sys.argv[1]
-output = sys.argv[2]
-
-# src_dir = "/tmp/beta_provision/graph/21_graph_cleaned/"
-# output = '/tmp/openaireid_to_dois/'
-
-def transform(doc):
-    
-    # get publication year from 'doc.dateofacceptance.value'
-    dateofacceptance = doc.get('dateofacceptance', {}).get('value')
-
-    year = 0 
-    
-    if (dateofacceptance is not None):
-        year = dateofacceptance.split('-')[0]
-
-    # for each pid get 'pid.value' if 'pid.qualifier.classid' equals to 'doi'
-    dois = [ pid['value'] for pid in doc.get('pid', [])  if (pid.get('qualifier', {}).get('classid') == 'doi' and pid['value'] is not None)]
-
-    num_dois = len(dois)
-    
-    # exlcude openaire ids that do not correspond to DOIs
-    if (num_dois == 0): 
-        return None
-        
-    fields = [ doc['id'], str(num_dois), chr(0x02).join(dois), str(year) ]
-    
-    return '\t'.join([ v.encode('utf-8') for v in fields ])
-    
-docs = None
-
-for result_type in ["publication", "dataset", "software", "otherresearchproduct"]:
-    
-    tmp = sc.textFile(src_dir + result_type).map(json.loads)
-    
-    if (docs is None):
-        docs = tmp
-    else:
-        # append all result types in one RDD
-        docs = docs.union(tmp)
-
-docs = docs.filter(lambda d: d.get('dataInfo', {}).get('deletedbyinference') == False and d.get('dataInfo', {}).get('invisible') == False)
-
-docs = docs.map(transform).filter(lambda d: d is not None)
-
-docs.saveAsTextFile(output)
--- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/map_scores_to_dois.py
+++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/map_scores_to_dois.py
@ -1,168 +0,0 @@
-#!/usr/bin/python
-# This program reads the openaire to doi mapping from the ${synonymFolder} of the workflow
-# and uses this mapping to create doi-based score files in the format required by BiP! DB.
-# This is done by reading each openaire-id based ranking file and joining the openaire based
-# score and classes to all the corresponding dois.
-#################################################################################################
-# Imports
-import sys
-
-# Sparksession lib to communicate with cluster via session object
-from pyspark.sql import SparkSession
-
-# Import sql types to define schemas
-from pyspark.sql.types import *
-
-# Import sql functions with shorthand alias
-import pyspark.sql.functions as F
-
-from pyspark.sql.functions import max
-# from pyspark.sql.functions import udf
-#################################################################################################
-#################################################################################################
-# Clean up directory name - no longer needed in final workflow version
-'''
-def clean_directory_name(dir_name):
-    # We have a name with the form *_bip_universe<digits>_* or *_graph_universe<digits>_* 
-    # and we need to keep the parts in *	
-
-    
-    dir_name_parts = dir_name.split('_')
-    dir_name_parts = [part for part in dir_name_parts if ('bip' not in part and 'graph' not in part and 'universe' not in part and 'from' not in part)]
-    
-    dir_name = dir_name.replace("openaire_id_graph", "openaire_ids")
-    clean_name = dir_name + ".txt.gz"
-
-    # clean_name = '_'.join(dir_name_parts)
-
-    # if '_ids' not in clean_name:
-    #     clean_name = clean_name.replace('id_', 'ids_')
-        	
-    # clean_name = clean_name.replace('.txt', '')
-    # clean_name = clean_name.replace('.gz', '')
-
-    # if 'openaire_ids_' in clean_name:
-    #     clean_name = clean_name.replace('openaire_ids_', '')
-        # clean_name = clean_name + '.txt.gz'
-    # else:
-        # clean_name = clean_name + '.txt.gz'
-	
-    return clean_name
-'''
-#################################################################################################
-if len(sys.argv) < 3:
-    print ("Usage: ./map_scores_to_dois.py <synonym_folder> <num_partitions> <score_file_1> <score_file_2> <...etc...>")
-    sys.exit(-1)
-
-# Read arguments
-synonyms_folder = sys.argv[1]
-num_partitions = int(sys.argv[2])
-input_file_list = [argument.replace("_openaire_id_graph", "").replace("_openaire_id_graph_", "") + "_openaire_ids.txt.gz" for argument in sys.argv[3:]]
-# input_file_list = [clean_directory_name(item) for item in input_file_list]
-
-# Prepare output specific variables
-output_file_list = [item.replace("_openaire_ids", "") for item in input_file_list]
-output_file_list = [item + ".txt.gz" if not item.endswith(".txt.gz") else item for item in output_file_list]
-
-# --- INFO MESSAGES --- #
-print ("\n\n----------------------------")
-print ("Mpping openaire ids to DOIs")
-print ("Reading input from: " + synonyms_folder)
-print ("Num partitions: " + str(num_partitions))
-print ("Input files:" + " -- ".join(input_file_list))
-print ("Output files: " + " -- ".join(output_file_list))
-print ("----------------------------\n\n")
-#######################################################################################
-# We weill define the following schemas:
-# --> the schema of the openaire - doi mapping file [string - int - doi_list] (the separator of the doi-list is a non printable character)
-# --> a schema for floating point ranking scores [string - float - string]  (the latter string is the class)
-# --> a schema for integer ranking scores [string - int - string]  (the latter string is the class)
-
-float_schema = StructType([
-	StructField('id', StringType(), False),
-	StructField('score', FloatType(), False),
-	StructField('class', StringType(), False)
-	])
-	
-int_schema = StructType([
-	StructField('id', StringType(), False),
-	StructField('score', IntegerType(), False),
-	StructField('class', StringType(), False)
-	])
-	
-# This schema concerns the output of the file
-# containing the number of references of each doi
-synonyms_schema = StructType([
-	StructField('id', StringType(), False),
-	StructField('num_synonyms', IntegerType(), False),
-    StructField('doi_list', StringType(), False),
-	])
-#######################################################################################
-# Start spark session
-spark = SparkSession.builder.appName('Map openaire scores to DOIs').getOrCreate()
-# Set Log Level for spark session
-spark.sparkContext.setLogLevel('WARN')
-#######################################################################################
-# MAIN Program
-
-# Read and repartition the synonym folder - also cache it since we will need to perform multiple joins
-synonym_df = spark.read.schema(synonyms_schema).option('delimiter', '\t').csv(synonyms_folder)
-synonym_df = synonym_df.select('id',  F.split(F.col('doi_list'), chr(0x02)).alias('doi_list'))
-synonym_df = synonym_df.select('id', F.explode('doi_list').alias('doi')).repartition(num_partitions, 'id').cache()
-
-# TESTING
-# print ("Synonyms: " + str(synonym_df.count()))
-# print ("DF looks like this:" )
-# synonym_df.show(1000, False)
-
-print ("\n\n-----------------------------")
-# Now we need to join the score files on the openaire-id with the synonyms and then keep
-# only doi - score - class and write this to the output
-for offset, input_file in enumerate(input_file_list):
-
-    print ("Mapping scores from " + input_file)
-
-    # Select correct schema
-    schema = int_schema
-    if "attrank" in input_file.lower() or "pr" in input_file.lower() or "ram" in input_file.lower():
-        schema = float_schema
-    
-    # Load file to dataframe
-    ranking_df = spark.read.schema(schema).option('delimiter', '\t').csv(input_file).repartition(num_partitions, 'id')
-
-    # Get max score
-    max_score = ranking_df.select(max('score').alias('max')).collect()[0]['max']
-    print ("Max Score for " + str(input_file) + " is " + str(max_score))
-   
-    # TESTING
-    # print ("Loaded df sample:")
-    # ranking_df.show(1000, False)
-
-    # Join scores to synonyms and keep required fields
-    doi_score_df = synonym_df.join(ranking_df, ['id']).select('doi', 'score', 'class').repartition(num_partitions, 'doi').cache()
-    # Write output
-    output_file = output_file_list[offset]
-    print ("Writing to: " + output_file)
-    doi_score_df.write.mode('overwrite').option('delimiter','\t').option('header',False).csv(output_file, compression='gzip')
-    
-    # Creata another file for the bip update process
-    ranking_df = ranking_df.select('id', 'score', F.lit(F.col('score')/max_score).alias('normalized_score'), 'class', F.col('class').alias('class_dup'))
-    doi_score_df = synonym_df.join(ranking_df, ['id']).select('doi', 'score', 'normalized_score', 'class', 'class_dup').repartition(num_partitions, 'doi').cache()
-    output_file = output_file.replace(".txt.gz", "_for_bip_update.txt.gz")
-    print ("Writing bip update to: " + output_file)
-    doi_score_df.write.mode('overwrite').option('delimiter','\t').option('header',False).csv(output_file, compression='gzip')
- 
-    
-    # Free memory?
-    ranking_df.unpersist(True)
-
-print ("-----------------------------")
-print ("\n\nFinished!\n\n")
-
-
-
-
-
-
-
-
--- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml
@ -17,10 +17,6 @@
 				<name>openaireGraphInputPath</name>
 				<value>${nameNode}/${workingDir}/openaire_id_graph</value>
 			</property>
-			<property>
-				<name>synonymFolder</name>
-				<value>${nameNode}/${workingDir}/openaireid_to_dois/</value>
-			</property>
 			<property>
 				<name>checkpointDir</name>
 				<value>${nameNode}/${workingDir}/check/</value>
@ -32,41 +28,37 @@
 		</configuration>
 	</global>

-	<!-- start using a decision node, so as to determine from which point onwards a job will continue -->
+	<!-- Start using a decision node, to determine from which point onwards a job will continue -->
 	<start to="entry-point-decision" />

 	<decision name="entry-point-decision">
 		<switch>
-			<!-- The default will be set as the normal start, a.k.a. get-doi-synonyms -->
-			<!-- If any different condition is set, go to the corresponding start -->
+
+			<!-- Start from creating the citation network (i.e., normal execution should start from here) -->
+			<case to="create-openaire-ranking-graph">${wf:conf('resume') eq "start"}</case>
+
+			<!-- Different citation-based impact indicators are computed -->
 			<case to="spark-cc">${wf:conf('resume') eq "cc"}</case>
 			<case to="spark-ram">${wf:conf('resume') eq "ram"}</case>
 			<case to="spark-impulse">${wf:conf('resume') eq "impulse"}</case>
 			<case to="spark-pagerank">${wf:conf('resume') eq "pagerank"}</case>
 			<case to="spark-attrank">${wf:conf('resume') eq "attrank"}</case>
-			<!-- <case to="iterative-rankings">${wf:conf('resume') eq "rankings-iterative"}</case> -->
+
+			<!-- Format the results appropriately before transforming them to action sets -->
 			<case to="get-file-names">${wf:conf('resume') eq "format-results"}</case>
-			<case to="map-openaire-to-doi">${wf:conf('resume') eq "map-ids"}</case>
-			<case to="map-scores-to-dois">${wf:conf('resume') eq "map-scores"}</case>
-			<case to="clear-working-dir">${wf:conf('resume') eq "start"}</case>

 			<!-- Aggregation of impact scores on the project level -->
 			<case to="project-impact-indicators">${wf:conf('resume') eq "projects-impact"}</case>
+
+			<!-- Create action sets -->
 			<case to="create-actionset">${wf:conf('resume') eq "create-actionset"}</case>

-			<default to="clear-working-dir" />
+			<!-- The default will be set as the normal start, a.k.a. create-openaire-ranking-graph -->
+			<default to="create-openaire-ranking-graph" />
+
 		</switch>
 	</decision>

-	<action name="clear-working-dir">
-		<fs>
-			<delete path="${workingDir}"/>
-			<mkdir path="${workingDir}"/>
-		</fs>
-		<ok to="create-openaire-ranking-graph"/>
-		<error to="clear-working-dir-fail"/>
-	</action>
-
 	<!-- initial step: create citation network -->
 	<action name="create-openaire-ranking-graph">
 		<spark xmlns="uri:oozie:spark-action:0.2">
@ -304,18 +296,11 @@
 			<capture-output/>
 		</shell>

-		<ok to="format-result-files" />
+		<ok to="format-json-files" />
 		<error to="filename-getting-error" />

 	</action>

-	<!-- Now we will run in parallel the formatting of ranking files for BiP! DB and openaire (json files) -->
-	<fork name="format-result-files">
-		<path start="format-bip-files"/>
-		<path start="format-json-files"/>
-	</fork>
-
-
 	<!-- Format json files -->
 	<!-- Two parts: a) format files b) make the file endings .json.gz -->
 	<action name="format-json-files">
@ -354,139 +339,8 @@
 			<file>${wfAppPath}/format_ranking_results.py#format_ranking_results.py</file>
 		</spark>

-		<ok to="join-file-formatting" />
-		<error to="json-formatting-fail" />
-	</action>
-
-	<!-- This is the second line of parallel workflow execution where we create the BiP! DB files -->
-	<action name="format-bip-files">
-		<!-- This is required as a tag for spark jobs, regardless of programming language -->
-		<spark xmlns="uri:oozie:spark-action:0.2">
-
-			<!-- using configs from an example on openaire -->
-			<master>yarn-cluster</master>
-			<mode>cluster</mode>
-
-			<!-- This is the name of our job -->
-			<name>Format Ranking Results BiP! DB</name>
-			<!-- Script name goes here -->
-			<jar>format_ranking_results.py</jar>
-			<!-- spark configuration options: I've taken most of them from an example from dhp workflows / Master value stolen from sandro -->
-
-			<spark-opts>
-				--executor-memory=${sparkNormalExecutorMemory}
-				--executor-cores=${sparkExecutorCores}
-				--driver-memory=${sparkNormalDriverMemory}
-				--conf spark.executor.memoryOverhead=${sparkNormalExecutorMemory}
-				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
-				--conf spark.extraListeners=${spark2ExtraListeners}
-				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-			</spark-opts>
-
-			<!-- Script arguments here -->
-			<arg>zenodo</arg>
-			<!-- Input files must be identified dynamically -->
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']}</arg>
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']}</arg>
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']}</arg>
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']}</arg>
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']}</arg>
-			<!-- Num partitions -->
-			<arg>${sparkShufflePartitions}</arg>
-			<!-- Type of data to be produced [bip (dois) / openaire (openaire-ids) ] -->
-			<arg>openaire</arg>
-			<!-- This needs to point to the file on the hdfs i think -->
-			<file>${wfAppPath}/format_ranking_results.py#format_ranking_results.py</file>
-		</spark>
-
-		<ok to="join-file-formatting" />
-		<error to="bip-formatting-fail" />
-	</action>
-
-	<!-- Finish formatting jobs -->
-	<join name="join-file-formatting" to="map-openaire-to-doi"/>
-
-	<!-- maps openaire ids to DOIs -->
-	<action name="map-openaire-to-doi">
-		<spark xmlns="uri:oozie:spark-action:0.2">
-
-			<!-- Delete previously created doi synonym folder -->
-			<prepare>
-				<delete path="${synonymFolder}"/>
-			</prepare>
-
-			<master>yarn-cluster</master>
-			<mode>cluster</mode>
-			<name>Openaire-DOI synonym collection</name>
-			<jar>map_openaire_ids_to_dois.py</jar>
-
-			<spark-opts>
-				--executor-memory=${sparkHighExecutorMemory}
-				--executor-cores=${sparkExecutorCores}
-				--driver-memory=${sparkHighDriverMemory}
-				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
-				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
-				--conf spark.extraListeners=${spark2ExtraListeners}
-				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-			</spark-opts>
-
-			<!-- Script arguments here -->
-			<arg>${openaireDataInput}/</arg>
-			<!-- number of partitions to be used on joins -->
-			<arg>${synonymFolder}</arg>
-
-			<file>${wfAppPath}/map_openaire_ids_to_dois.py#map_openaire_ids_to_dois.py</file>
-		</spark>
-
-		<ok to="map-scores-to-dois" />
-		<error to="synonym-collection-fail" />
-
-	</action>
-
-	<!-- mapping openaire scores to DOIs -->
-	<action name="map-scores-to-dois">
-		<!-- This is required as a tag for spark jobs, regardless of programming language -->
-		<spark xmlns="uri:oozie:spark-action:0.2">
-
-			<!-- using configs from an example on openaire -->
-			<master>yarn-cluster</master>
-			<mode>cluster</mode>
-			<name>Mapping Openaire Scores to DOIs</name>
-			<jar>map_scores_to_dois.py</jar>
-
-			<spark-opts>
-				--executor-memory=${sparkHighExecutorMemory}
-				--executor-cores=${sparkExecutorCores}
-				--driver-memory=${sparkHighDriverMemory}
-				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
-				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
-				--conf spark.extraListeners=${spark2ExtraListeners}
-				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-			</spark-opts>
-
-			<!-- Script arguments here -->
-			<arg>${synonymFolder}</arg>
-			<!-- Number of partitions -->
-			<arg>${sparkShufflePartitions}</arg>
-			<!-- The remaining input are the ranking files fproduced for bip db-->
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']}</arg>
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']}</arg>
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']}</arg>
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']}</arg>
-			<arg>${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']}</arg>
-
-			<file>${wfAppPath}/map_scores_to_dois.py#map_scores_to_dois.py</file>
-		</spark>
-
 		<ok to="project-impact-indicators" />
-		<error to="map-scores-fail" />
-
+		<error to="json-formatting-fail" />
 	</action>

 	<action name="project-impact-indicators">
@ -603,18 +457,6 @@
 		<message>Error formatting json files, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
 	</kill>

-	<kill name="bip-formatting-fail">
-		<message>Error formatting BIP files, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-	</kill>
-
-	<kill name="synonym-collection-fail">
-		<message>Synonym collection failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-	</kill>
-
-	<kill name="map-scores-fail">
-		<message>Mapping scores to DOIs failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-	</kill>
-
 	<kill name="actionset-delete-fail">
 		<message>Deleting output path for actionsets failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
 	</kill>
@ -627,10 +469,6 @@
 		<message>Calculating project impact indicators failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
 	</kill>

-	<kill name="clear-working-dir-fail">
-		<message>Re-create working dir failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-	</kill>
-
 	<!-- Define ending node -->
 	<end name="end" />

--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql
@ -32,7 +32,7 @@ select distinct * from (
       from SOURCE.result r
                join SOURCE.result_projects rp on rp.id=r.id
                join SOURCE.project p on p.id=rp.project
-                join openaire_prod_stats_monitor_ie_20231226b.irish_funders irf on irf.funder=p.funder
+                join TARGET.irish_funders irf on irf.funder=p.funder
       union all
       select r.*
       from SOURCE.result r
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql
@ -1,79 +1,3 @@
--drop database if exists TARGET cascade;
--create database if not exists TARGET;
--
--create view if not exists TARGET.category as select * from SOURCE.category;
--create view if not exists TARGET.concept as select * from SOURCE.concept;
--create view if not exists TARGET.context as select * from SOURCE.context;
--create view if not exists TARGET.country as select * from SOURCE.country;
--create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
--create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
--create view if not exists TARGET.funder as select * from SOURCE.funder;
--create view if not exists TARGET.fundref as select * from SOURCE.fundref;
--create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
--create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
--create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
--create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
--create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
--create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
--
--create table TARGET.result stored as parquet as
--    select distinct * from (
--        select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
--        union all
--        select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
--        union all
--        select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
--             'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
--             'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
--             'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
--             'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
--             'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
--             'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
--             'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
--             'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
--             'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
--             'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
--             -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
--             'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
--             'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
--             'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
--             'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
--             'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
--             'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
--             'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
--             'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
--             'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
--             'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
--             'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
--             'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
--             'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
--             'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
--             'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
--             'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
--             'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
--             'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
--             'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
--             'openorgs____::e15adb13c4dadd49de4d35c39b5da93a',  -- Nanyang Technological University
--             'openorgs____::4b34103bde246228fcd837f5f1bf4212',  -- Autonomous University of Barcelona
--             'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb',	-- McMaster University
--             'openorgs____::51c7fc556e46381734a25a6fbc3fd398',	-- University of Modena and Reggio Emilia
--             'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db',	-- Bilkent University
--             'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06',	-- Saints Cyril and Methodius University of Skopje
--             'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
--             'openorgs____::b8b8ca674452579f3f593d9f5e557483',   -- University College Cork
--             'openorgs____::38d7097854736583dde879d12dacafca'	-- Brown University
--             'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech
--             'openorgs____::2530baca8a15936ba2e3297f2bce2e7e',	-- University of Cape Town
--             'openorgs____::d11f981828c485cd23d93f7f24f24db1',  -- Technological University Dublin
--             'openorgs____::5e6bf8962665cdd040341171e5c631d8',  -- Delft University of Technology
--             'openorgs____::846cb428d3f52a445f7275561a7beb5d',  -- University of Manitoba
--             'openorgs____::eb391317ed0dc684aa81ac16265de041',	-- Universitat Rovira i Virgili
--             'openorgs____::66aa9fc2fceb271423dfabcc38752dc0',  -- Lund University
--             'openorgs____::3cff625a4370d51e08624cc586138b2f'	-- IMT Atlantique
--        ) )) foo;
--
--ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
-
 create view if not exists TARGET.category as select * from SOURCE.category;
 create view if not exists TARGET.concept as select * from SOURCE.concept;
 create view if not exists TARGET.context as select * from SOURCE.context;
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql
@ -81,7 +81,17 @@ create table TARGET.result stored as parquet as
             'openorgs____::8839b55dae0c84d56fd533f52d5d483a',  -- Leibniz Institute of Ecological Urban and Regional Development
             'openorgs____::526468206bca24c1c90da6a312295cf4',	-- Cyprus University of Technology
             'openorgs____::b5ca9d4340e26454e367e2908ef3872f',	-- Alma Mater Studiorum University of Bologna
-             'openorgs____::a6340e6ecf60f6bba163659df985b0f2'	-- TU Dresden
+             'openorgs____::a6340e6ecf60f6bba163659df985b0f2',	-- TU Dresden
+             'openorgs____::64badd35233ba2cd4946368ef2f4cf57',  -- University of Vienna
+             'openorgs____::7501d66d2297a963ebfb075c43fff88e',  -- Royal Institute of Technology
+             'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf',	-- Sorbonne University
+             'openorgs____::b316f25380d106aac402f5ae8653910d',  -- Centre for Research on Ecology and Forestry Applications
+             'openorgs____::45a2076eee3013e0e85625ce61bcd272',  -- Institut d'Investigació Sanitària Illes Balears
+             'openorgs____::00b20b0a743a96169e6cf135e6e2bd7c',  -- Universidad Publica De Navarra
+             'openorgs____::0f398605c2459294d125ff23473a97dc',  -- Aalto University
+             'openorgs____::25b1fa62c7fd8e409d3a83c07e04b2d4',  -- WHU-Otto Beisheim School of Management
+             'openorgs____::d6eec313417f11205db4e736a34c0db6',  -- KEMPELENOV INSTITUT INTELIGENTNYCH TECHNOLOGII
+             'openorgs____::c2dfb90e797a2dc52f0084c549289d0c'  -- National Research Institute for Agriculture, Food and Environment
        ))) foo;

 --ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql
@ -61,7 +61,17 @@ create table TARGET.result stored as parquet as
             'openorgs____::8839b55dae0c84d56fd533f52d5d483a',  -- Leibniz Institute of Ecological Urban and Regional Development
             'openorgs____::526468206bca24c1c90da6a312295cf4',	-- Cyprus University of Technology
             'openorgs____::b5ca9d4340e26454e367e2908ef3872f',	-- Alma Mater Studiorum University of Bologna
-             'openorgs____::a6340e6ecf60f6bba163659df985b0f2'	-- TU Dresden
+             'openorgs____::a6340e6ecf60f6bba163659df985b0f2',	-- TU Dresden
+             'openorgs____::64badd35233ba2cd4946368ef2f4cf57',  -- University of Vienna
+             'openorgs____::7501d66d2297a963ebfb075c43fff88e',  -- Royal Institute of Technology
+             'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf',	-- Sorbonne University
+             'openorgs____::b316f25380d106aac402f5ae8653910d',  -- Centre for Research on Ecology and Forestry Applications
+             'openorgs____::45a2076eee3013e0e85625ce61bcd272',  -- Institut d'Investigació Sanitària Illes Balears
+             'openorgs____::00b20b0a743a96169e6cf135e6e2bd7c',  -- Universidad Publica De Navarra
+             'openorgs____::0f398605c2459294d125ff23473a97dc',  -- Aalto University
+             'openorgs____::25b1fa62c7fd8e409d3a83c07e04b2d4',  -- WHU-Otto Beisheim School of Management
+             'openorgs____::d6eec313417f11205db4e736a34c0db6',  -- KEMPELENOV INSTITUT INTELIGENTNYCH TECHNOLOGII
+             'openorgs____::c2dfb90e797a2dc52f0084c549289d0c'  -- National Research Institute for Agriculture, Food and Environment
        )))  foo;

 --ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Giambattista Bloisi	56224e034a	Fill the new mergedIds field when generating dedup records Filter out dedup records composed of invisible records only Filter out mergerels that have not been used when creating the dedup record (ungrouping of cliques)	2024-10-28 13:31:01 +01:00
Claudio Atzori	46dbb62598	Merge pull request '#9839 : include claimed affiliation relationships' (#476 ) from claim-orgs into beta Reviewed-on: #476	2024-10-25 10:12:59 +02:00
Claudio Atzori	d3764265d5	Merge pull request '[dedup] avoid NPEs in the countryInference dedup utility' (#475 ) from dedup_countryInference_NPE into beta Reviewed-on: #475	2024-10-25 10:12:06 +02:00
Claudio Atzori	4a9aeb6238	Merge pull request '9126-impact-indicators-wf-optimisation' (#471 ) from 9126-impact-indicators-wf-optimisation into beta Reviewed-on: #471	2024-10-25 10:10:44 +02:00
Claudio Atzori	8172bee8c8	Merge pull request 'Minor fixes' (#496 ) from beta_fixes_oct into beta Reviewed-on: #496	2024-10-25 10:09:56 +02:00
Miriam Baglioni	e75326d6ec	[FundersMatchFromCrossref] added match from CrossRef to DFG unidentified project	2024-10-25 09:13:54 +02:00
Giambattista Bloisi	6bc741715c	Fix OafMapperUtilsTest.testMergePubs	2024-10-23 14:02:45 +02:00
Giambattista Bloisi	aa7b8fd014	Use workingDir parameter for temporary data of ORCID enrichment	2024-10-23 14:02:17 +02:00
Giambattista Bloisi	0e34b0ece1	Fix imports: point them from the main distribution packages	2024-10-23 14:01:52 +02:00
Giambattista Bloisi	56b05cde0b	Revert the changes for IgnoreUndefined management in tree evaluation	2024-10-11 10:35:15 +02:00
Claudio Atzori	62ff843334	adopting dhp-schemas:8.0.1 to support Author's rawAffiliationString(s). Improved graph2hive implementation	2024-10-08 16:22:54 +02:00
Claudio Atzori	d5867a1992	merged #490	2024-10-08 15:39:59 +02:00
Claudio Atzori	e5df68772d	[graph provision] fixed serialisation of the usage counts as measures in the XML records	2024-10-02 09:35:21 +02:00
Miriam Baglioni	7e6d12fa77	[UsageCount] fixed error (cherry picked from commit `9c9a9562ae`)	2024-10-01 15:55:07 +02:00
Miriam Baglioni	191fc3a461	[UsageCount] add check in case the datasource is not matched against those present in the graph (cherry picked from commit `b42bdd5fb3`)	2024-10-01 15:54:31 +02:00
Claudio Atzori	10696f2a44	reverted procedure for creating the UsageCounts actionset	2024-10-01 15:54:13 +02:00
Claudio Atzori	5734b80861	Merge pull request 'datasource table creation split in steps' (#489 ) from antonis.lempesis/dnet-hadoop:beta into beta Reviewed-on: #489	2024-09-30 16:34:38 +02:00
Antonis Lempesis	f3c179658a	datasource table creation split in steps	2024-09-30 17:12:21 +03:00
Miriam Baglioni	b18ad035c1	Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta	2024-09-30 15:10:44 +02:00
Miriam Baglioni	e430826e00	[ImportOC] fix to move original folder instead of extracted ones	2024-09-30 15:10:10 +02:00
Giambattista Bloisi	c45cae447a	Fix: invert the "natural" order when ordering by id lexicographically	2024-09-26 17:08:02 +02:00
Claudio Atzori	3fcafc7ed6	Merge pull request 'Latest institutions in monitor dbs' (#472 ) from antonis.lempesis/dnet-hadoop:beta into beta Reviewed-on: #472	2024-09-26 09:49:01 +02:00
Miriam Baglioni	599e56dbc6	Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta	2024-09-25 17:28:23 +02:00
Claudio Atzori	6397141e56	code formatting	2024-09-25 15:27:32 +02:00
Claudio Atzori	e354f9853a	[OpenCitations] move the extracted contents under a backup path to avoid needing to re-download it in case of errors	2024-09-25 15:27:02 +02:00
Claudio Atzori	535a7b99f1	the metadata collection plugins using the HttpConnector2 class shall now retry instead of failing in case of UnknownHostException	2024-09-25 11:35:34 +02:00
Sandro La Bruzzo	6a097abc89	as described on ticket #9525 1. Changed the mapping applied to Crossref records: anything that has a relationship "is-review-of" must be mapped as publication of type "Review". 2. Force the hostedby of Crossref records with DOI prefix 10.3410 and 10.12703 to the H1 Connect data source.	2024-09-25 11:32:54 +02:00
Michele Artini	9754521847	Merge pull request 'fixed a bug with id' (#486 ) from osfPreprints_plugin into beta Reviewed-on: #486	2024-09-25 10:02:24 +02:00
Michele Artini	54f8b4da39	Merge pull request 'fixed a bug with 'null' string' (#484 ) from osfPreprints_plugin into beta Reviewed-on: #484	2024-09-24 15:19:54 +02:00
Miriam Baglioni	4d3e079590	Merge remote-tracking branch 'origin/beta' into beta	2024-09-24 14:26:29 +02:00
Michele Artini	e941adbe2b	fixed a bug with topic ENRICH/MORE/SUBJECT/ARXIV	2024-09-24 08:57:37 +02:00
Michele Artini	fdbe629f49	removed the deletedByInference=true filter	2024-09-23 15:27:28 +02:00
Antonis Lempesis	619aa34a15	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into beta	2024-09-23 15:25:59 +03:00
Antonis Lempesis	dbea7a4072	removed duplicate line	2024-09-23 14:57:11 +03:00
Antonis Lempesis	c9241dba0d	Merge pull request 'convert_hive_to_spark_actions' (#1 ) from convert_hive_to_spark_actions into beta Reviewed-on: antonis.lempesis/dnet-hadoop#1	2024-09-23 13:53:28 +02:00
Michele Artini	755a5aefcf	Merge pull request 'osfPreprints_plugin' (#482 ) from osfPreprints_plugin into beta Reviewed-on: #482	2024-09-23 10:21:34 +02:00
Michele Artini	db6f137cf9	Merge pull request 'osfPreprints_plugin' (#480 ) from osfPreprints_plugin into beta Reviewed-on: #480	2024-09-20 09:56:50 +02:00
Alessia	07e6e7b4d6	#9839 : include claimed affiliation relationships	2024-09-16 13:41:56 +02:00
Antonis Lempesis	37ad259296	cleanup	2024-09-05 16:02:44 +03:00
Antonis Lempesis	b64c144abf	added new institutions	2024-09-05 16:00:09 +03:00
Serafeim Chatzopoulos	b043f8a963	Remove redundant error messages from impact indicators workflow	2024-09-04 14:28:43 +03:00
Serafeim Chatzopoulos	db03f85366	Remove steps for updating BIP! from the impact indicators workflow	2024-09-04 14:25:44 +03:00
Miriam Baglioni	468f2aa5a5	[AffiliationAffRo]align beta with new affiliation from publisher webpage introduced in production. AffRo collectedfrom OpenAIRE to discriminate against WebCrawl	2024-08-12 18:10:46 +02:00
Miriam Baglioni	89fcf4086c	[Person]fix issue in affiliation relation id construction for person (missing ::)	2024-08-12 18:04:43 +02:00
Miriam Baglioni	8c185a7b1a	resolving conflicts	2024-08-05 17:14:11 +02:00
Miriam Baglioni	985ca15264	[openaire-affiliation]removes matchings without DOI	2024-08-05 12:10:40 +02:00
Claudio Atzori	75a11d0ba5	[dedup] avoid NPEs in the countryInference dedup utility	2024-07-25 16:34:32 +02:00
Antonis Lempesis	d0590e0e49	added latest institutions	2024-07-23 15:17:15 +03:00
Antonis Lempesis	7d2c0a3723	added new institutions	2024-07-23 15:10:17 +03:00
Lampros Smyrnaios	e9686365a2	Improve performance of creating the "result_fos" table, by using a temp-table to cache data, which is requested multiple times.	2024-07-03 20:24:36 +03:00
Lampros Smyrnaios	ce0aee21cc	Improve performance of transferring the stats-DBs to another cluster and querying the DBs' tables, by ordering Spark to create up to 100 files per table, instead of thousands.	2024-07-03 20:15:33 +03:00
Lampros Smyrnaios	7b7dd32ad5	- Fix placement of some "set mapred.job.queue.name=analytics" statements and remove their unused "/EOS/" indicator. - Add stacktrace-info to failed actions.	2024-07-03 19:53:24 +03:00
Lampros Smyrnaios	7ce051d766	- Update the remaining hive-actions to spark-actions. - Update the version of shell-actions. - Fix missing "/EOS/" indicators.	2024-07-03 19:49:19 +03:00
Lampros Smyrnaios	aa4d7d5e20	Prioritize the rest of the stats-queries over other tasks on the cluster, by putting them in the "analytics" queue.	2024-07-03 19:14:25 +03:00
Lampros Smyrnaios	54e11b6a43	Improve performance and efficiency by rewriting the creation process of "publication", "project", "dataset", "datasource", "software", "otherresearchproduct" and "result" tables, to be performed in a single query, for each one.	2024-07-03 13:03:15 +03:00
Lampros Smyrnaios	fe2275a9b0	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions # Conflicts: # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql	2024-06-25 20:17:47 +03:00
Lampros Smyrnaios	a644a6f4fe	Catch Spark-sql errors and show a log with the statement that failed.	2024-05-29 12:10:11 +03:00
Lampros Smyrnaios	888637773c	Add missing "/EOS/" comments.	2024-05-27 12:34:49 +03:00
Lampros Smyrnaios	e0ac494859	Merge branch 'beta' into convert_hive_to_spark_actions # Conflicts: # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql	2024-05-27 12:27:40 +03:00
Lampros Smyrnaios	3c17183d10	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions	2024-04-23 17:18:16 +03:00
Lampros Smyrnaios	69a9ac7393	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions	2024-04-22 17:07:11 +03:00
Lampros Smyrnaios	342223f75c	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions	2024-04-19 13:18:34 +03:00
Lampros Smyrnaios	2616971e2b	dhp-stats-update: remove leftover duplicate line	2024-04-18 16:18:16 +03:00
Lampros Smyrnaios	ba533d9f34	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions	2024-04-18 15:47:56 +03:00
Lampros Smyrnaios	d46b78b659	dhp-stats-update: - Set Steps 2-7 and 9 to limit the amount of files generated by Spark, from 8000, down to 100, to improve file-transfer and querying performance. - Allow the workflow to run up to Step10. The Step11 seems to have some issues even when using hive-action.	2024-04-18 15:40:27 +03:00
Lampros Smyrnaios	6f2ebb2a52	Revert Step8 and Step11 to use Hive again, since their "UPDATE" statements are not supported by Spark.	2024-04-18 15:35:03 +03:00
Lampros Smyrnaios	ca091c0f1e	dhp-stats-update: - Fix not passing some parameters to some Spark actions. - Allow the workflow to run up to Step7. The first 7 steps seem to work out of the box.	2024-04-17 14:03:59 +03:00
Lampros Smyrnaios	0b897f2f66	Fix and add missing "DROP TABLE" statements, in "dhp-stats-update" sql-scripts.	2024-04-16 18:17:54 +03:00
Lampros Smyrnaios	db33f7727c	Update "dhp-stats-update" workflow to use "spark"-actions, instead of "hive" ones. Note: Currently the code is set to only test the "Step1".	2024-04-15 16:22:40 +03:00