[affRo] added option to run on crossref

merging with branch beta
[affMatchings] adding choice to run the algo on oalexdata and get specific branch instead of release of affro
2024-10-24 11:49:13 +02:00 · 2024-10-21 08:56:08 +02:00 · 2024-10-18 13:58:14 +02:00 · 2024-10-11 10:35:15 +02:00 · 2024-10-08 16:22:54 +02:00 · 2024-10-08 15:39:59 +02:00
113 changed files with 2770 additions and 3096 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
@ -7,12 +7,12 @@ import java.sql.*;
 import java.util.function.Consumer;

 import org.apache.commons.lang3.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;

 public class DbClient implements Closeable {

-	private static final Logger log = LoggerFactory.getLogger(DbClient.class);
+	private static final Log log = LogFactory.getLog(DbClient.class);

 	private final Connection connection;

@ -37,8 +37,6 @@ public class DbClient implements Closeable {
 		try (final Statement stmt = connection.createStatement()) {
 			stmt.setFetchSize(100);

-			log.info("running SQL:\n\n{}\n\n", sql);
-
 			try (final ResultSet rs = stmt.executeQuery(sql)) {
 				while (rs.next()) {
 					consumer.accept(rs);
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
@ -0,0 +1,53 @@
+
+package eu.dnetlib.dhp.common.api;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import okhttp3.MediaType;
+import okhttp3.RequestBody;
+import okhttp3.internal.Util;
+import okio.BufferedSink;
+import okio.Okio;
+import okio.Source;
+
+/**
+ * OkHttp {@link RequestBody} whose payload is read from an {@link InputStream}.
+ * <p>
+ * The stream is consumed and closed by {@link #writeTo(BufferedSink)}.
+ */
+public class InputStreamRequestBody extends RequestBody {
+
+	private final InputStream inputStream;
+	private final MediaType mediaType;
+	private final long length;
+
+	/**
+	 * Factory for a request body backed by the given stream.
+	 *
+	 * @param mediaType the media type returned by {@link #contentType()}
+	 * @param inputStream the stream the payload is read from
+	 * @param len the number of bytes returned by {@link #contentLength()}
+	 * @return the new request body
+	 */
+	public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
+
+		return new InputStreamRequestBody(inputStream, mediaType, len);
+	}
+
+	private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
+		this.inputStream = inputStream;
+		this.mediaType = mediaType;
+		this.length = len;
+	}
+
+	@Override
+	public MediaType contentType() {
+		return mediaType;
+	}
+
+	@Override
+	public long contentLength() {
+
+		return length;
+
+	}
+
+	/**
+	 * Copies the whole input stream into the sink.
+	 *
+	 * @param sink the destination of the payload
+	 * @throws IOException if reading the stream, writing to the sink or closing the stream fails
+	 */
+	@Override
+	public void writeTo(BufferedSink sink) throws IOException {
+		// try-with-resources closes the source (and the wrapped stream) without relying on
+		// okhttp3.internal.Util, which is not public API, and without ever passing it a null source
+		try (Source source = Okio.source(inputStream)) {
+			sink.writeAll(source);
+		}
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
@ -0,0 +1,8 @@
+
+package eu.dnetlib.dhp.common.api;
+
+/**
+ * Checked exception raised when a concept record id cannot be found in the list of depositions.
+ * <p>
+ * It extends {@link Exception} instead of {@link Throwable} directly, so that it is handled like any other
+ * checked exception by generic error handling code.
+ */
+public class MissingConceptDoiException extends Exception {
+	/**
+	 * @param message the description of the missing concept record id
+	 */
+	public MissingConceptDoiException(String message) {
+		super(message);
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
@ -0,0 +1,363 @@
+
+package eu.dnetlib.dhp.common.api;
+
+import java.io.*;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.http.HttpHeaders;
+import org.apache.http.entity.ContentType;
+import org.jetbrains.annotations.NotNull;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
+import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
+import okhttp3.*;
+
+public class ZenodoAPIClient implements Serializable {
+
+	String urlString;
+	String bucket;
+
+	String deposition_id;
+	String access_token;
+
+	public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
+
+	private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
+
+	/** @return the base URL used by this client to build its requests */
+	public String getUrlString() {
+		return urlString;
+	}
+
+	/** @param urlString the base URL used by this client to build its requests */
+	public void setUrlString(String urlString) {
+		this.urlString = urlString;
+	}
+
+	/** @return the URL of the bucket the files are uploaded to */
+	public String getBucket() {
+		return bucket;
+	}
+
+	/** @param bucket the URL of the bucket the files are uploaded to */
+	public void setBucket(String bucket) {
+		this.bucket = bucket;
+	}
+
+	/** @param deposition_id the id of the deposition the following requests refer to */
+	public void setDeposition_id(String deposition_id) {
+		this.deposition_id = deposition_id;
+	}
+
+	/**
+	 * Creates a client; no request is sent until one of the operations is invoked.
+	 *
+	 * @param urlString the base URL used to build the requests
+	 * @param access_token the token sent as Bearer credential in the Authorization header
+	 */
+	public ZenodoAPIClient(String urlString, String access_token) {
+
+		this.urlString = urlString;
+		this.access_token = access_token;
+	}
+
+	/**
+	 * Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
+	 *
+	 * @return response code
+	 * @throws IOException if the request fails or the response code is not an accepted one
+	 */
+	public int newDeposition() throws IOException {
+		String json = "{}";
+
+		URL url = new URL(urlString);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setRequestMethod("POST");
+			conn.setDoOutput(true);
+			try (OutputStream os = conn.getOutputStream()) {
+				byte[] input = json.getBytes("utf-8");
+				os.write(input, 0, input.length);
+			}
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when writing the request or reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		// the response describes the new deposition: keep its bucket and id for the following calls
+		ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
+		this.bucket = newSubmission.getLinks().getBucket();
+		this.deposition_id = newSubmission.getId();
+
+		return responseCode;
+	}
+
+	/**
+	 * Upload files in Zenodo.
+	 *
+	 * @param is the inputStream for the file to upload; it is read until its end but not closed by this method
+	 * @param file_name the name of the file as it will appear on Zenodo
+	 * @return the response code
+	 * @throws IOException if the transfer fails or the response code is not an accepted one
+	 */
+	public int uploadIS(InputStream is, String file_name) throws IOException {
+
+		URL url = new URL(bucket + "/" + file_name);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setDoOutput(true);
+			conn.setRequestMethod("PUT");
+
+			// NOTE(review): no streaming mode is configured on the connection, so the JDK presumably buffers
+			// the whole payload in memory before sending it - confirm this is acceptable for large files
+			byte[] buf = new byte[8192];
+			int length;
+			try (OutputStream os = conn.getOutputStream()) {
+				while ((length = is.read(buf)) != -1) {
+					os.write(buf, 0, length);
+				}
+
+			}
+			int responseCode = conn.getResponseCode();
+			if (!checkOKStatus(responseCode)) {
+				throw new IOException("Unexpected code " + responseCode + getBody(conn));
+			}
+
+			return responseCode;
+		} finally {
+			// the connection was never released before; do it on success and on failure
+			conn.disconnect();
+		}
+	}
+
+	/**
+	 * Reads the response of the connection as UTF-8 text.
+	 * Each line is trimmed and the lines are concatenated without any separator.
+	 *
+	 * @param conn the connection to read the response from
+	 * @return the text of the response
+	 * @throws IOException if the input stream of the connection cannot be obtained or read
+	 */
+	@NotNull
+	private String getBody(HttpURLConnection conn) throws IOException {
+		try (BufferedReader reader = new BufferedReader(
+			new InputStreamReader(conn.getInputStream(), "utf-8"))) {
+			final StringBuilder text = new StringBuilder();
+			for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+				text.append(line.trim());
+			}
+			return text.toString();
+		}
+	}
+
+	/**
+	 * Associates metadata information to the current deposition
+	 *
+	 * @param metadata the metadata, sent as the JSON body of the request
+	 * @return response code
+	 * @throws IOException if the request fails or the response code is not an accepted one
+	 */
+	public int sendMretadata(String metadata) throws IOException {
+
+		URL url = new URL(urlString + "/" + deposition_id);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setDoOutput(true);
+			conn.setRequestMethod("PUT");
+
+			try (OutputStream os = conn.getOutputStream()) {
+				byte[] input = metadata.getBytes("utf-8");
+				os.write(input, 0, input.length);
+
+			}
+
+			final int responseCode = conn.getResponseCode();
+			// the body must be read while the connection is still open: it used to be read after disconnect()
+			if (!checkOKStatus(responseCode))
+				throw new IOException("Unexpected code " + responseCode + getBody(conn));
+
+			return responseCode;
+		} finally {
+			conn.disconnect();
+		}
+
+	}
+
+	/**
+	 * Tells whether a response code denotes a successful request.
+	 *
+	 * @param responseCode the HTTP status code returned by the server
+	 * @return true only for 200 (OK) and 201 (Created)
+	 */
+	private boolean checkOKStatus(int responseCode) {
+
+		// the former test (code != 200 || code != 201) was true for every code, so failures were never detected
+		return HttpURLConnection.HTTP_OK == responseCode ||
+			HttpURLConnection.HTTP_CREATED == responseCode;
+	}
+
+	/**
+	 * Publishes the current deposition, being it a new deposition or a new version of an existing one, by posting
+	 * an empty JSON object to its publish action.
+	 *
+	 * @return response code
+	 * @throws IOException if the call fails or the response is not successful
+	 */
+	@Deprecated
+	public int publish() throws IOException {
+
+		final OkHttpClient client = new OkHttpClient.Builder()
+			.connectTimeout(600, TimeUnit.SECONDS)
+			.build();
+
+		final String publishUrl = urlString + "/" + deposition_id + "/actions/publish";
+
+		final Request publishRequest = new Request.Builder()
+			.url(publishUrl)
+			.addHeader("Authorization", "Bearer " + access_token)
+			.post(RequestBody.create("{}", MEDIA_TYPE_JSON))
+			.build();
+
+		try (Response reply = client.newCall(publishRequest).execute()) {
+
+			if (reply.isSuccessful()) {
+				return reply.code();
+			}
+
+			throw new IOException("Unexpected code " + reply + reply.body().string());
+
+		}
+	}
+
+	/**
+	 * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
+	 * for the new version.
+	 *
+	 * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
+	 *            part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
+	 *            concept_rec_id = 656930
+	 * @return response code
+	 * @throws IOException if a request fails or the response code is not an accepted one
+	 * @throws MissingConceptDoiException if no deposition with the given concept record id is found
+	 */
+	public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
+		// resolve the concept record id into the id of the deposition, searching from the first page
+		setDepositionId(concept_rec_id, 1);
+		String json = "{}";
+
+		URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setDoOutput(true);
+			conn.setRequestMethod("POST");
+
+			try (OutputStream os = conn.getOutputStream()) {
+				byte[] input = json.getBytes("utf-8");
+				os.write(input, 0, input.length);
+
+			}
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when writing the request or reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		// the id of the new version is the last segment of the latest_draft link
+		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
+		String latest_draft = zenodoModel.getLinks().getLatest_draft();
+		deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
+		bucket = getBucket(latest_draft);
+
+		return responseCode;
+
+	}
+
+	/**
+	 * To finish uploading a version or new deposition not published
+	 * It sets the deposition_id and the bucket to be used
+	 *
+	 * @param deposition_id the deposition id of the not yet published upload
+	 * @return response code
+	 * @throws IOException if the request fails or the response code is not an accepted one
+	 * @throws MissingConceptDoiException declared in the signature, but not thrown by the body of this method
+	 */
+	public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
+
+		this.deposition_id = deposition_id;
+
+		String json = "{}";
+
+		URL url = new URL(urlString + "/" + deposition_id);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			conn.setRequestMethod("POST");
+			conn.setDoOutput(true);
+			try (OutputStream os = conn.getOutputStream()) {
+				byte[] input = json.getBytes("utf-8");
+				os.write(input, 0, input.length);
+			}
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when writing the request or reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
+		bucket = zenodoModel.getLinks().getBucket();
+
+		return responseCode;
+
+	}
+
+	/**
+	 * Looks for the deposition having the given concept record id and stores its id in deposition_id. The list of
+	 * depositions is scanned one page at a time, starting from the given page, until a match or an empty page is found.
+	 *
+	 * @param concept_rec_id the concept record id to look for
+	 * @param page the first page to inspect
+	 * @throws IOException if a page of depositions cannot be fetched
+	 * @throws MissingConceptDoiException if an empty page is reached without finding the concept record id
+	 */
+	private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
+
+		Integer currentPage = page;
+		while (true) {
+			final String pageContent = getPrevDepositions(String.valueOf(currentPage));
+			final ZenodoModelList depositions = new Gson().fromJson(pageContent, ZenodoModelList.class);
+
+			for (ZenodoModel deposition : depositions) {
+				if (deposition.getConceptrecid().equals(concept_rec_id)) {
+					deposition_id = deposition.getId();
+					return;
+				}
+			}
+
+			// an empty page means that all the depositions have been inspected
+			if (depositions.size() == 0) {
+				throw new MissingConceptDoiException(
+					"The concept record id specified was missing in the list of depositions");
+			}
+
+			currentPage = currentPage + 1;
+		}
+
+	}
+
+	/**
+	 * Fetches one page of the list of depositions.
+	 *
+	 * @param page the number of the page, sent as the "page" query parameter
+	 * @return the body of the response
+	 * @throws IOException if the request fails or the response code is not an accepted one
+	 */
+	private String getPrevDepositions(String page) throws IOException {
+
+		HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
+		urlBuilder.addQueryParameter("page", page);
+
+		URL url = new URL(urlBuilder.build().toString());
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			// NOTE(review): doOutput looks unnecessary for a GET that writes no body - confirm before removing
+			conn.setDoOutput(true);
+			conn.setRequestMethod("GET");
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		return body;
+
+	}
+
+	/**
+	 * Fetches the deposition available at the given URL and returns the bucket found among its links.
+	 *
+	 * @param inputUurl the URL of the deposition
+	 * @return the bucket link of the deposition
+	 * @throws IOException if the request fails or the response code is not an accepted one
+	 */
+	private String getBucket(String inputUurl) throws IOException {
+
+		URL url = new URL(inputUurl);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+
+		final String body;
+		final int responseCode;
+		try {
+			conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+			conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+			// NOTE(review): doOutput looks unnecessary for a GET that writes no body - confirm before removing
+			conn.setDoOutput(true);
+			conn.setRequestMethod("GET");
+
+			body = getBody(conn);
+
+			responseCode = conn.getResponseCode();
+		} finally {
+			// release the connection also when reading the response fails
+			conn.disconnect();
+		}
+
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
+
+		return zenodoModel.getLinks().getBucket();
+
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
@ -0,0 +1,14 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+/**
+ * Community referenced by the {@link Metadata} of a deposition.
+ * <p>
+ * It is serializable because {@link Metadata}, which is serializable, holds a list of communities.
+ */
+public class Community implements java.io.Serializable {
+	private String identifier;
+
+	/** @return the identifier of the community */
+	public String getIdentifier() {
+		return identifier;
+	}
+
+	/** @param identifier the identifier of the community */
+	public void setIdentifier(String identifier) {
+		this.identifier = identifier;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
@ -0,0 +1,47 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+/**
+ * Creator listed in the {@link Metadata} of a deposition.
+ * <p>
+ * It is serializable because {@link Metadata}, which is serializable, holds a list of creators.
+ */
+public class Creator implements java.io.Serializable {
+	private String affiliation;
+	private String name;
+	private String orcid;
+
+	public String getAffiliation() {
+		return affiliation;
+	}
+
+	public void setAffiliation(String affiliation) {
+		this.affiliation = affiliation;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	public void setName(String name) {
+		this.name = name;
+	}
+
+	public String getOrcid() {
+		return orcid;
+	}
+
+	public void setOrcid(String orcid) {
+		this.orcid = orcid;
+	}
+
+	/**
+	 * Creates a creator with the given values; a null argument leaves the corresponding field null.
+	 *
+	 * @param name the name of the creator
+	 * @param affiliation the affiliation of the creator
+	 * @param orcid the ORCID of the creator
+	 * @return the new creator
+	 */
+	public static Creator newInstance(String name, String affiliation, String orcid) {
+		// the fields of a new instance are null, so no null check is needed before assigning them
+		Creator c = new Creator();
+		c.name = name;
+		c.affiliation = affiliation;
+		c.orcid = orcid;
+
+		return c;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
@ -0,0 +1,44 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * File entry of a {@link ZenodoModel}: checksum, name, size and id of the file.
+ * <p>
+ * Note that its simple name clashes with {@code java.io.File}, so only one of the two can be imported in a class.
+ */
+public class File implements Serializable {
+	private String checksum;
+	private String filename;
+	private long filesize;
+	private String id;
+
+	public String getChecksum() {
+		return checksum;
+	}
+
+	public void setChecksum(String checksum) {
+		this.checksum = checksum;
+	}
+
+	public String getFilename() {
+		return filename;
+	}
+
+	public void setFilename(String filename) {
+		this.filename = filename;
+	}
+
+	public long getFilesize() {
+		return filesize;
+	}
+
+	public void setFilesize(long filesize) {
+		this.filesize = filesize;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
@ -0,0 +1,23 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * Grant listed in the {@link Metadata} of a deposition; it only carries the id of the grant.
+ */
+public class Grant implements Serializable {
+	private String id;
+
+	/** @return the id of the grant */
+	public String getId() {
+		return this.id;
+	}
+
+	/** @param id the id of the grant */
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	/**
+	 * Creates a grant with the given id.
+	 *
+	 * @param id the id of the grant, possibly null
+	 * @return the new grant
+	 */
+	public static Grant newInstance(String id) {
+		final Grant grant = new Grant();
+		grant.setId(id);
+		return grant;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
@ -0,0 +1,92 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * Links of a {@link ZenodoModel}. Each field holds one link as a string; the client code in this package reads
+ * the bucket and the latest_draft links.
+ */
+public class Links implements Serializable {
+
+	private String bucket;
+
+	private String discard;
+
+	private String edit;
+	private String files;
+	private String html;
+	private String latest_draft;
+	private String latest_draft_html;
+	private String publish;
+
+	private String self;
+
+	public String getBucket() {
+		return bucket;
+	}
+
+	public void setBucket(String bucket) {
+		this.bucket = bucket;
+	}
+
+	public String getDiscard() {
+		return discard;
+	}
+
+	public void setDiscard(String discard) {
+		this.discard = discard;
+	}
+
+	public String getEdit() {
+		return edit;
+	}
+
+	public void setEdit(String edit) {
+		this.edit = edit;
+	}
+
+	public String getFiles() {
+		return files;
+	}
+
+	public void setFiles(String files) {
+		this.files = files;
+	}
+
+	public String getHtml() {
+		return html;
+	}
+
+	public void setHtml(String html) {
+		this.html = html;
+	}
+
+	public String getLatest_draft() {
+		return latest_draft;
+	}
+
+	public void setLatest_draft(String latest_draft) {
+		this.latest_draft = latest_draft;
+	}
+
+	public String getLatest_draft_html() {
+		return latest_draft_html;
+	}
+
+	public void setLatest_draft_html(String latest_draft_html) {
+		this.latest_draft_html = latest_draft_html;
+	}
+
+	public String getPublish() {
+		return publish;
+	}
+
+	public void setPublish(String publish) {
+		this.publish = publish;
+	}
+
+	public String getSelf() {
+		return self;
+	}
+
+	public void setSelf(String self) {
+		this.self = self;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
@ -0,0 +1,153 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Metadata of a {@link ZenodoModel}: plain bean exposing a getter and a setter for each field.
+ * <p>
+ * NOTE(review): the snake_case field names presumably mirror the keys of the JSON exchanged with Zenodo, since
+ * no field naming policy is configured where Gson is used in this package - confirm before renaming any field.
+ */
+public class Metadata implements Serializable {
+
+	private String access_right;
+	private List<Community> communities;
+	private List<Creator> creators;
+	private String description;
+	private String doi;
+	private List<Grant> grants;
+	private List<String> keywords;
+	private String language;
+	private String license;
+	private PrereserveDoi prereserve_doi;
+	private String publication_date;
+	private List<String> references;
+	private List<RelatedIdentifier> related_identifiers;
+	private String title;
+	private String upload_type;
+	private String version;
+
+	public String getUpload_type() {
+		return upload_type;
+	}
+
+	public void setUpload_type(String upload_type) {
+		this.upload_type = upload_type;
+	}
+
+	public String getVersion() {
+		return version;
+	}
+
+	public void setVersion(String version) {
+		this.version = version;
+	}
+
+	public String getAccess_right() {
+		return access_right;
+	}
+
+	public void setAccess_right(String access_right) {
+		this.access_right = access_right;
+	}
+
+	public List<Community> getCommunities() {
+		return communities;
+	}
+
+	public void setCommunities(List<Community> communities) {
+		this.communities = communities;
+	}
+
+	public List<Creator> getCreators() {
+		return creators;
+	}
+
+	public void setCreators(List<Creator> creators) {
+		this.creators = creators;
+	}
+
+	public String getDescription() {
+		return description;
+	}
+
+	public void setDescription(String description) {
+		this.description = description;
+	}
+
+	public String getDoi() {
+		return doi;
+	}
+
+	public void setDoi(String doi) {
+		this.doi = doi;
+	}
+
+	public List<Grant> getGrants() {
+		return grants;
+	}
+
+	public void setGrants(List<Grant> grants) {
+		this.grants = grants;
+	}
+
+	public List<String> getKeywords() {
+		return keywords;
+	}
+
+	public void setKeywords(List<String> keywords) {
+		this.keywords = keywords;
+	}
+
+	public String getLanguage() {
+		return language;
+	}
+
+	public void setLanguage(String language) {
+		this.language = language;
+	}
+
+	public String getLicense() {
+		return license;
+	}
+
+	public void setLicense(String license) {
+		this.license = license;
+	}
+
+	public PrereserveDoi getPrereserve_doi() {
+		return prereserve_doi;
+	}
+
+	public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
+		this.prereserve_doi = prereserve_doi;
+	}
+
+	public String getPublication_date() {
+		return publication_date;
+	}
+
+	public void setPublication_date(String publication_date) {
+		this.publication_date = publication_date;
+	}
+
+	public List<String> getReferences() {
+		return references;
+	}
+
+	public void setReferences(List<String> references) {
+		this.references = references;
+	}
+
+	public List<RelatedIdentifier> getRelated_identifiers() {
+		return related_identifiers;
+	}
+
+	public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
+		this.related_identifiers = related_identifiers;
+	}
+
+	public String getTitle() {
+		return title;
+	}
+
+	public void setTitle(String title) {
+		this.title = title;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
@ -0,0 +1,25 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * Value of the prereserve_doi field of {@link Metadata}: a DOI together with a record id.
+ */
+public class PrereserveDoi implements Serializable {
+	private String doi;
+	private String recid;
+
+	public String getDoi() {
+		return doi;
+	}
+
+	public void setDoi(String doi) {
+		this.doi = doi;
+	}
+
+	public String getRecid() {
+		return recid;
+	}
+
+	public void setRecid(String recid) {
+		this.recid = recid;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
@ -0,0 +1,43 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+/**
+ * Entry of the related_identifiers list of {@link Metadata}: an identifier with its relation, resource type
+ * and scheme, all kept as plain strings.
+ */
+public class RelatedIdentifier implements Serializable {
+	private String identifier;
+	private String relation;
+	private String resource_type;
+	private String scheme;
+
+	public String getIdentifier() {
+		return identifier;
+	}
+
+	public void setIdentifier(String identifier) {
+		this.identifier = identifier;
+	}
+
+	public String getRelation() {
+		return relation;
+	}
+
+	public void setRelation(String relation) {
+		this.relation = relation;
+	}
+
+	public String getResource_type() {
+		return resource_type;
+	}
+
+	public void setResource_type(String resource_type) {
+		this.resource_type = resource_type;
+	}
+
+	public String getScheme() {
+		return scheme;
+	}
+
+	public void setScheme(String scheme) {
+		this.scheme = scheme;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
@ -0,0 +1,118 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Deposition as handled by the Zenodo client of this package, which builds instances of this class from the
+ * JSON body of the responses by means of Gson and then reads the id, the concept record id and the links.
+ */
+public class ZenodoModel implements Serializable {
+
+	private String conceptrecid;
+	private String created;
+
+	private List<File> files;
+	private String id;
+	private Links links;
+	private Metadata metadata;
+	private String modified;
+	private String owner;
+	private String record_id;
+	private String state;
+	private boolean submitted;
+	private String title;
+
+	public String getConceptrecid() {
+		return conceptrecid;
+	}
+
+	public void setConceptrecid(String conceptrecid) {
+		this.conceptrecid = conceptrecid;
+	}
+
+	public String getCreated() {
+		return created;
+	}
+
+	public void setCreated(String created) {
+		this.created = created;
+	}
+
+	public List<File> getFiles() {
+		return files;
+	}
+
+	public void setFiles(List<File> files) {
+		this.files = files;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public Links getLinks() {
+		return links;
+	}
+
+	public void setLinks(Links links) {
+		this.links = links;
+	}
+
+	public Metadata getMetadata() {
+		return metadata;
+	}
+
+	public void setMetadata(Metadata metadata) {
+		this.metadata = metadata;
+	}
+
+	public String getModified() {
+		return modified;
+	}
+
+	public void setModified(String modified) {
+		this.modified = modified;
+	}
+
+	public String getOwner() {
+		return owner;
+	}
+
+	public void setOwner(String owner) {
+		this.owner = owner;
+	}
+
+	public String getRecord_id() {
+		return record_id;
+	}
+
+	public void setRecord_id(String record_id) {
+		this.record_id = record_id;
+	}
+
+	public String getState() {
+		return state;
+	}
+
+	public void setState(String state) {
+		this.state = state;
+	}
+
+	public boolean isSubmitted() {
+		return submitted;
+	}
+
+	public void setSubmitted(boolean submitted) {
+		this.submitted = submitted;
+	}
+
+	public String getTitle() {
+		return title;
+	}
+
+	public void setTitle(String title) {
+		this.title = title;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
@ -0,0 +1,7 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.util.ArrayList;
+
+/**
+ * List of {@link ZenodoModel}. Being a concrete, non generic class it can be passed as target type to Gson in
+ * order to parse a page of depositions.
+ */
+public class ZenodoModelList extends ArrayList<ZenodoModel> {
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java
@ -65,7 +65,13 @@ public class RunSQLSparkJob {
 				for (String statement : sql.split(";\\s*/\\*\\s*EOS\\s*\\*/\\s*")) {
 					log.info("executing: {}", statement);
 					long startTime = System.currentTimeMillis();
-					spark.sql(statement).show();
+					try {
+						spark.sql(statement).show();
+					} catch (Exception e) {
+						log.error("Error executing statement: {}", statement, e);
+						System.err.println("Error executing statement: " + statement + "\n" + e);
+						throw e;
+					}
 					log
 						.info(
 							"executed in {}",
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@ -119,7 +119,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 							.getContext()
 							.stream()
 							.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
-							.collect(Collectors.toCollection(ArrayList::new)));
+							.collect(Collectors.toList()));
 			}
 			return (T) res;
 		} else {
@ -1015,41 +1015,4 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 			.orElse(null);
 	}

-	/**
-	 * Implements bad and ugly things that we should get rid of ASAP.
-	 *
-	 * @param value
-	 * @return
-	 * @param <T>
-	 */
-	public static <T extends Oaf> T dedicatedUglyHacks(T value) {
-		if (value instanceof OafEntity) {
-			if (value instanceof Result) {
-				final Result r = (Result) value;
-
-				// Fix for AMS Acta
-				Optional
-					.ofNullable(r.getInstance())
-					.map(
-						instance -> instance
-							.stream()
-							.filter(
-								i -> Optional
-									.ofNullable(i.getHostedby())
-									.map(KeyValue::getKey)
-									.map(dsId -> dsId.equals("10|re3data_____::4cc76bed7ce2fb95fd8e7a2dfde16016"))
-									.orElse(false)))
-					.ifPresent(instance -> instance.forEach(i -> {
-						if (Optional
-							.ofNullable(i.getPid())
-							.map(pid -> pid.stream().noneMatch(p -> p.getValue().startsWith("10.6092/unibo/amsacta")))
-							.orElse(false)) {
-							i.setHostedby(UNKNOWN_REPOSITORY);
-						}
-					}));
-			}
-		}
-		return value;
-	}
-
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
@ -433,10 +433,7 @@ public class MergeUtils {

 		// merge datainfo for same context id
 		merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> {
-			ArrayList<DataInfo> di = new ArrayList<>();
-			di.addAll(r.getDataInfo());
-			di.addAll(l.getDataInfo());
-			r.setDataInfo(di);
+			r.getDataInfo().addAll(l.getDataInfo());
 			return r;
 		}));

--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
@ -0,0 +1,109 @@
+
+package eu.dnetlib.dhp.common.api;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests of {@link ZenodoAPIClient} sending real requests to the URL in URL_STRING. The class is disabled;
+ * ACCESS_TOKEN is empty, so presumably a valid token has to be provided before enabling it.
+ */
+@Disabled
+class ZenodoAPIClientTest {
+
+	private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
+	private final String ACCESS_TOKEN = "";
+
+	private final String CONCEPT_REC_ID = "657113";
+
+	private final String depositionId = "674915";
+
+	@Test
+	void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
+		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+			ACCESS_TOKEN);
+		Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
+
+		// the stream is closed as soon as the upload is over
+		try (InputStream is = openResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")) {
+			Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
+		}
+
+		String metadata = readResource("/eu/dnetlib/dhp/common/api/metadata.json");
+
+		Assertions.assertEquals(200, client.sendMretadata(metadata));
+
+		Assertions.assertEquals(202, client.publish());
+
+	}
+
+	@Test
+	void testNewDeposition() throws IOException {
+
+		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+			ACCESS_TOKEN);
+		Assertions.assertEquals(201, client.newDeposition());
+
+		try (InputStream is = openResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")) {
+			Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
+		}
+
+		String metadata = readResource("/eu/dnetlib/dhp/common/api/metadata.json");
+
+		Assertions.assertEquals(200, client.sendMretadata(metadata));
+
+		Assertions.assertEquals(202, client.publish());
+
+	}
+
+	@Test
+	void testNewVersionNewName() throws IOException, MissingConceptDoiException {
+
+		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+			ACCESS_TOKEN);
+
+		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
+
+		try (InputStream is = openResource("/eu/dnetlib/dhp/common/api/newVersion")) {
+			Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
+		}
+
+		Assertions.assertEquals(202, client.publish());
+
+	}
+
+	@Test
+	void testNewVersionOldName() throws IOException, MissingConceptDoiException {
+
+		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+			ACCESS_TOKEN);
+
+		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
+
+		try (InputStream is = openResource("/eu/dnetlib/dhp/common/api/newVersion2")) {
+			Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
+		}
+
+		Assertions.assertEquals(202, client.publish());
+
+	}
+
+	/** Opens the file backing the given classpath resource; the caller has to close the returned stream. */
+	private InputStream openResource(String resource) throws IOException {
+		File file = new File(getClass()
+			.getResource(resource)
+			.getPath());
+
+		return new FileInputStream(file);
+	}
+
+	/** Reads the given classpath resource as UTF-8 text, closing the stream once done. */
+	private String readResource(String resource) throws IOException {
+		try (InputStream is = getClass().getResourceAsStream(resource)) {
+			return IOUtils.toString(is, "UTF-8");
+		}
+	}
+
+}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
@ -177,7 +177,7 @@ class OafMapperUtilsTest {
 		assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));

 		assertEquals(
-			ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
+			ModelConstants.DATASET_RESULTTYPE_CLASSID,
 			((Result) MergeUtils
 				.merge(p2, d1))
 					.getResulttype()
--- a/dhp-workflows/dhp-aggregation/pom.xml
+++ b/dhp-workflows/dhp-aggregation/pom.xml
@ -6,7 +6,18 @@
        <artifactId>dhp-workflows</artifactId>
        <version>1.2.5-SNAPSHOT</version>
    </parent>
+
    <artifactId>dhp-aggregation</artifactId>
+
+    <properties>
+        <affro.release.version>1.0.0</affro.release.version>
+    </properties>
+
+    <scm>
+        <url>https://code-repo.d4science.org/mkallipo/affRo</url>
+        <connection>scm:git:https://code-repo.d4science.org/mkallipo/affRo.git</connection>
+    </scm>
+
    <build>
        <plugins>
            <plugin>
@ -43,6 +54,32 @@
                    <scalaVersion>${scala.version}</scalaVersion>
                </configuration>
            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-scm-plugin</artifactId>
+                <version>1.8.1</version>
+                <configuration>
+                    <connectionType>connection</connectionType>
+                    <!--
+                     <scmVersionType>tag</scmVersionType>--><!-- 'branch' can also be provided here -->
+                  <!--  <scmVersion>${affro.release.version}</scmVersion>--><!-- in case of scmVersionType == 'branch', this field points to the branch name -->
+
+                    <scmVersionType>branch</scmVersionType><!-- 'tag' can also be provided here -->
+                    <scmVersion>openaire-workflow-ready</scmVersion><!-- in case of scmVersionType == 'branch', this field points to the branch name -->
+                    <checkoutDirectory>${project.build.directory}/${oozie.package.file.name}/${oozieAppDir}/affRo</checkoutDirectory>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>checkout-affro</id>
+                        <phase>prepare-package</phase>
+                        <goals>
+                            <goal>checkout</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+
        </plugins>

    </build>
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
@ -46,6 +46,9 @@ public class GetOpenCitationsRefs implements Serializable {
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath {}", outputPath);

+		final String backupPath = parser.get("backupPath");
+		log.info("backupPath {}", backupPath);
+
 		Configuration conf = new Configuration();
 		conf.set("fs.defaultFS", hdfsNameNode);

@ -53,11 +56,11 @@ public class GetOpenCitationsRefs implements Serializable {

 		GetOpenCitationsRefs ocr = new GetOpenCitationsRefs();

-		ocr.doExtract(inputPath, outputPath, fileSystem);
+		ocr.doExtract(inputPath, outputPath, backupPath, fileSystem);

 	}

-	private void doExtract(String inputPath, String outputPath, FileSystem fileSystem)
+	private void doExtract(String inputPath, String outputPath, String backupPath, FileSystem fileSystem)
 		throws IOException {

 		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
@ -89,6 +92,7 @@ public class GetOpenCitationsRefs implements Serializable {
 				}

 			}
+			fileSystem.rename(fileStatus.getPath(), new Path(backupPath));
 		}

 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
@ -49,9 +49,6 @@ public class ReadCOCI implements Serializable {
 		final String workingPath = parser.get("inputPath");
 		log.info("workingPath {}", workingPath);

-		final String backupPath = parser.get("backupPath");
-		log.info("backupPath {}", backupPath);
-
 		SparkConf sconf = new SparkConf();

 		Configuration conf = new Configuration();
@ -71,14 +68,12 @@ public class ReadCOCI implements Serializable {
 					workingPath,
 					fileSystem,
 					outputPath,
-					backupPath,
 					delimiter);
 			});
 	}

 	private static void doRead(SparkSession spark, String workingPath, FileSystem fileSystem,
 		String outputPath,
-		String backupPath,
 		String delimiter) throws IOException {
 		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
 			.listFiles(
@ -113,7 +108,7 @@ public class ReadCOCI implements Serializable {
 				.option("compression", "gzip")
 				.json(outputPath);

-			fileSystem.rename(fileStatus.getPath(), new Path(backupPath));
+			fileSystem.delete(fileStatus.getPath());
 		}

 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java
@ -297,7 +297,7 @@ public class ExtractPerson implements Serializable {
 	}

 	private static Relation getAffiliationRelation(Employment row) {
-		String source = PERSON_PREFIX + IdentifierFactory.md5(row.getOrcid());
+		String source = PERSON_PREFIX + "::" + IdentifierFactory.md5(row.getOrcid());
 		String target = ROR_PREFIX
 			+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAffiliationId().getValue()));
 		List<KeyValue> properties = new ArrayList<>();
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/affiliations/job.properties
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/affiliations/job.properties
@ -0,0 +1,45 @@
+# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties ---
+# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
+# dhp.hadoop.frontend.user.name=ilias.kanellos
+# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
+# dhp.hadoop.frontend.port.ssh=22
+# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
+# jobTracker=yarnRM
+# nameNode=hdfs://nameservice1
+# oozie.execution.log.file.location = target/extract-and-run-on-remote-host.log
+# maven.executable=mvn
+
+
+# The above is given differently in an example I found online
+oozie.action.sharelib.for.spark=spark2
+oozieActionShareLibForSpark2=spark2
+spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
+spark2EventLogDir=/user/spark/spark2ApplicationHistory
+sparkSqlWarehouseDir=/user/hive/warehouse
+#hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
+# This MAY avoid the no library used error
+oozie.use.system.libpath=true
+# Some stuff copied from openaire's jobs
+spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
+spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener
+
+# The following is needed as a property of a workflow
+wfAppPath=${oozieTopWfApplicationPath}
+
+resumeFrom=Crossref
+
+#OpenAlex input/output
+#resultFolder=/tmp/affro-results/oalex
+#inputFolder=/user/zeppelin/affiliations/raw_aff_string/2024-08
+
+#Crossref input/output
+resultFolder=/tmp/affro-results/crossref
+inputFolder=/data/doiboost/crossref/crossref_unpack
+
+#
+#crossrefInputPath=/data/bip-affiliations/crossref-data.json
+#pubmedInputPath=/data/bip-affiliations/pubmed-data.json
+#openapcInputPath=/data/bip-affiliations/openapc-data.json
+#dataciteInputPath=/data/bip-affiliations/datacite-data.json
+#
+#outputPath=/tmp/crossref-affiliations-output-v5
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/affiliations/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/affiliations/oozie_app/config-default.xml
@ -0,0 +1,30 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>hiveMetastoreUris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>hiveJdbcUrl</name>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+    </property>
+    <property>
+        <name>hiveDbName</name>
+        <value>openaire</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/affiliations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/affiliations/oozie_app/workflow.xml
@ -0,0 +1,176 @@
+<workflow-app name="AffroAffiliations" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
+        </property>
+        <property>
+            <name>oozieActionShareLibForSpark2</name>
+            <description>oozie action sharelib for spark 2.*</description>
+        </property>
+        <property>
+            <name>spark2ExtraListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+            <description>spark 2.* extra listeners classname</description>
+        </property>
+        <property>
+            <name>spark2SqlQueryExecutionListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+            <description>spark 2.* sql query execution listeners classname</description>
+        </property>
+        <property>
+            <name>spark2YarnHistoryServerAddress</name>
+            <description>spark 2.* yarn history server address</description>
+        </property>
+        <property>
+            <name>spark2EventLogDir</name>
+            <description>spark 2.* event log dir location</description>
+        </property>
+    </parameters>
+
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.job.queuename</name>
+                <value>${queueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapred.job.queue.name</name>
+                <value>${oozieLauncherQueueName}</value>
+            </property>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+
+        </configuration>
+    </global>
+
+    <start to="resumeFrom"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <!-- Entry routing on the 'resumeFrom' job property: 'IIS' selects run-affro-on-iisdata,
+         'Crossref' selects run-affro-on-crossref, any other value falls through to
+         run-affro-on-oalexstrings. -->
+    <decision name="resumeFrom">
+        <switch>
+            <case to="run-affro-on-iisdata">${wf:conf('resumeFrom') eq 'IIS'}</case>
+            <case to="run-affro-on-crossref">${wf:conf('resumeFrom') eq 'Crossref'}</case>
+            <default to="run-affro-on-oalexstrings"/>
+        </switch>
+    </decision>
+    <!-- Submits update_records.py from the affRo checkout as a Spark action; its only argument
+         is the resultFolder property.
+         NOTE(review): unlike the oalexstrings and crossref actions, this one ships
+         affro_test_example.py in py-files and does not ship dictionaries/dix_status.json;
+         confirm both differences are intentional.
+         NOTE(review): executor and driver sizing is hard-coded below although the workflow
+         declares sparkDriverMemory, sparkExecutorMemory and sparkExecutorCores parameters. -->
+    <action name="run-affro-on-iisdata">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>Affiliations inference (Affro)</name>
+            <jar>update_records.py</jar>
+
+            <spark-opts>
+                --executor-cores=4
+                --executor-memory=6G
+                --driver-memory=15G
+                --conf spark.executor.memoryOverhead=6G
+                --conf spark.sql.shuffle.partitions=20000
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=python3
+                --conf spark.executorEnv.PYSPARK_PYTHON=python3
+                --py-files ${wfAppPath}/affRo/affro_cluster.py,${wfAppPath}/affRo/affro_test_example.py,${wfAppPath}/affRo/create_input_cluster.py,${wfAppPath}/affRo/functions_cluster.py,${wfAppPath}/affRo/matching_cluster.py
+                --files ${wfAppPath}/affRo/dictionaries/dix_acad.json,${wfAppPath}/affRo/dictionaries/dix_categ.json,${wfAppPath}/affRo/dictionaries/dix_city.json,${wfAppPath}/affRo/dictionaries/dix_country.json,${wfAppPath}/affRo/dictionaries/dix_mult.json,${wfAppPath}/affRo/txt_files/city_names.txt,${wfAppPath}/affRo/txt_files/remove_list.txt,${wfAppPath}/affRo/txt_files/stop_words.txt,${wfAppPath}/affRo/txt_files/university_terms.txt
+            </spark-opts>
+
+            <arg>${resultFolder}</arg>
+
+            <file>${wfAppPath}/affRo/update_records.py#update_records.py</file>
+        </spark>
+
+        <ok to="End" />
+        <error to="Kill" />
+
+    </action>
+
+    <!-- Default target of the resumeFrom decision: submits strings.py from the affRo checkout
+         as a Spark action, passing the inputFolder and resultFolder properties as positional
+         arguments, in that order.
+         NOTE(review): executor and driver sizing is hard-coded below although the workflow
+         declares sparkDriverMemory, sparkExecutorMemory and sparkExecutorCores parameters. -->
+    <action name="run-affro-on-oalexstrings">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>Affiliations inference (Affro)</name>
+            <jar>strings.py</jar>
+
+            <spark-opts>
+                --executor-cores=4
+                --executor-memory=6G
+                --driver-memory=15G
+                --conf spark.executor.memoryOverhead=6G
+                --conf spark.sql.shuffle.partitions=20000
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=python3
+                --conf spark.executorEnv.PYSPARK_PYTHON=python3
+                --py-files ${wfAppPath}/affRo/affro_cluster.py,${wfAppPath}/affRo/create_input_cluster.py,${wfAppPath}/affRo/functions_cluster.py,${wfAppPath}/affRo/matching_cluster.py
+                --files ${wfAppPath}/affRo/dictionaries/dix_acad.json,${wfAppPath}/affRo/dictionaries/dix_categ.json,${wfAppPath}/affRo/dictionaries/dix_city.json,${wfAppPath}/affRo/dictionaries/dix_country.json,${wfAppPath}/affRo/dictionaries/dix_mult.json,${wfAppPath}/affRo/dictionaries/dix_status.json,${wfAppPath}/affRo/txt_files/city_names.txt,${wfAppPath}/affRo/txt_files/remove_list.txt,${wfAppPath}/affRo/txt_files/stop_words.txt,${wfAppPath}/affRo/txt_files/university_terms.txt
+            </spark-opts>
+
+            <arg>${inputFolder}</arg>
+            <arg>${resultFolder}</arg>
+
+            <file>${wfAppPath}/affRo/strings.py#strings.py</file>
+        </spark>
+
+        <ok to="End" />
+        <error to="Kill" />
+
+    </action>
+
+    <!-- Selected when resumeFrom is 'Crossref': submits crossref.py from the affRo checkout as
+         a Spark action, passing the inputFolder and resultFolder properties as positional
+         arguments, in that order.
+         NOTE(review): executor and driver sizing is hard-coded below although the workflow
+         declares sparkDriverMemory, sparkExecutorMemory and sparkExecutorCores parameters. -->
+    <action name="run-affro-on-crossref">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>Affiliations inference (Affro)</name>
+            <jar>crossref.py</jar>
+
+            <spark-opts>
+                --executor-cores=4
+                --executor-memory=6G
+                --driver-memory=15G
+                --conf spark.executor.memoryOverhead=6G
+                --conf spark.sql.shuffle.partitions=20000
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=python3
+                --conf spark.executorEnv.PYSPARK_PYTHON=python3
+                --py-files ${wfAppPath}/affRo/affro_cluster.py,${wfAppPath}/affRo/create_input_cluster.py,${wfAppPath}/affRo/functions_cluster.py,${wfAppPath}/affRo/matching_cluster.py
+                --files ${wfAppPath}/affRo/dictionaries/dix_acad.json,${wfAppPath}/affRo/dictionaries/dix_categ.json,${wfAppPath}/affRo/dictionaries/dix_city.json,${wfAppPath}/affRo/dictionaries/dix_country.json,${wfAppPath}/affRo/dictionaries/dix_mult.json,${wfAppPath}/affRo/dictionaries/dix_status.json,${wfAppPath}/affRo/txt_files/city_names.txt,${wfAppPath}/affRo/txt_files/remove_list.txt,${wfAppPath}/affRo/txt_files/stop_words.txt,${wfAppPath}/affRo/txt_files/university_terms.txt
+            </spark-opts>
+
+            <arg>${inputFolder}</arg>
+            <arg>${resultFolder}</arg>
+
+            <file>${wfAppPath}/affRo/crossref.py#crossref.py</file>
+        </spark>
+
+        <ok to="End" />
+        <error to="Kill" />
+
+    </action>
+
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
@ -35,6 +35,5 @@ crossrefInputPath=/data/bip-affiliations/crossref-data.json
 pubmedInputPath=/data/bip-affiliations/pubmed-data.json
 openapcInputPath=/data/bip-affiliations/openapc-data.json
 dataciteInputPath=/data/bip-affiliations/datacite-data.json
-webCrawlInputPath=/data/bip-affiliations/webCrawl/

 outputPath=/tmp/crossref-affiliations-output-v5
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
@ -21,10 +21,6 @@
            <name>webCrawlInputPath</name>
            <description>the path where to find the inferred affiliation relations from webCrawl</description>
        </property>
-        <property>
-            <name>publisherInputPath</name>
-            <description>the path where to find the inferred affiliation relations from publisher websites</description>
-        </property>
        <property>
            <name>outputPath</name>
            <description>the path where to store the actionset</description>
@ -121,7 +117,6 @@
            <arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg>
            <arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg>
            <arg>--webCrawlInputPath</arg><arg>${webCrawlInputPath}</arg>
-            <arg>--publisherInputPath</arg><arg>${publisherInputPath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json
@ -16,5 +16,11 @@
    "paramLongName": "hdfsNameNode",
    "paramDescription": "the hdfs name node",
    "paramRequired": true
+  },
+  {
+    "paramName": "bp",
+    "paramLongName": "backupPath",
+    "paramDescription": "the hdfs path to move the OC data after the extraction",
+    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json
@ -30,12 +30,6 @@
    "paramLongName": "hdfsNameNode",
    "paramDescription": "the hdfs name node",
    "paramRequired": true
-  },
-  {
-    "paramName": "bp",
-    "paramLongName": "backupPath",
-    "paramDescription": "the hdfs path to move the OC data after the extraction",
-    "paramRequired": true
  }
 ]

--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml
@ -94,17 +94,7 @@
            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
            <arg>--inputPath</arg><arg>${inputPath}/Original</arg>
            <arg>--outputPath</arg><arg>${inputPath}/Extracted</arg>
-        </java>
-        <ok to="read"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="extract_correspondence">
-        <java>
-            <main-class>eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs</main-class>
-            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
-            <arg>--inputPath</arg><arg>${inputPath}/correspondence</arg>
-            <arg>--outputPath</arg><arg>${inputPath}/correspondence_extracted</arg>
+            <arg>--backupPath</arg><arg>${inputPath}/backup</arg>
        </java>
        <ok to="read"/>
        <error to="Kill"/>
@ -129,7 +119,6 @@
            </spark-opts>
            <arg>--inputPath</arg><arg>${inputPath}/Extracted</arg>
            <arg>--outputPath</arg><arg>${inputPath}/JSON</arg>
-            <arg>--backupPath</arg><arg>${inputPath}/backup</arg>
            <arg>--delimiter</arg><arg>${delimiter}</arg>
            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
        </spark>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json
@ -16,11 +16,10 @@
    "paramLongName": "isSparkSessionManged",
    "paramDescription": "the hdfs name node",
    "paramRequired": false
-  },
-  {
-    "paramName": "nn",
-    "paramLongName": "nameNode",
-    "paramDescription": "the hdfs name node",
-    "paramRequired": true
-  }
+  },{
+  "paramName": "nn",
+  "paramLongName": "nameNode",
+  "paramDescription": "the hdfs name node",
+  "paramRequired": true
+}
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/workflow.xml
@ -24,7 +24,7 @@

    <decision name="resume_from">
        <switch>
-            <case to="reset_workingDir">${wf:conf('resumeFrom') eq 'DownloadDump'}</case>
+            <case to="download">${wf:conf('resumeFrom') eq 'DownloadDump'}</case>
            <default to="create_actionset"/> <!-- first action to be done when downloadDump is to be performed -->
        </switch>
    </decision>
@ -33,14 +33,6 @@
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <action name="reset_workingDir">
-        <fs>
-            <delete path="${workingDir}"/>
-            <mkdir path="${workingDir}"/>
-        </fs>
-        <ok to="download"/>
-        <error to="Kill"/>
-    </action>
    <action name="download">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml
@ -1,4 +1,4 @@
-<workflow-app name="Transform_BioEntity_Workflow" xmlns="uri:oozie:workflow:0.5">
+    <workflow-app name="Transform_BioEntity_Workflow" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
@ -8,40 +8,19 @@
            <name>database</name>
            <description>the PDB Database Working Path</description>
        </property>
+
        <property>
-            <name>mdStoreOutputId</name>
-            <description>the identifier of the cleaned MDStore</description>
-        </property>
-        <property>
-            <name>mdStoreManagerURI</name>
-            <description>the path of the cleaned mdstore</description>
+            <name>targetPath</name>
+            <description>the Target Working dir path</description>
        </property>
    </parameters>

-    <start to="StartTransaction"/>
-
+    <start to="ConvertDB"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <action name="StartTransaction">
-        <java>
-            <configuration>
-                <property>
-                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                    <value>true</value>
-                </property>
-            </configuration>
-            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-            <arg>--action</arg><arg>NEW_VERSION</arg>
-            <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
-            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-            <capture-output/>
-        </java>
-        <ok to="ConvertDB"/>
-        <error to="RollBack"/>
-    </action>
    <action name="ConvertDB">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
@ -62,48 +41,11 @@
            <arg>--master</arg><arg>yarn</arg>
            <arg>--dbPath</arg><arg>${sourcePath}</arg>
            <arg>--database</arg><arg>${database}</arg>
-            <arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--targetPath</arg><arg>${targetPath}</arg>
        </spark>
-        <ok to="CommitVersion"/>
-        <error to="RollBack"/>
-
+        <ok to="End"/>
+        <error to="Kill"/>
    </action>
-        <action name="CommitVersion">
-            <java>
-                <configuration>
-                    <property>
-                        <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                        <value>true</value>
-                    </property>
-                </configuration>
-                <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-                <arg>--action</arg><arg>COMMIT</arg>
-                <arg>--namenode</arg><arg>${nameNode}</arg>
-                <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
-                <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-            </java>
-            <ok to="End"/>
-            <error to="Kill"/>
-        </action>
-
-        <action name="RollBack">
-            <java>
-                <configuration>
-                    <property>
-                        <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                        <value>true</value>
-                    </property>
-                </configuration>
-                <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-                <arg>--action</arg><arg>ROLLBACK</arg>
-                <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
-                <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-            </java>
-            <ok to="Kill"/>
-            <error to="Kill"/>
-        </action>
-
-
-        <end name="End"/>
+    <end name="End"/>

 </workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json
@ -2,5 +2,5 @@
  {"paramName":"mt",  "paramLongName":"master",       "paramDescription": "should be local or yarn",                  "paramRequired": true},
  {"paramName":"db",  "paramLongName":"database",     "paramDescription": "should be PDB or UNIPROT",                 "paramRequired": true},
  {"paramName":"p",   "paramLongName":"dbPath",       "paramDescription": "the path of the database to transform",    "paramRequired": true},
-  {"paramName":"mo",   "paramLongName":"mdstoreOutputVersion",     "paramDescription": "the oaf path ",                "paramRequired": true}
+  {"paramName":"t",   "paramLongName":"targetPath",   "paramDescription": "the OAF target path ",                     "paramRequired": true}
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json
@ -1,20 +1,5 @@
 [
-  {
-    "paramName": "mt",
-    "paramLongName": "master",
-    "paramDescription": "should be local or yarn",
-    "paramRequired": true
-  },
-  {
-    "paramName": "s",
-    "paramLongName": "sourcePath",
-    "paramDescription": "the source Path",
-    "paramRequired": true
-  },
-  {
-    "paramName": "mo",
-    "paramLongName": "mdstoreOutputVersion",
-    "paramDescription": "the oaf path ",
-    "paramRequired": true
-  }
+  {"paramName":"mt",  "paramLongName":"master",     "paramDescription": "should be local or yarn",                  "paramRequired": true},
+  {"paramName":"s",   "paramLongName":"sourcePath","paramDescription": "the source Path",                              "paramRequired": true},
+  {"paramName":"t",   "paramLongName":"targetPath","paramDescription": "the  oaf path ",  "paramRequired": true}
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml
@ -9,26 +9,34 @@
            <description>the Working Path</description>
        </property>
        <property>
-            <name>mdStoreOutputId</name>
-            <description>the identifier of the cleaned MDStore</description>
+            <name>targetPath</name>
+            <description>the OAF MDStore Path</description>
        </property>
        <property>
-            <name>mdStoreManagerURI</name>
-            <description>the path of the cleaned mdstore</description>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>resumeFrom</name>
-            <value>CreateEBIDataSet</value>
+            <value>DownloadEBILinks</value>
            <description>node to start</description>
        </property>
    </parameters>

-    <start to="StartTransaction"/>
+    <start to="resume_from"/>

    <decision name="resume_from">
        <switch>
            <case to="DownloadEBILinks">${wf:conf('resumeFrom') eq 'DownloadEBILinks'}</case>
-            <case to="StartTransaction">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
+            <case to="CreateEBIDataSet">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
            <default to="DownloadEBILinks"/>
        </switch>
    </decision>
@ -69,29 +77,9 @@
            <move source="${sourcePath}/ebi_links_dataset" target="${sourcePath}/ebi_links_dataset_old"/>
            <move source="${workingPath}/links_final" target="${sourcePath}/ebi_links_dataset"/>
        </fs>
-        <ok to="StartTransaction"/>
+        <ok to="CreateEBIDataSet"/>
        <error to="Kill"/>
    </action>
-
-    <action name="StartTransaction">
-        <java>
-            <configuration>
-                <property>
-                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                    <value>true</value>
-                </property>
-            </configuration>
-            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-            <arg>--action</arg><arg>NEW_VERSION</arg>
-            <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
-            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-            <capture-output/>
-        </java>
-        <ok to="CreateEBIDataSet"/>
-        <error to="RollBack"/>
-    </action>
-
-
    <action name="CreateEBIDataSet">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
@ -107,49 +95,11 @@
                ${sparkExtraOPT}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/ebi_links_dataset</arg>
-            <arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--targetPath</arg><arg>${targetPath}</arg>
            <arg>--master</arg><arg>yarn</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
-
-
-    <action name="CommitVersion">
-        <java>
-            <configuration>
-                <property>
-                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                    <value>true</value>
-                </property>
-            </configuration>
-            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-            <arg>--action</arg><arg>COMMIT</arg>
-            <arg>--namenode</arg><arg>${nameNode}</arg>
-            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
-            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-        </java>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="RollBack">
-        <java>
-            <configuration>
-                <property>
-                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
-                    <value>true</value>
-                </property>
-            </configuration>
-            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
-            <arg>--action</arg><arg>ROLLBACK</arg>
-            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
-            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
-        </java>
-        <ok to="Kill"/>
-        <error to="Kill"/>
-    </action>
-
    <end name="End"/>
-
 </workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
@ -407,10 +407,9 @@ object DataciteToOAFTransformation {
        )
      }
      if (c.affiliation.isDefined)
-        a.setAffiliation(
+        a.setRawAffiliationString(
          c.affiliation.get
            .filter(af => af.nonEmpty)
-            .map(af => OafMapperUtils.field(af, dataInfo))
            .asJava
        )
      a.setRank(idx + 1)
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
@ -231,7 +231,7 @@ object BioDBToOAF {
  def uniprotToOAF(input: String): List[Oaf] = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json = parse(input)
-    val pid = (json \ "pid").extract[String].trim()
+    val pid = (json \ "pid").extract[String]

    val d = new Dataset

--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala
@ -2,15 +2,12 @@ package eu.dnetlib.dhp.sx.bio

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.collection.CollectionUtils
-import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
-import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
 import eu.dnetlib.dhp.schema.oaf.Oaf
 import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
-import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}

 object SparkTransformBioDatabaseToOAF {

@ -28,13 +25,8 @@ object SparkTransformBioDatabaseToOAF {

    val dbPath: String = parser.get("dbPath")
    log.info("dbPath: {}", database)
-
-    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
-    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
-
-    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
-    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
-    log.info("outputBasePath: {}", outputBasePath)
+    val targetPath: String = parser.get("targetPath")
+    log.info("targetPath: {}", targetPath)

    val spark: SparkSession =
      SparkSession
@ -51,28 +43,24 @@ object SparkTransformBioDatabaseToOAF {
      case "UNIPROT" =>
        CollectionUtils.saveDataset(
          spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))),
-          s"$outputBasePath/$MDSTORE_DATA_PATH"
+          targetPath
        )
      case "PDB" =>
        CollectionUtils.saveDataset(
          spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))),
-          s"$outputBasePath/$MDSTORE_DATA_PATH"
+          targetPath
        )
      case "SCHOLIX" =>
        CollectionUtils.saveDataset(
          spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)),
-          s"$outputBasePath/$MDSTORE_DATA_PATH"
+          targetPath
        )
      case "CROSSREF_LINKS" =>
        CollectionUtils.saveDataset(
          spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))),
-          s"$outputBasePath/$MDSTORE_DATA_PATH"
+          targetPath
        )
    }
-
-    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
-    val mdStoreSize = df.count
-    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
  }

 }
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
@ -9,9 +9,6 @@ import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql._
 import org.slf4j.{Logger, LoggerFactory}
-import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
-import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
-import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}

 object SparkEBILinksToOaf {

@ -35,13 +32,8 @@ object SparkEBILinksToOaf {
    import spark.implicits._
    val sourcePath = parser.get("sourcePath")
    log.info(s"sourcePath  -> $sourcePath")
-    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
-    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
-
-    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
-    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
-    log.info("outputBasePath: {}", outputBasePath)
-
+    val targetPath = parser.get("targetPath")
+    log.info(s"targetPath  -> $targetPath")
    implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])

    val ebLinks: Dataset[EBILinkItem] = spark.read
@ -54,10 +46,7 @@ object SparkEBILinksToOaf {
        .flatMap(j => BioDBToOAF.parse_ebi_links(j.links))
        .filter(p => BioDBToOAF.EBITargetLinksFilter(p))
        .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)),
-      s"$outputBasePath/$MDSTORE_DATA_PATH"
+      targetPath
    )
-    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
-    val mdStoreSize = df.count
-    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
@ -28,7 +28,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
 import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;

@ -40,8 +39,7 @@ public class PrepareAffiliationRelationsTest {

 	private static Path workingDir;
 	private static final String ID_PREFIX = "50|doi_________::";
-	private static final Logger log = LoggerFactory
-		.getLogger(PrepareAffiliationRelationsTest.class);
+	private static final Logger log = LoggerFactory.getLogger(PrepareAffiliationRelationsTest.class);

 	@BeforeAll
 	public static void beforeAll() throws IOException {
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java
@ -77,13 +77,13 @@ public class RemapTest {
 		MapOCIdsInPids
 			.main(
 				new String[] {
-					"--isSparkSessionManged",
+					"-isSparkSessionManged",
 					Boolean.FALSE.toString(),
-					"--inputPath",
+					"-inputPath",
 					inputPath,
-					"--outputPath",
+					"-outputPath",
 					workingDir.toString() + "/out/",
-					"--nameNode", "hdfs://localhost"
+					"-nameNode", "input1;input2;input3;input4;input5"
 				});

 	}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump
@ -1,44 +1,15 @@
-{"classification": "Signaling protein", "pdb": "5NM4", "deposition_date": "2017-04-05", "title": "A2a adenosine receptor room-temperature structure determined by serial Femtosecond crystallography", "Keywords": ["Oom-temperature", " serial crystallography", " signaling protein"], "authors": ["T.weinert", "R.cheng", "D.james", "D.gashi", "P.nogly", "K.jaeger", "M.hennig", "", "J.standfuss"], "pmid": "28912485", "doi": "10.1038/S41467-017-00630-4"}
-{"classification": "Oxidoreductase/oxidoreductase inhibitor", "pdb": "4KN3", "deposition_date": "2013-05-08", "title": "Structure of the y34ns91g double mutant of dehaloperoxidase from Amphitrite ornata with 2,4,6-trichlorophenol", "Keywords": ["Lobin", " oxygen storage", " peroxidase", " oxidoreductase", " oxidoreductase-", "Oxidoreductase inhibitor complex"], "authors": ["C.wang", "L.lovelace", "L.lebioda"], "pmid": "23952341", "doi": "10.1021/BI400627W"}
-{"classification": "Transport protein", "pdb": "8HKM", "deposition_date": "2022-11-27", "title": "Ion channel", "Keywords": ["On channel", " transport protein"], "authors": ["D.h.jiang", "J.t.zhang"], "pmid": "37494189", "doi": "10.1016/J.CELREP.2023.112858"}
-{"classification": "Signaling protein", "pdb": "6JT1", "deposition_date": "2019-04-08", "title": "Structure of human soluble guanylate cyclase in the heme oxidised State", "Keywords": ["Oluble guanylate cyclase", " signaling protein"], "authors": ["L.chen", "Y.kang", "R.liu", "J.-x.wu"], "pmid": "31514202", "doi": "10.1038/S41586-019-1584-6"}
-{"classification": "Immune system", "pdb": "7OW6", "deposition_date": "2021-06-16", "title": "Crystal structure of a tcr in complex with hla-a*11:01 bound to kras G12d peptide (vvvgadgvgk)", "Keywords": ["La", " kras", " tcr", " immune system"], "authors": ["V.karuppiah", "R.a.robinson"], "doi": "10.1038/S41467-022-32811-1"}
-{"classification": "Biosynthetic protein", "pdb": "5EQ8", "deposition_date": "2015-11-12", "title": "Crystal structure of medicago truncatula histidinol-phosphate Phosphatase (mthpp) in complex with l-histidinol", "Keywords": ["Istidine biosynthesis", " metabolic pathways", " dimer", " plant", "", "Biosynthetic protein"], "authors": ["M.ruszkowski", "Z.dauter"], "pmid": "26994138", "doi": "10.1074/JBC.M115.708727"}
-{"classification": "De novo protein", "pdb": "8CWA", "deposition_date": "2022-05-18", "title": "Solution nmr structure of 8-residue rosetta-designed cyclic peptide D8.21 in cdcl3 with cis/trans switching (tc conformation, 53%)", "Keywords": ["Yclic peptide", " non natural amino acids", " cis/trans", " switch peptides", "", "De novo design", "Membrane permeability", "De novo protein"], "authors": ["T.a.ramelot", "R.tejero", "G.t.montelione"], "pmid": "36041435", "doi": "10.1016/J.CELL.2022.07.019"}
-{"classification": "Hydrolase", "pdb": "3R6M", "deposition_date": "2011-03-21", "title": "Crystal structure of vibrio parahaemolyticus yeaz", "Keywords": ["Ctin/hsp70 nucleotide-binding fold", " bacterial resuscitation", " viable", "But non-culturable state", "Resuscitation promoting factor", "Ygjd", "", "Yjee", "Vibrio parahaemolyticus", "Hydrolase"], "authors": ["A.roujeinikova", "I.aydin"], "pmid": "21858042", "doi": "10.1371/JOURNAL.PONE.0023245"}
-{"classification": "Hydrolase", "pdb": "2W5J", "deposition_date": "2008-12-10", "title": "Structure of the c14-rotor ring of the proton translocating Chloroplast atp synthase", "Keywords": ["Ydrolase", " chloroplast", " atp synthase", " lipid-binding", " cf(0)", " membrane", "", "Transport", "Formylation", "Energy transduction", "Hydrogen ion transport", "", "Ion transport", "Transmembrane", "Membrane protein"], "authors": ["M.vollmar", "D.schlieper", "M.winn", "C.buechner", "G.groth"], "pmid": "19423706", "doi": "10.1074/JBC.M109.006916"}
-{"classification": "De novo protein", "pdb": "4GLU", "deposition_date": "2012-08-14", "title": "Crystal structure of the mirror image form of vegf-a", "Keywords": ["-protein", " covalent dimer", " cysteine knot protein", " growth factor", " de", "Novo protein"], "authors": ["K.mandal", "M.uppalapati", "D.ault-riche", "J.kenney", "J.lowitz", "S.sidhu", "", "S.b.h.kent"], "pmid": "22927390", "doi": "10.1073/PNAS.1210483109"}
-{"classification": "Hydrolase/hydrolase inhibitor", "pdb": "3WYL", "deposition_date": "2014-09-01", "title": "Crystal structure of the catalytic domain of pde10a complexed with 5- Methoxy-3-(1-phenyl-1h-pyrazol-5-yl)-1-(3-(trifluoromethyl)phenyl) Pyridazin-4(1h)-one", "Keywords": ["Ydrolase-hydrolase inhibitor complex"], "authors": ["H.oki", "Y.hayano"], "pmid": "25384088", "doi": "10.1021/JM5013648"}
-{"classification": "Isomerase", "pdb": "5BOR", "deposition_date": "2015-05-27", "title": "Structure of acetobacter aceti pure-s57c, sulfonate form", "Keywords": ["Cidophile", " pure", " purine biosynthesis", " isomerase"], "authors": ["K.l.sullivan", "T.j.kappock"]}
-{"classification": "Hydrolase", "pdb": "1X0C", "deposition_date": "2005-03-17", "title": "Improved crystal structure of isopullulanase from aspergillus niger Atcc 9642", "Keywords": ["Ullulan", " glycoside hydrolase family 49", " glycoprotein", " hydrolase"], "authors": ["M.mizuno", "T.tonozuka", "A.yamamura", "Y.miyasaka", "H.akeboshi", "S.kamitori", "", "A.nishikawa", "Y.sakano"], "pmid": "18155243", "doi": "10.1016/J.JMB.2007.11.098"}
-{"classification": "Oxidoreductase", "pdb": "7CUP", "deposition_date": "2020-08-23", "title": "Structure of 2,5-dihydroxypridine dioxygenase from pseudomonas putida Kt2440", "Keywords": ["On-heme dioxygenase", " oxidoreductase"], "authors": ["G.q.liu", "H.z.tang"]}
-{"classification": "Ligase", "pdb": "1VCN", "deposition_date": "2004-03-10", "title": "Crystal structure of t.th. hb8 ctp synthetase complex with sulfate Anion", "Keywords": ["Etramer", " riken structural genomics/proteomics initiative", " rsgi", "", "Structural genomics", "Ligase"], "authors": ["M.goto", "Riken structural genomics/proteomics initiative (rsgi)"], "pmid": "15296735", "doi": "10.1016/J.STR.2004.05.013"}
-{"classification": "Transferase/transferase inhibitor", "pdb": "6C9V", "deposition_date": "2018-01-28", "title": "Mycobacterium tuberculosis adenosine kinase bound to (2r,3s,4r,5r)-2- (hydroxymethyl)-5-(6-(4-phenylpiperazin-1-yl)-9h-purin-9-yl) Tetrahydrofuran-3,4-diol", "Keywords": ["Ucleoside analog", " complex", " inhibitor", " structural genomics", " psi-2", "", "Protein structure initiative", "Tb structural genomics consortium", "", "Tbsgc", "Transferase-transferase inhibitor complex"], "authors": ["R.a.crespo", "Tb structural genomics consortium (tbsgc)"], "pmid": "31002508", "doi": "10.1021/ACS.JMEDCHEM.9B00020"}
-{"classification": "De novo protein", "pdb": "4LPY", "deposition_date": "2013-07-16", "title": "Crystal structure of tencon variant g10", "Keywords": ["Ibronectin type iii fold", " alternate scaffold", " de novo protein"], "authors": ["A.teplyakov", "G.obmolova", "G.l.gilliland"], "pmid": "24375666", "doi": "10.1002/PROT.24502"}
-{"classification": "Isomerase", "pdb": "2Y88", "deposition_date": "2011-02-03", "title": "Crystal structure of mycobacterium tuberculosis phosphoribosyl Isomerase (variant d11n) with bound prfar", "Keywords": ["Romatic amino acid biosynthesis", " isomerase", " tim-barrel", " histidine", "Biosynthesis", "Tryptophan biosynthesis"], "authors": ["J.kuper", "A.v.due", "A.geerlof", "M.wilmanns"], "pmid": "21321225", "doi": "10.1073/PNAS.1015996108"}
-{"classification": "Unknown function", "pdb": "1SR0", "deposition_date": "2004-03-22", "title": "Crystal structure of signalling protein from sheep(sps-40) at 3.0a Resolution using crystal grown in the presence of polysaccharides", "Keywords": ["Ignalling protein", " involution", " unknown function"], "authors": ["D.b.srivastava", "A.s.ethayathulla", "N.singh", "J.kumar", "S.sharma", "T.p.singh"]}
-{"classification": "Dna binding protein", "pdb": "3RH2", "deposition_date": "2011-04-11", "title": "Crystal structure of a tetr-like transcriptional regulator (sama_0099) From shewanella amazonensis sb2b at 2.42 a resolution", "Keywords": ["Na/rna-binding 3-helical bundle", " structural genomics", " joint center", "For structural genomics", "Jcsg", "Protein structure initiative", "Psi-", "Biology", "Dna binding protein"], "authors": ["Joint center for structural genomics (jcsg)"]}
-{"classification": "Transferase", "pdb": "2WK5", "deposition_date": "2009-06-05", "title": "Structural features of native human thymidine phosphorylase And in complex with 5-iodouracil", "Keywords": ["Lycosyltransferase", " developmental protein", " angiogenesis", "", "5-iodouracil", "Growth factor", "Enzyme kinetics", "", "Differentiation", "Disease mutation", "Thymidine", "Phosphorylase", "Chemotaxis", "Transferase", "Mutagenesis", "", "Polymorphism"], "authors": ["E.mitsiki", "A.c.papageorgiou", "S.iyer", "N.thiyagarajan", "S.h.prior", "", "D.sleep", "C.finnis", "K.r.acharya"], "pmid": "19555658", "doi": "10.1016/J.BBRC.2009.06.104"}
-{"classification": "Hydrolase", "pdb": "3P9Y", "deposition_date": "2010-10-18", "title": "Crystal structure of the drosophila melanogaster ssu72-pctd complex", "Keywords": ["Hosphatase", " cis proline", " lmw ptp-like fold", " rna polymerase ii ctd", "", "Hydrolase"], "authors": ["J.w.werner-allen", "P.zhou"], "pmid": "21159777", "doi": "10.1074/JBC.M110.197129"}
-{"classification": "Recombination/dna", "pdb": "6OEO", "deposition_date": "2019-03-27", "title": "Cryo-em structure of mouse rag1/2 nfc complex (dna1)", "Keywords": ["(d)j recombination", " dna transposition", " rag", " scid", " recombination", "", "Recombination-dna complex"], "authors": ["X.chen", "Y.cui", "Z.h.zhou", "W.yang", "M.gellert"], "pmid": "32015552", "doi": "10.1038/S41594-019-0363-2"}
-{"classification": "Hydrolase", "pdb": "4ECA", "deposition_date": "1997-02-21", "title": "Asparaginase from e. coli, mutant t89v with covalently bound aspartate", "Keywords": ["Ydrolase", " acyl-enzyme intermediate", " threonine amidohydrolase"], "authors": ["G.j.palm", "J.lubkowski", "A.wlodawer"], "pmid": "8706862", "doi": "10.1016/0014-5793(96)00660-6"}
-{"classification": "Transcription/protein binding", "pdb": "3UVX", "deposition_date": "2011-11-30", "title": "Crystal structure of the first bromodomain of human brd4 in complex With a diacetylated histone 4 peptide (h4k12ack16ac)", "Keywords": ["Romodomain", " bromodomain containing protein 4", " cap", " hunk1", " mcap", "", "Mitotic chromosome associated protein", "Peptide complex", "Structural", "Genomics consortium", "Sgc", "Transcription-protein binding complex"], "authors": ["P.filippakopoulos", "S.picaud", "T.keates", "E.ugochukwu", "F.von delft", "", "C.h.arrowsmith", "A.m.edwards", "J.weigelt", "C.bountra", "S.knapp", "Structural", "Genomics consortium (sgc)"], "pmid": "22464331", "doi": "10.1016/J.CELL.2012.02.013"}
-{"classification": "Membrane protein", "pdb": "1TLZ", "deposition_date": "2004-06-10", "title": "Tsx structure complexed with uridine", "Keywords": ["Ucleoside transporter", " beta barrel", " uridine", " membrane", "Protein"], "authors": ["J.ye", "B.van den berg"], "pmid": "15272310", "doi": "10.1038/SJ.EMBOJ.7600330"}
-{"classification": "Dna binding protein", "pdb": "7AZD", "deposition_date": "2020-11-16", "title": "Dna polymerase sliding clamp from escherichia coli with peptide 20 Bound", "Keywords": ["Ntibacterial drug", " dna binding protein"], "authors": ["C.monsarrat", "G.compain", "C.andre", "I.martiel", "S.engilberge", "V.olieric", "", "P.wolff", "K.brillet", "M.landolfo", "C.silva da veiga", "J.wagner", "G.guichard", "", "D.y.burnouf"], "pmid": "34806883", "doi": "10.1021/ACS.JMEDCHEM.1C00918"}
-{"classification": "Transferase", "pdb": "5N3K", "deposition_date": "2017-02-08", "title": "Camp-dependent protein kinase a from cricetulus griseus in complex With fragment like molecule o-guanidino-l-homoserine", "Keywords": ["Ragment", " complex", " transferase", " serine threonine kinase", " camp", "", "Kinase", "Pka"], "authors": ["C.siefker", "A.heine", "G.klebe"]}
-{"classification": "Biosynthetic protein", "pdb": "8H52", "deposition_date": "2022-10-11", "title": "Crystal structure of helicobacter pylori carboxyspermidine Dehydrogenase in complex with nadp", "Keywords": ["Arboxyspermidine dehydrogenase", " biosynthetic protein"], "authors": ["K.y.ko", "S.c.park", "S.y.cho", "S.i.yoon"], "pmid": "36283333", "doi": "10.1016/J.BBRC.2022.10.049"}
-{"classification": "Metal binding protein", "pdb": "6DYC", "deposition_date": "2018-07-01", "title": "Co(ii)-bound structure of the engineered cyt cb562 variant, ch3", "Keywords": ["Esigned protein", " 4-helix bundle", " electron transport", " metal binding", "Protein"], "authors": ["F.a.tezcan", "J.rittle"], "pmid": "30778140", "doi": "10.1038/S41557-019-0218-9"}
-{"classification": "Protein fibril", "pdb": "6A6B", "deposition_date": "2018-06-27", "title": "Cryo-em structure of alpha-synuclein fiber", "Keywords": ["Lpha-syn fiber", " parkinson disease", " protein fibril"], "authors": ["Y.w.li", "C.y.zhao", "F.luo", "Z.liu", "X.gui", "Z.luo", "X.zhang", "D.li", "C.liu", "X.li"], "pmid": "30065316", "doi": "10.1038/S41422-018-0075-X"}
-{"classification": "Dna", "pdb": "7D5E", "deposition_date": "2020-09-25", "title": "Left-handed g-quadruplex containing two bulges", "Keywords": ["-quadruplex", " bulge", " dna", " left-handed"], "authors": ["P.das", "A.maity", "K.h.ngo", "F.r.winnerdy", "B.bakalar", "Y.mechulam", "E.schmitt", "", "A.t.phan"], "pmid": "33503265", "doi": "10.1093/NAR/GKAA1259"}
-{"classification": "Transferase", "pdb": "3RSY", "deposition_date": "2011-05-02", "title": "Cellobiose phosphorylase from cellulomonas uda in complex with sulfate And glycerol", "Keywords": ["H94", " alpha barrel", " cellobiose phosphorylase", " disaccharide", "Phosphorylase", "Transferase"], "authors": ["A.van hoorebeke", "J.stout", "W.soetaert", "J.van beeumen", "T.desmet", "S.savvides"]}
-{"classification": "Oxidoreductase", "pdb": "7MCI", "deposition_date": "2021-04-02", "title": "Mofe protein from azotobacter vinelandii with a sulfur-replenished Cofactor", "Keywords": ["Zotobacter vinelandii", " mofe-protein", " nitrogenase", " oxidoreductase"], "authors": ["W.kang", "C.lee", "Y.hu", "M.w.ribbe"], "doi": "10.1038/S41929-022-00782-7"}
-{"classification": "Dna", "pdb": "1XUW", "deposition_date": "2004-10-26", "title": "Structural rationalization of a large difference in rna affinity Despite a small difference in chemistry between two 2'-o-modified Nucleic acid analogs", "Keywords": ["Na mimetic methylcarbamate amide analog", " dna"], "authors": ["R.pattanayek", "L.sethaphong", "C.pan", "M.prhavc", "T.p.prakash", "M.manoharan", "", "M.egli"], "pmid": "15547979", "doi": "10.1021/JA044637K"}
-{"classification": "Lyase", "pdb": "7C0D", "deposition_date": "2020-05-01", "title": "Crystal structure of azospirillum brasilense l-2-keto-3-deoxyarabonate Dehydratase (hydroxypyruvate-bound form)", "Keywords": ["-2-keto-3-deoxyarabonate dehydratase", " lyase"], "authors": ["Y.watanabe", "S.watanabe"], "pmid": "32697085", "doi": "10.1021/ACS.BIOCHEM.0C00515"}
-{"classification": "Signaling protein", "pdb": "5LYK", "deposition_date": "2016-09-28", "title": "Crystal structure of intracellular b30.2 domain of btn3a1 bound to Citrate", "Keywords": ["30.2", " butyrophilin", " signaling protein"], "authors": ["F.mohammed", "A.t.baker", "M.salim", "B.e.willcox"], "pmid": "28862425", "doi": "10.1021/ACSCHEMBIO.7B00694"}
-{"classification": "Toxin", "pdb": "4IZL", "deposition_date": "2013-01-30", "title": "Structure of the n248a mutant of the panton-valentine leucocidin s Component from staphylococcus aureus", "Keywords": ["I-component leucotoxin", " staphylococcus aureus", " s component", "Leucocidin", "Beta-barrel pore forming toxin", "Toxin"], "authors": ["L.maveyraud", "B.j.laventie", "G.prevost", "L.mourey"], "pmid": "24643034", "doi": "10.1371/JOURNAL.PONE.0092094"}
-{"classification": "Dna", "pdb": "6F3C", "deposition_date": "2017-11-28", "title": "The cytotoxic [pt(h2bapbpy)] platinum complex interacting with the Cgtacg hexamer", "Keywords": ["Rug-dna complex", " four-way junction", " dna"], "authors": ["M.ferraroni", "C.bazzicalupi", "P.gratteri", "F.papi"], "pmid": "31046177", "doi": "10.1002/ANIE.201814532"}
-{"classification": "Signaling protein/inhibitor", "pdb": "4L5M", "deposition_date": "2013-06-11", "title": "Complexe of arno sec7 domain with the protein-protein interaction Inhibitor n-(4-hydroxy-2,6-dimethylphenyl)benzenesulfonamide at ph6.5", "Keywords": ["Ec-7domain", " signaling protein-inhibitor complex"], "authors": ["F.hoh", "J.rouhana"], "pmid": "24112024", "doi": "10.1021/JM4009357"}
-{"classification": "Signaling protein", "pdb": "5I6J", "deposition_date": "2016-02-16", "title": "Crystal structure of srgap2 f-barx", "Keywords": ["Rgap2", " f-bar", " fx", " signaling protein"], "authors": ["M.sporny", "J.guez-haddad", "M.n.isupov", "Y.opatowsky"], "pmid": "28333212", "doi": "10.1093/MOLBEV/MSX094"}
-{"classification": "Metal binding protein", "pdb": "1Q80", "deposition_date": "2003-08-20", "title": "Solution structure and dynamics of nereis sarcoplasmic calcium binding Protein", "Keywords": ["Ll-alpha", " metal binding protein"], "authors": ["G.rabah", "R.popescu", "J.a.cox", "Y.engelborghs", "C.t.craescu"], "pmid": "15819893", "doi": "10.1111/J.1742-4658.2005.04629.X"}
-{"classification": "Transferase", "pdb": "1TW1", "deposition_date": "2004-06-30", "title": "Beta-1,4-galactosyltransferase mutant met344his (m344h-gal-t1) complex With udp-galactose and magnesium", "Keywords": ["Et344his mutation; closed conformation; mn binding", " transferase"], "authors": ["B.ramakrishnan", "E.boeggeman", "P.k.qasba"], "pmid": "15449940", "doi": "10.1021/BI049007+"}
-{"classification": "Rna", "pdb": "2PN4", "deposition_date": "2007-04-23", "title": "Crystal structure of hepatitis c virus ires subdomain iia", "Keywords": ["Cv", " ires", " subdoamin iia", " rna", " strontium", " hepatitis"], "authors": ["Q.zhao", "Q.han", "C.r.kissinger", "P.a.thompson"], "pmid": "18391410", "doi": "10.1107/S0907444908002011"}
+{"pdb": "1CW0", "title": "crystal structure analysis of very short patch repair (vsr) endonuclease in complex with a duplex dna", "authors": ["S.E.Tsutakawa", "H.Jingami", "K.Morikawa"], "doi": "10.1016/S0092-8674(00)81550-0", "pmid": "10612397"}
+{"pdb": "2CWW", "title": "crystal structure of thermus thermophilus ttha1280, a putative sam- dependent rna methyltransferase, in complex with s-adenosyl-l- homocysteine", "authors": ["A.A.Pioszak", "K.Murayama", "N.Nakagawa", "A.Ebihara", "S.Kuramitsu", "M.Shirouzu", "S.Yokoyama", "Riken Structural Genomics/proteomics Initiative (Rsgi)"], "doi": "10.1107/S1744309105029842", "pmid": "16511182"}
+{"pdb": "6CWE", "title": "structure of alpha-gsa[8,6p] bound by cd1d and in complex with the va14vb8.2 tcr", "authors": ["J.Wang", "D.Zajonc"], "doi": null, "pmid": null}
+{"pdb": "5CWS", "title": "crystal structure of the intact chaetomium thermophilum nsp1-nup49- nup57 channel nucleoporin heterotrimer bound to its nic96 nuclear pore complex attachment site", "authors": ["C.J.Bley", "S.Petrovic", "M.Paduch", "V.Lu", "A.A.Kossiakoff", "A.Hoelz"], "doi": "10.1126/SCIENCE.AAC9176", "pmid": "26316600"}
+{"pdb": "5CWE", "title": "structure of cyp107l2 from streptomyces avermitilis with lauric acid", "authors": ["T.-V.Pham", "S.-H.Han", "J.-H.Kim", "D.-H.Kim", "L.-W.Kang"], "doi": null, "pmid": null}
+{"pdb": "7CW4", "title": "acetyl-coa acetyltransferase from bacillus cereus atcc 14579", "authors": ["J.Hong", "K.J.Kim"], "doi": "10.1016/J.BBRC.2020.09.048", "pmid": "32972748"}
+{"pdb": "2CWP", "title": "crystal structure of metrs related protein from pyrococcus horikoshii", "authors": ["K.Murayama", "M.Kato-Murayama", "M.Shirouzu", "S.Yokoyama", "Riken StructuralGenomics/proteomics Initiative (Rsgi)"], "doi": null, "pmid": null}
+{"pdb": "2CW7", "title": "crystal structure of intein homing endonuclease ii", "authors": ["H.Matsumura", "H.Takahashi", "T.Inoue", "H.Hashimoto", "M.Nishioka", "S.Fujiwara", "M.Takagi", "T.Imanaka", "Y.Kai"], "doi": "10.1002/PROT.20858", "pmid": "16493661"}
+{"pdb": "1CWU", "title": "brassica napus enoyl acp reductase a138g mutant complexed with nad+ and thienodiazaborine", "authors": ["A.Roujeinikova", "J.B.Rafferty", "D.W.Rice"], "doi": "10.1074/JBC.274.43.30811", "pmid": "10521472"}
+{"pdb": "3CWN", "title": "escherichia coli transaldolase b mutant f178y", "authors": ["T.Sandalova", "G.Schneider", "A.Samland"], "doi": "10.1074/JBC.M803184200", "pmid": "18687684"}
+{"pdb": "1CWL", "title": "human cyclophilin a complexed with 4 4-hydroxy-meleu cyclosporin", "authors": ["V.Mikol", "J.Kallen", "P.Taylor", "M.D.Walkinshaw"], "doi": "10.1006/JMBI.1998.2108", "pmid": "9769216"}
+{"pdb": "3CW2", "title": "crystal structure of the intact archaeal translation initiation factor 2 from sulfolobus solfataricus .", "authors": ["E.A.Stolboushkina", "S.V.Nikonov", "A.D.Nikulin", "U.Blaesi", "D.J.Manstein", "R.V.Fedorov", "M.B.Garber", "O.S.Nikonov"], "doi": "10.1016/J.JMB.2008.07.039", "pmid": "18675278"}
+{"pdb": "3CW9", "title": "4-chlorobenzoyl-coa ligase/synthetase in the thioester-forming conformation, bound to 4-chlorophenacyl-coa", "authors": ["A.S.Reger", "J.Cao", "R.Wu", "D.Dunaway-Mariano", "A.M.Gulick"], "doi": "10.1021/BI800696Y", "pmid": "18620418"}
+{"pdb": "3CWU", "title": "crystal structure of an alka host/guest complex 2'-fluoro-2'-deoxy-1, n6-ethenoadenine:thymine base pair", "authors": ["B.R.Bowman", "S.Lee", "S.Wang", "G.L.Verdine"], "doi": "10.1016/J.STR.2008.04.012", "pmid": "18682218"}
+{"pdb": "5CWF", "title": "crystal structure of de novo designed helical repeat protein dhr8", "authors": ["G.Bhabha", "D.C.Ekiert"], "doi": "10.1038/NATURE16162", "pmid": "26675729"}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump
@ -1,36 +1,6 @@
-{"pid": " Q6GZX4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 43"}], "title": "Putative transcription factor 001R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZX3", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 45"}], "title": "Uncharacterized protein 002L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197F8", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-02-23", "date_info": "entry version 29"}], "title": "Uncharacterized protein 002R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q197F7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2020-08-12", "date_info": "entry version 23"}], "title": "Uncharacterized protein 003L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZX2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 3R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZX1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 38"}], "title": "Uncharacterized protein 004R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197F5", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-10-12", "date_info": "entry version 32"}], "title": "Uncharacterized protein 005L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZX0", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 47"}], "title": "Uncharacterized protein 005R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q91G88", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2023-06-28", "date_info": "entry version 53"}], "title": "Putative KilA-N domain-containing protein 006L", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
-{"pid": " Q6GZW9", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 34"}], "title": "Uncharacterized protein 006R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZW8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 32"}], "title": "Uncharacterized protein 007R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197F3", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 28"}], "title": "Uncharacterized protein 007R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q197F2", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-02-23", "date_info": "entry version 22"}], "title": "Uncharacterized protein 008L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZW6", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 67"}], "title": "Putative helicase 009L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q91G85", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 38"}], "title": "Uncharacterized protein 009R", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
-{"pid": " Q6GZW5", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 010R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197E9", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 28"}], "title": "Uncharacterized protein 011L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZW4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 011R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZW3", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 012L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197E7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 37"}], "title": "Uncharacterized protein IIV3-013L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZW2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 30"}], "title": "Uncharacterized protein 013R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZW1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 014R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZW0", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 50"}], "title": "Uncharacterized protein 015R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 017L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV7", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 33"}], "title": "Uncharacterized protein 018L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV6", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 87"}], "title": "Putative serine/threonine-protein kinase 019R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV5", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 40"}], "title": "Uncharacterized protein 020R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZV4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 021L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197D8", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-12-14", "date_info": "entry version 35"}], "title": "Transmembrane protein 022L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZV2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 33"}], "title": "Uncharacterized protein 023R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197D7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 25"}], "title": "Uncharacterized protein 023R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q6GZV1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 024R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q197D5", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-10-12", "date_info": "entry version 24"}], "title": "Uncharacterized protein 025R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
-{"pid": " Q91G70", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2020-08-12", "date_info": "entry version 32"}], "title": "Uncharacterized protein 026R", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
-{"pid": " Q6GZU9", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 49"}], "title": "Uncharacterized protein 027R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": " Q6GZU8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 55"}], "title": "Uncharacterized protein 028R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": "Q6GZX4", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 41."}], "title": "Putative transcription factor 001R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": "Q6GZX3", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 42."}], "title": "Uncharacterized protein 002L;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": "Q197F8", "dates": [{"date": "16-JUN-2009", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "11-JUL-2006", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 27."}], "title": "Uncharacterized protein 002R;", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus."], "references": [{"PubMed": "16912294"}, {" DOI": "10.1128/jvi.00464-06"}]}
+{"pid": "Q197F7", "dates": [{"date": "16-JUN-2009", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "11-JUL-2006", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 23."}], "title": "Uncharacterized protein 003L;", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus."], "references": [{"PubMed": "16912294"}, {" DOI": "10.1128/jvi.00464-06"}]}
+{"pid": "Q6GZX2", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 36."}], "title": "Uncharacterized protein 3R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": "Q6GZX1", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 34."}], "title": "Uncharacterized protein 004R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
--- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala
+++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala
@ -26,7 +26,7 @@ class MAGMappingTest {
  @Test
  def mappingMagType(): Unit = {

-    checkResult[Publication](MagUtility.createResultFromType(null, null), invisible = true, "Other literature type")
+    checkResult[Publication](MagUtility.createResultFromType(null, null), invisible = false, "Other literature type")
    checkResult[Publication](
      MagUtility.createResultFromType(Some("BookChapter"), null),
      invisible = false,
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java
@ -17,6 +17,45 @@ import eu.dnetlib.pace.tree.support.TreeStats;

 class DecisionTreeTest {

+	/**
+	 * Loads the {@code dedup_conf_organization.json} configuration and the {@code organization.json}
+	 * sample from the test resources, converts the sample into a {@link Row} through
+	 * {@code SparkModel.rowFromJson} and checks that the row exists and carries a non-blank
+	 * {@code identifier}. The row and its {@code countrytitle} field are printed for inspection.
+	 */
+	@Test
+	void testJPath() throws IOException {
+
+		final String confJson = IOUtils.toString(getClass().getResourceAsStream("dedup_conf_organization.json"));
+		final DedupConfig dedupConf = DedupConfig.load(confJson);
+
+		final String orgJson = IOUtils.toString(getClass().getResourceAsStream("organization.json"));
+
+		final Row parsed = SparkModel.apply(dedupConf).rowFromJson(orgJson);
+
+		System.out.println("row = " + parsed);
+		Assertions.assertNotNull(parsed);
+
+		final CharSequence identifier = parsed.getAs("identifier");
+		Assertions.assertTrue(StringUtils.isNotBlank(identifier));
+
+		System.out.println("row = " + parsed.getAs("countrytitle"));
+	}
+
+	@Test
+	void jsonToModelTest() throws IOException {
+		DedupConfig conf = DedupConfig
+			.load(
+				IOUtils
+					.toString(
+						SparkOpenorgsDedupTest.class
+							.getResourceAsStream(
+								"/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json")));
+
+		final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json"));
+
+		Row row = SparkModel.apply(conf).rowFromJson(org);
+		// to check that the same parsing returns the same row
+		Row row1 = SparkModel.apply(conf).rowFromJson(org);
+
+		Assertions.assertEquals(row, row1);
+		System.out.println("row = " + row);
+		Assertions.assertNotNull(row);
+		Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier")));
+	}
+
 	@Test
 	void organizationDecisionTreeTest() throws Exception {
 		DedupConfig conf = DedupConfig
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
@ -452,18 +452,18 @@ public class SparkDedupTest implements Serializable {
 			assertEquals(ModelConstants.RESULT_RESULT, r.getRelType());
 			assertEquals(ModelConstants.DEDUP, r.getSubRelType());
 			assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass());
-			assertFalse(dups.contains(r.getTarget()));
+			assertTrue(dups.contains(r.getTarget()));
 		});

 		final List<Relation> mergedIn = pubs
 			.filter("target == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'")
 			.collectAsList();
-		assertEquals(1, mergedIn.size());
+		assertEquals(3, mergedIn.size());
 		mergedIn.forEach(r -> {
 			assertEquals(ModelConstants.RESULT_RESULT, r.getRelType());
 			assertEquals(ModelConstants.DEDUP, r.getSubRelType());
-			assertEquals(ModelConstants.MERGES, r.getRelClass());
-			assertFalse(dups.contains(r.getSource()));
+			assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass());
+			assertTrue(dups.contains(r.getSource()));
 		});

 		System.out.println("orgs_mergerel = " + orgs_mergerel);
@ -473,8 +473,8 @@ public class SparkDedupTest implements Serializable {
 		System.out.println("orp_mergerel = " + orp_mergerel);

 		if (CHECK_CARDINALITIES) {
-			assertEquals(1278, orgs_mergerel);
-			assertEquals(1158, pubs.count());
+			assertEquals(1268, orgs_mergerel);
+			assertEquals(1156, pubs.count());
 			assertEquals(292, sw_mergerel);
 			assertEquals(476, ds_mergerel);
 			assertEquals(742, orp_mergerel);
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java
@ -241,6 +241,7 @@ public class SparkPublicationRootsTest implements Serializable {

 		verifyRoot_case_1(roots, pubs);
 		verifyRoot_case_2(roots, pubs);
+		verifyRoot_case_3(roots, pubs);
 	}

 	private static void verifyRoot_case_1(Dataset<Publication> roots, Dataset<Publication> pubs) {
@ -321,6 +322,34 @@ public class SparkPublicationRootsTest implements Serializable {
 		assertTrue(Sets.difference(root_cf, dups_cf).isEmpty());
 	}

+	/**
+	 * Checks the dedup root {@code 50|dedup_wf_001::31ca734cc22181b704c4aa8fd050062a}: its publisher
+	 * must equal the publisher of the record {@code 50|od_______166::31ca734cc22181b704c4aa8fd050062a},
+	 * and every {@code collectedfrom} value of the root must also appear among the
+	 * {@code collectedfrom} values found in {@code pubs}.
+	 *
+	 * Declared {@code static}, like {@code verifyRoot_case_1}, because it reads no instance state.
+	 *
+	 * @param roots dataset searched for the root record
+	 * @param pubs dataset searched for the pivot duplicate; it also supplies the reference set of
+	 *        {@code collectedfrom} values
+	 */
+	private static void verifyRoot_case_3(Dataset<Publication> roots, Dataset<Publication> pubs) {
+		Publication root = roots
+			.filter("id = '50|dedup_wf_001::31ca734cc22181b704c4aa8fd050062a'")
+			.first();
+		assertNotNull(root);
+
+		Publication pivot_duplicate = pubs
+			.filter("id = '50|od_______166::31ca734cc22181b704c4aa8fd050062a'")
+			.first();
+
+		assertEquals(pivot_duplicate.getPublisher().getValue(), root.getPublisher().getValue());
+
+		// collectedfrom values gathered over all the records in pubs
+		Set<String> dups_cf = pubs
+			.collectAsList()
+			.stream()
+			.flatMap(p -> p.getCollectedfrom().stream())
+			.map(KeyValue::getValue)
+			.collect(Collectors.toCollection(HashSet::new));
+
+		Set<String> root_cf = root
+			.getCollectedfrom()
+			.stream()
+			.map(KeyValue::getValue)
+			.collect(Collectors.toCollection(HashSet::new));
+
+		// the root must not declare a collectedfrom value that is absent from pubs
+		assertTrue(Sets.difference(root_cf, dups_cf).isEmpty());
+	}
+
 	@Test
 	@Order(6)
 	void updateEntityTest() throws Exception {
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java
@ -143,9 +143,7 @@ public class SparkPublicationRootsTest2 implements Serializable {
 				"--graphBasePath", graphInputPath,
 				"--actionSetId", testActionSetId,
 				"--isLookUpUrl", "lookupurl",
-				"--workingPath", workingPath,
-				"--hiveMetastoreUris", "none",
-				"--pivotHistoryDatabase", ""
+				"--workingPath", workingPath
 			}), spark)
 				.run(isLookUpService);

@ -155,7 +153,7 @@ public class SparkPublicationRootsTest2 implements Serializable {
 			.as(Encoders.bean(Relation.class));

 		assertEquals(
-			4, merges
+			3, merges
 				.filter("relclass == 'isMergedIn'")
 				.map((MapFunction<Relation, String>) Relation::getTarget, Encoders.STRING())
 				.distinct()
@ -180,7 +178,7 @@ public class SparkPublicationRootsTest2 implements Serializable {
 			.textFile(workingPath + "/" + testActionSetId + "/publication_deduprecord")
 			.map(asEntity(Publication.class), Encoders.bean(Publication.class));

-		assertEquals(4, roots.count());
+		assertEquals(3, roots.count());

 		final Dataset<Publication> pubs = spark
 			.read()
@ -197,7 +195,7 @@ public class SparkPublicationRootsTest2 implements Serializable {
 			.collectAsList()
 			.get(0);

-		assertEquals("2022-01-01", root.getDateofacceptance().getValue());
+		assertEquals(crossref_duplicate.getDateofacceptance().getValue(), root.getDateofacceptance().getValue());
 		assertEquals(crossref_duplicate.getJournal().getName(), root.getJournal().getName());
 		assertEquals(crossref_duplicate.getJournal().getIssnPrinted(), root.getJournal().getIssnPrinted());
 		assertEquals(crossref_duplicate.getPublisher().getValue(), root.getPublisher().getValue());
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java
@ -168,7 +168,7 @@ public class SparkStatsTest implements Serializable {
 			.load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats")
 			.count();

-		assertEquals(412, orgs_blocks);
+		assertEquals(414, orgs_blocks);
 		assertEquals(221, pubs_blocks);
 		assertEquals(134, sw_blocks);
 		assertEquals(196, ds_blocks);
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
@ -73,6 +73,12 @@
    "name": "Irish Nephrology Society",
    "synonym": []
  },
+  {
+    "id": "100011062",
+    "uri": "http://dx.doi.org/10.13039/100011062",
+    "name": "Asian Spinal Cord Network",
+    "synonym": []
+  },
  {
    "id": "100011096",
    "uri": "http://dx.doi.org/10.13039/100011096",
@ -217,6 +223,12 @@
    "name": "Global Brain Health Institute",
    "synonym": []
  },
+  {
+    "id": "100015776",
+    "uri": "http://dx.doi.org/10.13039/100015776",
+    "name": "Health and Social Care Board",
+    "synonym": []
+  },
  {
    "id": "100015992",
    "uri": "http://dx.doi.org/10.13039/100015992",
@ -391,6 +403,18 @@
    "name": "Irish Hospice Foundation",
    "synonym": []
  },
+  {
+    "id": "501100001596",
+    "uri": "http://dx.doi.org/10.13039/501100001596",
+    "name": "Irish Research Council for Science, Engineering and Technology",
+    "synonym": []
+  },
+  {
+    "id": "501100001597",
+    "uri": "http://dx.doi.org/10.13039/501100001597",
+    "name": "Irish Research Council for the Humanities and Social Sciences",
+    "synonym": []
+  },
  {
    "id": "501100001598",
    "uri": "http://dx.doi.org/10.13039/501100001598",
@ -491,7 +515,7 @@
    "id": "501100002081",
    "uri": "http://dx.doi.org/10.13039/501100002081",
    "name": "Irish Research Council",
-    "synonym": ["501100001596", "501100001597"]
+    "synonym": []
  },
  {
    "id": "501100002736",
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -560,15 +560,7 @@ case object Crossref2Oaf {
                "10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" |
                "10.13039/501100013589" | "10.13039/501100000271" =>
              generateSimpleRelationFromAward(funder, "ukri________", a => a)
-            //HFRI
-            case "10.13039/501100013209" =>
-              generateSimpleRelationFromAward(funder, "hfri________", a => a)
-              val targetId = getProjectId("hfri________", "1e5e62235d094afd01cd56e65112fc63")
-              queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
-              queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
-            //ERASMUS+
-            case "10.13039/501100010790" =>
-              generateSimpleRelationFromAward(funder, "erasmusplus_", a => a)
+
            case _ => logger.debug("no match for " + funder.DOI.get)

          }
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala
@ -313,7 +313,7 @@ case object ConversionUtil {
      if (f.author.DisplayName.isDefined)
        a.setFullname(f.author.DisplayName.get)
      if (f.affiliation != null)
-        a.setAffiliation(List(asField(f.affiliation)).asJava)
+        a.setRawAffiliationString(List(f.affiliation).asJava)
      a.setPid(
        List(
          createSP(
@ -386,7 +386,7 @@ case object ConversionUtil {
      a.setFullname(f.author.DisplayName.get)

      if (f.affiliation != null)
-        a.setAffiliation(List(asField(f.affiliation)).asJava)
+        a.setRawAffiliationString(List(f.affiliation).asJava)

      a.setPid(
        List(
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java
@ -13,13 +13,13 @@ public class CommunityContentprovider {
 	private String openaireId;
 	private SelectionConstraints selectioncriteria;

-	private Boolean enabled;
+	private String enabled;

-	public Boolean getEnabled() {
+	public String getEnabled() {
 		return enabled;
 	}

-	public void setEnabled(Boolean enabled) {
+	public void setEnabled(String enabled) {
 		this.enabled = enabled;
 	}

--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
@ -53,8 +53,6 @@ public class Constraints implements Serializable {

 		for (Constraint sc : constraint) {
 			boolean verified = false;
-			if (!param.containsKey(sc.getField()))
-				return false;
 			for (String value : param.get(sc.getField())) {
 				if (sc.verifyCriteria(value.trim())) {
 					verified = true;
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
@ -14,7 +14,6 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
@ -85,26 +84,19 @@ public class SparkCountryPropagationJob {
 		Dataset<R> res = readPath(spark, sourcePath, resultClazz);

 		log.info("Reading prepared info: {}", preparedInfoPath);
-		final Dataset<Row> preparedInfoRaw = spark
+		Dataset<ResultCountrySet> prepared = spark
 			.read()
-			.json(preparedInfoPath);
+			.json(preparedInfoPath)
+			.as(Encoders.bean(ResultCountrySet.class));
+
+		res
+			.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
+			.map(getCountryMergeFn(), Encoders.bean(resultClazz))
+			.write()
+			.option("compression", "gzip")
+			.mode(SaveMode.Overwrite)
+			.json(outputPath);

-		if (!preparedInfoRaw.isEmpty()) {
-			final Dataset<ResultCountrySet> prepared = preparedInfoRaw.as(Encoders.bean(ResultCountrySet.class));
-			res
-				.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
-				.map(getCountryMergeFn(), Encoders.bean(resultClazz))
-				.write()
-				.option("compression", "gzip")
-				.mode(SaveMode.Overwrite)
-				.json(outputPath);
-		} else {
-			res
-				.write()
-				.option("compression", "gzip")
-				.mode(SaveMode.Overwrite)
-				.json(outputPath);
-		}
 	}

 	private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf_remove.xml
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf_remove.xml
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/dataset/dataset
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/dataset/dataset
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct/otherresearchproduct
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct/otherresearchproduct
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct~HEAD
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct~HEAD
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/publication/publication
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/publication/publication
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/software/software
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/software/software
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
@ -147,7 +147,6 @@ public class CleanGraphSparkJob {
 			.map((MapFunction<T, T>) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz))
 			.map((MapFunction<T, T>) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz))
 			.map((MapFunction<T, T>) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz))
-			.map((MapFunction<T, T>) GraphCleaningFunctions::dedicatedUglyHacks, Encoders.bean(clazz))
 			.filter((FilterFunction<T>) GraphCleaningFunctions::filter);

 		// read the master-duplicate tuples
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java
@ -9,10 +9,7 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SaveMode;
-import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@ -25,8 +22,6 @@ public class GraphHiveTableImporterJob {

 	private static final Logger log = LoggerFactory.getLogger(GraphHiveTableImporterJob.class);

-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
 	public static void main(String[] args) throws Exception {

 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -74,7 +69,12 @@ public class GraphHiveTableImporterJob {
 	private static <T extends Oaf> void loadGraphTable(SparkSession spark, String inputPath, String hiveDbName,
 		Class<T> clazz, int numPartitions) {

-		Dataset<String> dataset = spark.read().textFile(inputPath);
+		final Encoder<T> clazzEncoder = Encoders.bean(clazz);
+
+		Dataset<Row> dataset = spark
+				.read()
+				.schema(clazzEncoder.schema())
+				.json(inputPath);

 		if (numPartitions > 0) {
 			log.info("repartitioning {} to {} partitions", clazz.getSimpleName(), numPartitions);
@ -82,7 +82,6 @@ public class GraphHiveTableImporterJob {
 		}

 		dataset
-			.map((MapFunction<String, T>) s -> OBJECT_MAPPER.readValue(s, clazz), Encoders.bean(clazz))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.saveAsTable(tableIdentifier(hiveDbName, clazz));
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
@ -94,7 +94,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 					author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
 				}

-				author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info));
+				author.setRawAffiliationString(prepareListString(n, "./*[local-name()='affiliation']"));
 				author.setPid(preparePids(n, info));
 				author.setRank(pos++);
 				res.add(author);
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
@ -85,7 +85,7 @@
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <fork name="fork_downloads_csv">
+       <fork name="fork_downloads_csv">
        <path start="download_gold"/>
        <path start="download_doaj_json"/>
    </fork>
@ -223,13 +223,11 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-                --conf spark.sql.shuffle.partitions=15000
            </spark-opts>
            <arg>--hostedByMapPath</arg><arg>${hostedByMapPath}</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
@ -255,13 +253,11 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-                --conf spark.sql.shuffle.partitions=15000
            </spark-opts>
            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
@ -282,7 +278,6 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java
@ -73,14 +73,10 @@ public class GraphHiveImporterJobTest {
 		GraphHiveImporterJob
 			.main(
 				new String[] {
-					"-isSparkSessionManaged",
-					Boolean.FALSE.toString(),
-					"-inputPath",
-					getClass().getResource("/eu/dnetlib/dhp/oa/graph/sample").getPath(),
-					"-hiveMetastoreUris",
-					"",
-					"-hiveDbName",
-					dbName
+					"--isSparkSessionManaged", Boolean.FALSE.toString(),
+					"--inputPath", getClass().getResource("/eu/dnetlib/dhp/oa/graph/sample").getPath(),
+					"--hiveMetastoreUris", "",
+					"--hiveDbName", dbName
 				});

 		ModelSupport.oafTypes
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
@ -406,15 +406,15 @@ class MappersTest {
 		assertEquals("Baracchini", author.get().getSurname());
 		assertEquals("Theo", author.get().getName());

-		assertEquals(1, author.get().getAffiliation().size());
-		final Optional<Field<String>> opAff = author
+		assertEquals(1, author.get().getRawAffiliationString().size());
+		final Optional<String> opAff = author
 			.get()
-			.getAffiliation()
+			.getRawAffiliationString()
 			.stream()
 			.findFirst();
 		assertTrue(opAff.isPresent());
-		final Field<String> affiliation = opAff.get();
-		assertEquals("ISTI-CNR", affiliation.getValue());
+		final String affiliation = opAff.get();
+		assertEquals("ISTI-CNR", affiliation);

 		assertFalse(d.getSubject().isEmpty());
 		assertFalse(d.getInstance().isEmpty());
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/dataset/dataset_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/dataset/dataset_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/otherresearchproduct/otherresearchproduct_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/otherresearchproduct/otherresearchproduct_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/person/person_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/person/person_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/publication/publication_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/publication/publication_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/software/software_10.json.gz
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/software/software_10.json.gz
--- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala
@ -31,5 +31,11 @@ class ORCIDAuthorMatchersTest {
    assertTrue(matchOrderedTokenAndAbbreviations("孙林 Sun Lin", "Sun Lin"))
    // assertTrue(AuthorsMatchRevised.compare("孙林 Sun Lin", "孙林")); // not yet implemented
  }
+  @Test def testDocumentationNames(): Unit = { // abbreviated vs. spelled-out middle name, hyphenated surname
+    assertTrue(matchOrderedTokenAndAbbreviations("James C. A. Miller-Jones", "James Antony Miller-Jones"))
+  }

+  @Test def testDocumentationNames2(): Unit = { // surname hyphenated in the first name, space-separated in the second
+    assertTrue(matchOrderedTokenAndAbbreviations("James C. A. Miller-Jones", "James Antony Miller Jones"))
+  }
 }
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml
@ -69,7 +69,7 @@
        </configuration>
    </global>

-    <start to="irish_oaiphm_provision"/>
+    <start to="oaiphm_provision"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java
@ -67,7 +67,7 @@ public class PrepareRelationsJobTest {
 	@Test
 	void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception {

-		final int maxRelations = 5;
+		final int maxRelations = 20;
 		PrepareRelationsJob
 			.main(
 				new String[] {
@ -86,7 +86,7 @@ public class PrepareRelationsJobTest {
 			.as(Encoders.bean(Relation.class))
 			.cache();

-		assertEquals(44, out.count());
+		assertEquals(maxRelations, out.count());

 		Dataset<Row> freq = out
 			.toDF()
@ -101,8 +101,12 @@ public class PrepareRelationsJobTest {
 		long affiliation = getRows(freq, AFFILIATION).get(0).getAs("count");

 		assertEquals(outcome, participation);
-		assertEquals(outcome, affiliation);
-		assertEquals(4, affiliation);
+		assertTrue(outcome > affiliation);
+		assertTrue(participation > affiliation);
+
+		assertEquals(7, outcome);
+		assertEquals(7, participation);
+		assertEquals(6, affiliation);
 	}

 	protected List<Row> getRows(Dataset<Row> freq, String col) {
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java
@ -91,6 +91,9 @@ class SolrRecordDumpJobTest {
 	public void prepareMocks() throws ISLookUpException, IOException {
 		isLookupClient.setIsLookup(isLookUpService);

+		Mockito
+			.when(isLookupClient.getDsId(Mockito.anyString()))
+			.thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl");
 		Mockito
 			.when(isLookupClient.getLayoutSource(Mockito.anyString()))
 			.thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml")));
--- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml
@ -48,25 +48,16 @@
 			<case to="get-file-names">${wf:conf('resume') eq "format-results"}</case>
 			<case to="map-openaire-to-doi">${wf:conf('resume') eq "map-ids"}</case>
 			<case to="map-scores-to-dois">${wf:conf('resume') eq "map-scores"}</case>
-			<case to="clear-working-dir">${wf:conf('resume') eq "start"}</case>
+			<case to="create-openaire-ranking-graph">${wf:conf('resume') eq "start"}</case>

 			<!-- Aggregation of impact scores on the project level		-->
 			<case to="project-impact-indicators">${wf:conf('resume') eq "projects-impact"}</case>
 			<case to="create-actionset">${wf:conf('resume') eq "create-actionset"}</case>

-			<default to="clear-working-dir" />
+			<default to="create-openaire-ranking-graph" />
 		</switch>
 	</decision>

-	<action name="clear-working-dir">
-		<fs>
-			<delete path="${workingDir}"/>
-			<mkdir path="${workingDir}"/>
-		</fs>
-		<ok to="create-openaire-ranking-graph"/>
-		<error to="clear-working-dir-fail"/>
-	</action>
-
 	<!-- initial step: create citation network -->
 	<action name="create-openaire-ranking-graph">
 		<spark xmlns="uri:oozie:spark-action:0.2">
@ -627,10 +618,6 @@
 		<message>Calculating project impact indicators failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
 	</kill>

-	<kill name="clear-working-dir-fail">
-		<message>Re-create working dir failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-	</kill>
-
 	<!-- Define ending node -->
 	<end name="end" />

--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql
@ -32,7 +32,7 @@ select distinct * from (
       from SOURCE.result r
                join SOURCE.result_projects rp on rp.id=r.id
                join SOURCE.project p on p.id=rp.project
-                join openaire_prod_stats_monitor_ie_20231226b.irish_funders irf on irf.funder=p.funder
+                join TARGET.irish_funders irf on irf.funder=p.funder
       union all
       select r.*
       from SOURCE.result r
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql
@ -1,79 +1,3 @@
--drop database if exists TARGET cascade;
--create database if not exists TARGET;
--
--create view if not exists TARGET.category as select * from SOURCE.category;
--create view if not exists TARGET.concept as select * from SOURCE.concept;
--create view if not exists TARGET.context as select * from SOURCE.context;
--create view if not exists TARGET.country as select * from SOURCE.country;
--create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
--create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
--create view if not exists TARGET.funder as select * from SOURCE.funder;
--create view if not exists TARGET.fundref as select * from SOURCE.fundref;
--create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
--create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
--create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
--create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
--create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
--create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
--
--create table TARGET.result stored as parquet as
--    select distinct * from (
--        select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
--        union all
--        select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
--        union all
--        select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
--             'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
--             'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
--             'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
--             'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
--             'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
--             'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
--             'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
--             'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
--             'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
--             'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
--             -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
--             'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
--             'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
--             'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
--             'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
--             'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
--             'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
--             'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
--             'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
--             'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
--             'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
--             'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
--             'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
--             'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
--             'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
--             'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
--             'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
--             'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
--             'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
--             'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
--             'openorgs____::e15adb13c4dadd49de4d35c39b5da93a',  -- Nanyang Technological University
--             'openorgs____::4b34103bde246228fcd837f5f1bf4212',  -- Autonomous University of Barcelona
--             'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb',	-- McMaster University
--             'openorgs____::51c7fc556e46381734a25a6fbc3fd398',	-- University of Modena and Reggio Emilia
--             'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db',	-- Bilkent University
--             'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06',	-- Saints Cyril and Methodius University of Skopje
--             'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
--             'openorgs____::b8b8ca674452579f3f593d9f5e557483',   -- University College Cork
--             'openorgs____::38d7097854736583dde879d12dacafca'	-- Brown University
--             'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech
--             'openorgs____::2530baca8a15936ba2e3297f2bce2e7e',	-- University of Cape Town
--             'openorgs____::d11f981828c485cd23d93f7f24f24db1',  -- Technological University Dublin
--             'openorgs____::5e6bf8962665cdd040341171e5c631d8',  -- Delft University of Technology
--             'openorgs____::846cb428d3f52a445f7275561a7beb5d',  -- University of Manitoba
--             'openorgs____::eb391317ed0dc684aa81ac16265de041',	-- Universitat Rovira i Virgili
--             'openorgs____::66aa9fc2fceb271423dfabcc38752dc0',  -- Lund University
--             'openorgs____::3cff625a4370d51e08624cc586138b2f'	-- IMT Atlantique
--        ) )) foo;
--
--ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
-
 create view if not exists TARGET.category as select * from SOURCE.category;
 create view if not exists TARGET.concept as select * from SOURCE.concept;
 create view if not exists TARGET.context as select * from SOURCE.context;
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql
@ -81,7 +81,17 @@ create table TARGET.result stored as parquet as
             'openorgs____::8839b55dae0c84d56fd533f52d5d483a',  -- Leibniz Institute of Ecological Urban and Regional Development
             'openorgs____::526468206bca24c1c90da6a312295cf4',	-- Cyprus University of Technology
             'openorgs____::b5ca9d4340e26454e367e2908ef3872f',	-- Alma Mater Studiorum University of Bologna
-             'openorgs____::a6340e6ecf60f6bba163659df985b0f2'	-- TU Dresden
+             'openorgs____::a6340e6ecf60f6bba163659df985b0f2',	-- TU Dresden
+             'openorgs____::64badd35233ba2cd4946368ef2f4cf57',  -- University of Vienna
+             'openorgs____::7501d66d2297a963ebfb075c43fff88e',  -- Royal Institute of Technology
+             'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf',	-- Sorbonne University
+             'openorgs____::b316f25380d106aac402f5ae8653910d',  -- Centre for Research on Ecology and Forestry Applications
+             'openorgs____::45a2076eee3013e0e85625ce61bcd272',  -- Institut d'Investigació Sanitària Illes Balears
+             'openorgs____::00b20b0a743a96169e6cf135e6e2bd7c',  -- Universidad Publica De Navarra
+             'openorgs____::0f398605c2459294d125ff23473a97dc',  -- Aalto University
+             'openorgs____::25b1fa62c7fd8e409d3a83c07e04b2d4',  -- WHU-Otto Beisheim School of Management
+             'openorgs____::d6eec313417f11205db4e736a34c0db6',  -- KEMPELENOV INSTITUT INTELIGENTNYCH TECHNOLOGII
+             'openorgs____::c2dfb90e797a2dc52f0084c549289d0c'  -- National Research Institute for Agriculture, Food and Environment
        ))) foo;

 --ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql
@ -61,7 +61,17 @@ create table TARGET.result stored as parquet as
             'openorgs____::8839b55dae0c84d56fd533f52d5d483a',  -- Leibniz Institute of Ecological Urban and Regional Development
             'openorgs____::526468206bca24c1c90da6a312295cf4',	-- Cyprus University of Technology
             'openorgs____::b5ca9d4340e26454e367e2908ef3872f',	-- Alma Mater Studiorum University of Bologna
-             'openorgs____::a6340e6ecf60f6bba163659df985b0f2'	-- TU Dresden
+             'openorgs____::a6340e6ecf60f6bba163659df985b0f2',	-- TU Dresden
+             'openorgs____::64badd35233ba2cd4946368ef2f4cf57',  -- University of Vienna
+             'openorgs____::7501d66d2297a963ebfb075c43fff88e',  -- Royal Institute of Technology
+             'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf',	-- Sorbonne University
+             'openorgs____::b316f25380d106aac402f5ae8653910d',  -- Centre for Research on Ecology and Forestry Applications
+             'openorgs____::45a2076eee3013e0e85625ce61bcd272',  -- Institut d'Investigació Sanitària Illes Balears
+             'openorgs____::00b20b0a743a96169e6cf135e6e2bd7c',  -- Universidad Publica De Navarra
+             'openorgs____::0f398605c2459294d125ff23473a97dc',  -- Aalto University
+             'openorgs____::25b1fa62c7fd8e409d3a83c07e04b2d4',  -- WHU-Otto Beisheim School of Management
+             'openorgs____::d6eec313417f11205db4e736a34c0db6',  -- KEMPELENOV INSTITUT INTELIGENTNYCH TECHNOLOGII
+             'openorgs____::c2dfb90e797a2dc52f0084c549289d0c'  -- National Research Institute for Agriculture, Food and Environment
        )))  foo;

 --ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
--- a/dhp-workflows/dhp-stats-update/installProject.sh
+++ b/dhp-workflows/dhp-stats-update/installProject.sh
@ -0,0 +1,18 @@
+# Install the whole "dnet-hadoop" project.
+
+# Delete this module's previous build-files in order to avoid any conflicts.
+rm -rf target/
+
+# Go to the root directory of this project. Abort if that fails, so Maven never runs from the wrong directory.
+cd ../../ || exit 1
+
+# Select the build profile.
+DEFAULT_PROFILE=''  # It's the empty profile.
+NEWER_VERSIONS_PROFILE='-Pscala-2.12'
+CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
+
+# Install the project.
+mvn clean install -U ${CHOSEN_MAVEN_PROFILE} -Dmaven.test.skip=true
+
+# We skip tests for all modules, since they take a big amount of time and some of them fail.
+# Any test added to this module will be executed in the "runOozieWorkflow.sh" script.
--- a/dhp-workflows/dhp-stats-update/runOozieWorkfow.sh
+++ b/dhp-workflows/dhp-stats-update/runOozieWorkfow.sh
@ -0,0 +1,20 @@
+# This script deploys and runs the oozie workflow on the cluster, defined in the "~/.dhp/application.properties" file.
+
+# Select the build profile.
+DEFAULT_PROFILE=''  # It's the empty profile.
+NEWER_VERSIONS_PROFILE='-Pscala-2.12'
+CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
+
+# Build and deploy this module.
+mvn clean package -U ${CHOSEN_MAVEN_PROFILE} -Poozie-package,deploy,run \
+      -Dworkflow.source.dir=eu/dnetlib/dhp/oa/graph/stats
+
+# Show the Oozie-job-ID.
+echo -e "\n\nShowing the contents of \"extract-and-run-on-remote-host.log\":\n"
+cat ./target/extract-and-run-on-remote-host.log
+
+# Check oozie workflow status
+# oozie job -oozie http://iis-cdh5-test-m3:11000/oozie -info <workflow-ID>
+
+# Get the <job-ID> from the previous output and check the logs:
+# yarn logs -applicationId application_<job-ID>
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql
@ -1,8 +1,10 @@
+set mapred.job.queue.name=analytics; /*EOS*/
+
 --------------------------------------------------------------
 --------------------------------------------------------------
 -- Stats database creation
 --------------------------------------------------------------
 --------------------------------------------------------------

-DROP database IF EXISTS ${stats_db_name} CASCADE;
-CREATE database ${stats_db_name};
+DROP database IF EXISTS ${stats_db_name} CASCADE; /*EOS*/
+CREATE database ${stats_db_name}; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql
@ -1,3 +1,5 @@
+set mapred.job.queue.name=analytics; /*EOS*/
+
 ------------------------------------------------------------------------------------------------
 ------------------------------------------------------------------------------------------------
 -- Tables/views from external tables/views (Fundref, Country, CountyGDP, roarmap, rndexpediture)
@ -5,27 +7,27 @@
 ------------------------------------------------------------------------------------------------
 CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS
 SELECT *
-FROM ${external_stats_db_name}.fundref;
+FROM ${external_stats_db_name}.fundref; /*EOS*/

 CREATE OR REPLACE VIEW ${stats_db_name}.country AS
 SELECT *
-FROM ${external_stats_db_name}.country;
+FROM ${external_stats_db_name}.country; /*EOS*/

 CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS
 SELECT *
-FROM ${external_stats_db_name}.countrygdp;
+FROM ${external_stats_db_name}.countrygdp; /*EOS*/

 CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS
 SELECT *
-FROM ${external_stats_db_name}.roarmap;
+FROM ${external_stats_db_name}.roarmap; /*EOS*/

 CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS
 SELECT *
-FROM ${external_stats_db_name}.rndexpediture;
+FROM ${external_stats_db_name}.rndexpediture; /*EOS*/

 CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS
 SELECT *
-FROM ${external_stats_db_name}.licenses_normalized;
+FROM ${external_stats_db_name}.licenses_normalized; /*EOS*/

 ------------------------------------------------------------------------------------------------
 ------------------------------------------------------------------------------------------------
@ -33,23 +35,23 @@ FROM ${external_stats_db_name}.licenses_normalized;
 ------------------------------------------------------------------------------------------------
 ------------------------------------------------------------------------------------------------
 create or replace view ${stats_db_name}.usage_stats as
-select * from openaire_prod_usage_stats.usage_stats;
+select * from openaire_prod_usage_stats.usage_stats; /*EOS*/

 create or replace view ${stats_db_name}.downloads_stats as
-select * from openaire_prod_usage_stats.downloads_stats;
+select * from openaire_prod_usage_stats.downloads_stats; /*EOS*/

 create or replace view ${stats_db_name}.pageviews_stats as
-select * from openaire_prod_usage_stats.pageviews_stats;
+select * from openaire_prod_usage_stats.pageviews_stats; /*EOS*/

 create or replace view ${stats_db_name}.views_stats as
-select * from openaire_prod_usage_stats.views_stats;
+select * from openaire_prod_usage_stats.views_stats; /*EOS*/

 ------------------------------------------------------------------------------------------------
 ------------------------------------------------------------------------------------------------
 -- Creation date of the database
 ------------------------------------------------------------------------------------------------
 ------------------------------------------------------------------------------------------------
-DROP TABLE IF EXISTS ${stats_db_name}.creation_date purge;
+DROP TABLE IF EXISTS ${stats_db_name}.creation_date purge; /*EOS*/

 create table ${stats_db_name}.creation_date STORED AS PARQUET as
-select date_format(current_date(), 'dd-MM-yyyy') as date;
+select date_format(current_date(), 'dd-MM-yyyy') as date; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
@ -1,110 +1,11 @@
+set mapred.job.queue.name=analytics; /*EOS*/
+
 ----------------------------------------------------------------
 ----------------------------------------------------------------
 -- Post processing - Updates on main tables
 ----------------------------------------------------------------
 ----------------------------------------------------------------

--Datasource temporary table updates
-UPDATE ${stats_db_name}.datasource_tmp
-SET harvested='true'
-WHERE datasource_tmp.id IN (SELECT DISTINCT d.id
-                            FROM ${stats_db_name}.datasource_tmp d,
-                                 ${stats_db_name}.result_datasources rd
-                            WHERE d.id = rd.datasource);
-
-- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables
-UPDATE ${stats_db_name}.project_tmp
-SET haspubs='yes'
-WHERE project_tmp.id IN (SELECT pr.id
-                         FROM ${stats_db_name}.project_results pr,
-                              ${stats_db_name}.result r
-                         WHERE pr.result = r.id
-                           AND r.type = 'publication');
-DROP TABLE IF EXISTS ${stats_db_name}.stored purge;
-
-CREATE TABLE ${stats_db_name}.project stored as parquet as
-SELECT p.id,
-       p.acronym,
-       p.title,
-       p.funder,
-       p.funding_lvl0,
-       p.funding_lvl1,
-       p.funding_lvl2,
-       p.ec39,
-       p.type,
-       p.startdate,
-       p.enddate,
-       p.start_year,
-       p.end_year,
-       p.duration,
-       CASE WHEN prr1.id IS NULL THEN 'no' ELSE 'yes' END            AS haspubs,
-       CASE WHEN prr1.id IS NULL THEN 0 ELSE prr1.np END             AS numpubs,
-       CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub,
-       CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END             AS delayedpubs,
-       p.callidentifier,
-       p.code,
-       p.totalcost,
-       p.fundedamount,
-       p.currency
-FROM ${stats_db_name}.project_tmp p
-         LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
-                    FROM ${stats_db_name}.project_results pr
-                             INNER JOIN ${stats_db_name}.result r ON pr.result = r.id
-                    WHERE r.type = 'publication'
-                    GROUP BY pr.id) AS prr1 on prr1.id = p.id
-         LEFT JOIN (SELECT pp.id,
-                           max(datediff(to_date(r.date), to_date(pp.enddate))) AS daysForlastPub,
-                           count(distinct r.id)                                AS dp
-                    FROM ${stats_db_name}.project_tmp pp,
-                         ${stats_db_name}.project_results pr,
-                         ${stats_db_name}.result r
-                    WHERE pp.id = pr.id
-                      AND pr.result = r.id
-                      AND r.type = 'publication'
-                      AND datediff(to_date(r.date), to_date(pp.enddate)) > 0
-                    GROUP BY pp.id) AS prr2
-                   ON prr2.id = p.id;
-
-UPDATE ${stats_db_name}.publication_tmp
-SET delayed = 'yes'
-WHERE publication_tmp.id IN (SELECT distinct r.id
-                             FROM ${stats_db_name}.result r,
-                                  ${stats_db_name}.project_results pr,
-                                  ${stats_db_name}.project_tmp p
-                             WHERE r.id = pr.result
-                               AND pr.id = p.id
-                               AND to_date(r.date) - to_date(p.enddate) > 0);
-
-UPDATE ${stats_db_name}.dataset_tmp
-SET delayed = 'yes'
-WHERE dataset_tmp.id IN (SELECT distinct r.id
-                         FROM ${stats_db_name}.result r,
-                              ${stats_db_name}.project_results pr,
-                              ${stats_db_name}.project_tmp p
-                         WHERE r.id = pr.result
-                           AND pr.id = p.id
-                           AND to_date(r.date) - to_date(p.enddate) > 0);
-
-UPDATE ${stats_db_name}.software_tmp
-SET delayed = 'yes'
-WHERE software_tmp.id IN (SELECT distinct r.id
-                          FROM ${stats_db_name}.result r,
-                               ${stats_db_name}.project_results pr,
-                               ${stats_db_name}.project_tmp p
-                          WHERE r.id = pr.result
-                            AND pr.id = p.id
-                            AND to_date(r.date) - to_date(p.enddate) > 0);
-
-UPDATE ${stats_db_name}.otherresearchproduct_tmp
-SET delayed = 'yes'
-WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id
-                                      FROM ${stats_db_name}.result r,
-                                           ${stats_db_name}.project_results pr,
-                                           ${stats_db_name}.project_tmp p
-                                      WHERE r.id = pr.result
-                                        AND pr.id = p.id
-                                        AND to_date(r.date) - to_date(p.enddate) > 0);
-
 CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
 SELECT result_projects.id          AS result,
       result_projects.project     AS project_results,
@ -116,4 +17,4 @@ FROM ${stats_db_name}.result_projects,
     ${stats_db_name}.project
 WHERE result_projects.id = result.id
  AND result.type = 'publication'
-  AND project.id = result_projects.project;
+  AND project.id = result_projects.project; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql
@ -1,42 +1,4 @@
------------------------------------------------------------------------------------------------------
-- Creating parquet tables from the updated temporary tables and removing unnecessary temporary tables
------------------------------------------------------------------------------------------------------
-DROP TABLE IF EXISTS ${stats_db_name}.datasource purge;
-
-CREATE TABLE ${stats_db_name}.datasource stored AS parquet AS
-SELECT *
-FROM ${stats_db_name}.datasource_tmp;
-
-DROP TABLE IF EXISTS ${stats_db_name}.publication purge;
-
-CREATE TABLE ${stats_db_name}.publication stored AS parquet AS
-SELECT *
-FROM ${stats_db_name}.publication_tmp;
-
-DROP TABLE IF EXISTS ${stats_db_name}.dataset purge;
-
-CREATE TABLE ${stats_db_name}.dataset stored AS parquet AS
-SELECT *
-FROM ${stats_db_name}.dataset_tmp;
-
-DROP TABLE IF EXISTS ${stats_db_name}.software purge;
-
-CREATE TABLE ${stats_db_name}.software stored AS parquet AS
-SELECT *
-FROM ${stats_db_name}.software_tmp;
-
-DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct purge;
-
-CREATE TABLE ${stats_db_name}.otherresearchproduct stored AS parquet AS
-SELECT *
-FROM ${stats_db_name}.otherresearchproduct_tmp;
-
-DROP TABLE ${stats_db_name}.project_tmp;
-DROP TABLE ${stats_db_name}.datasource_tmp;
-DROP TABLE ${stats_db_name}.publication_tmp;
-DROP TABLE ${stats_db_name}.dataset_tmp;
-DROP TABLE ${stats_db_name}.software_tmp;
-DROP TABLE ${stats_db_name}.otherresearchproduct_tmp;
+set mapred.job.queue.name=analytics; /*EOS*/

 ----------------------------------------------
 -- Re-creating views from final parquet tables
@ -54,4 +16,4 @@ SELECT *, bestlicence AS access_mode
 FROM ${stats_db_name}.dataset
 UNION ALL
 SELECT *, bestlicence AS access_mode
-FROM ${stats_db_name}.otherresearchproduct;
+FROM ${stats_db_name}.otherresearchproduct; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
@ -1,3 +1,5 @@
+set mapred.job.queue.name=analytics; /*EOS*/
+
 ------------------------------------------------------
 ------------------------------------------------------
 -- Additional relations
@ -5,10 +7,10 @@
 -- Sources related tables/views
 ------------------------------------------------------
 ------------------------------------------------------
-DROP TABLE IF EXISTS ${stats_db_name}.publication_sources purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_sources purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources STORED AS PARQUET as
-SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource 
+SELECT /*+ COALESCE(100) */ p.id, case when d.id is null then 'other' else p.datasource end as datasource
 FROM (
    SELECT  substr(p.id, 4) as id, substr(datasource, 4) as datasource 
 from ${openaire_db_name}.publication p lateral view explode(p.collectedfrom.key) c as datasource) p 
@ -16,12 +18,12 @@ LEFT OUTER JOIN
 (
    SELECT substr(d.id, 4) id 
    from ${openaire_db_name}.datasource d 
-    WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
+    WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.dataset_sources purge;
+DROP TABLE IF EXISTS ${stats_db_name}.dataset_sources purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as
-SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource 
+SELECT /*+ COALESCE(100) */ p.id, case when d.id is null then 'other' else p.datasource end as datasource
 FROM (
    SELECT  substr(p.id, 4) as id, substr(datasource, 4) as datasource 
 from ${openaire_db_name}.dataset p lateral view explode(p.collectedfrom.key) c as datasource) p 
@ -29,12 +31,12 @@ LEFT OUTER JOIN
 (
    SELECT substr(d.id, 4) id 
    from ${openaire_db_name}.datasource d 
-    WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
+    WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
    
-DROP TABLE IF EXISTS ${stats_db_name}.software_sources purge;
+DROP TABLE IF EXISTS ${stats_db_name}.software_sources purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as
-SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource 
+SELECT /*+ COALESCE(100) */ p.id, case when d.id is null then 'other' else p.datasource end as datasource
 FROM (
    SELECT  substr(p.id, 4) as id, substr(datasource, 4) as datasource 
 from ${openaire_db_name}.software p lateral view explode(p.collectedfrom.key) c as datasource) p 
@ -42,12 +44,12 @@ LEFT OUTER JOIN
 (
    SELECT substr(d.id, 4) id 
    from ${openaire_db_name}.datasource d 
-    WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
+    WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
    
-DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_sources purge;
+DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_sources purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as
-SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource 
+SELECT /*+ COALESCE(100) */ p.id, case when d.id is null then 'other' else p.datasource end as datasource
 FROM (
    SELECT  substr(p.id, 4) as id, substr(datasource, 4) as datasource 
 from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.collectedfrom.key) c as datasource) p 
@ -55,7 +57,7 @@ LEFT OUTER JOIN
 (
    SELECT substr(d.id, 4) id 
    from ${openaire_db_name}.datasource d 
-    WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
+    WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/

 CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS
 SELECT * FROM ${stats_db_name}.publication_sources
@ -64,24 +66,24 @@ SELECT * FROM ${stats_db_name}.dataset_sources
 UNION ALL
 SELECT * FROM ${stats_db_name}.software_sources
 UNION ALL
-SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
+SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.result_orcid purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_orcid purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_orcid STORED AS PARQUET as
-select distinct res.id, upper(regexp_replace(res.orcid, 'http://orcid.org/' ,'')) as orcid
+select /*+ COALESCE(100) */ distinct res.id, upper(regexp_replace(res.orcid, 'http://orcid.org/' ,'')) as orcid
 from (
    SELECT substr(res.id, 4) as id, auth_pid.value as orcid
    FROM ${openaire_db_name}.result res
    LATERAL VIEW explode(author) a as auth
    LATERAL VIEW explode(auth.pid) ap as auth_pid
    LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
-    WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res;
+    WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.result_result purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_result purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as
-select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype
+select /*+ COALESCE(100) */ substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype
 from ${openaire_db_name}.relation rel
 join ${openaire_db_name}.result r1 on rel.source=r1.id
 join ${openaire_db_name}.result r2 on r2.id=rel.target
@ -91,12 +93,12 @@ where reltype='resultResult'
    and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
    and r1.resulttype.classname != 'other'
    and r2.resulttype.classname != 'other'
-    and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;
+    and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.result_citations_oc purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_citations_oc purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as
-select substr(target, 4) as id, count(distinct substr(source, 4)) as citations
+select /*+ COALESCE(100) */ substr(target, 4) as id, count(distinct substr(source, 4)) as citations
 from ${openaire_db_name}.relation rel
 join ${openaire_db_name}.result r1 on rel.source=r1.id
 join ${openaire_db_name}.result r2 on r2.id=rel.target
@ -108,12 +110,12 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
    and r1.resulttype.classname != 'other'
    and r2.resulttype.classname != 'other'
    and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
-group by substr(target, 4);
+group by substr(target, 4); /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.result_references_oc purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_references_oc purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as
-select substr(source, 4) as id, count(distinct substr(target, 4)) as references
+select /*+ COALESCE(100) */ substr(source, 4) as id, count(distinct substr(target, 4)) as references
 from ${openaire_db_name}.relation rel
         join ${openaire_db_name}.result r1 on rel.source=r1.id
         join ${openaire_db_name}.result r2 on r2.id=rel.target
@ -125,4 +127,4 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
    and r1.resulttype.classname != 'other'
    and r2.resulttype.classname != 'other'
    and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
-group by substr(source, 4);
+group by substr(source, 4); /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql
@ -1,4 +1,5 @@
-set mapred.job.queue.name=analytics;
+set mapred.job.queue.name=analytics; /*EOS*/
+
 ------------------------------------------------------
 ------------------------------------------------------
 -- Additional relations
@ -6,33 +7,33 @@ set mapred.job.queue.name=analytics;
 -- Licences related tables/views
 ------------------------------------------------------
 ------------------------------------------------------
-DROP TABLE IF EXISTS ${stats_db_name}.publication_licenses purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_licenses purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses STORED AS PARQUET AS
-SELECT substr(p.id, 4) as id, licenses.value as type
+SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
 from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
-where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
+where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.dataset_licenses purge;
+DROP TABLE IF EXISTS ${stats_db_name}.dataset_licenses purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS
-SELECT substr(p.id, 4) as id, licenses.value as type 
+SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
 from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
-where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
+where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.software_licenses purge;
+DROP TABLE IF EXISTS ${stats_db_name}.software_licenses purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS
-SELECT substr(p.id, 4) as id, licenses.value as type 
+SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
 from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
-where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
+where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_licenses purge;
+DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_licenses purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS
-SELECT substr(p.id, 4) as id, licenses.value as type 
+SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
 from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
-where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
+where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/

 CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
 SELECT * FROM ${stats_db_name}.publication_licenses
@ -41,29 +42,29 @@ SELECT * FROM ${stats_db_name}.dataset_licenses
 UNION ALL
 SELECT * FROM ${stats_db_name}.software_licenses
 UNION ALL
-SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
+SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.organization_pids purge;
+DROP TABLE IF EXISTS ${stats_db_name}.organization_pids purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET AS
-select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid 
-from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid;
+select /*+ COALESCE(100) */ substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid
+from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.organization_sources purge;
+DROP TABLE IF EXISTS ${stats_db_name}.organization_sources purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as
-SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource 
+SELECT /*+ COALESCE(100) */ o.id, case when d.id is null then 'other' else o.datasource end as datasource
 FROM (
-    SELECT  substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource 
+    SELECT substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource
    from ${openaire_db_name}.organization o lateral view explode(o.collectedfrom) instances as instance) o 
    LEFT OUTER JOIN (
        SELECT substr(d.id, 4) id 
        from ${openaire_db_name}.datasource d 
-        WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id;
+        WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.result_accessroute purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_accessroute purge; /*EOS*/

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET as
-select distinct substr(id,4) as id, accessroute from ${openaire_db_name}.result
+select /*+ COALESCE(100) */ distinct substr(id,4) as id, accessroute from ${openaire_db_name}.result
    lateral view explode (instance.accessright.openaccessroute) openaccessroute as accessroute
-WHERE datainfo.deletedbyinference=false and datainfo.invisible = FALSE;
+WHERE datainfo.deletedbyinference=false and datainfo.invisible = FALSE; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
@ -1,4 +1,4 @@
-set mapred.job.queue.name=analytics;
+set mapred.job.queue.name=analytics; /*EOS*/

 ------------------------------------------------------
 ------------------------------------------------------
@ -8,7 +8,7 @@ set mapred.job.queue.name=analytics;
 ------------------------------------------------------
 ------------------------------------------------------

-DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge; /*EOS*/
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
 with peer_reviewed as (
    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
@ -18,15 +18,15 @@ non_peer_reviewed as (
    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
    from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
-select distinct *
+select /*+ COALESCE(100) */ distinct *
 from (
    select peer_reviewed.* from peer_reviewed
    union all
    select non_peer_reviewed.* from non_peer_reviewed
    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-    where peer_reviewed.id is null) pr;
+    where peer_reviewed.id is null) pr; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge;
+DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge; /*EOS*/
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
 with peer_reviewed as (
    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
@ -36,15 +36,15 @@ non_peer_reviewed as (
    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
    from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
-select distinct *
+select /*+ COALESCE(100) */ distinct *
 from (
    select peer_reviewed.* from peer_reviewed
    union all
    select non_peer_reviewed.* from non_peer_reviewed
    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-    where peer_reviewed.id is null) pr;
+    where peer_reviewed.id is null) pr; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge;
+DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge; /*EOS*/
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
 with peer_reviewed as (
    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
@ -54,15 +54,15 @@ non_peer_reviewed as (
    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
    from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
-select distinct *
+select /*+ COALESCE(100) */ distinct *
 from (
    select peer_reviewed.* from peer_reviewed
    union all
    select non_peer_reviewed.* from non_peer_reviewed
    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-    where peer_reviewed.id is null) pr;
+    where peer_reviewed.id is null) pr; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge;
+DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge; /*EOS*/
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
 with peer_reviewed as (
    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
@ -72,13 +72,13 @@ non_peer_reviewed as (
    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
    from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
-select distinct *
+select /*+ COALESCE(100) */ distinct *
 from (
    select peer_reviewed.* from peer_reviewed
    union all
    select non_peer_reviewed.* from non_peer_reviewed
    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-    where peer_reviewed.id is null) pr;
+    where peer_reviewed.id is null) pr; /*EOS*/

 CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
 select * from ${stats_db_name}.publication_refereed
@ -87,23 +87,23 @@ select * from ${stats_db_name}.dataset_refereed
 union all
 select * from ${stats_db_name}.software_refereed
 union all
-select * from ${stats_db_name}.otherresearchproduct_refereed;
+select * from ${stats_db_name}.otherresearchproduct_refereed; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.indi_impact_measures purge;
+DROP TABLE IF EXISTS ${stats_db_name}.indi_impact_measures purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_impact_measures STORED AS PARQUET as
-select substr(id, 4) as id, measures_ids.id impactmetric, cast(measures_ids.unit.value[0] as double) score,
+select /*+ COALESCE(100) */ substr(id, 4) as id, measures_ids.id impactmetric, cast(measures_ids.unit.value[0] as double) score,
 cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] impact_class
 from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids
-where measures_ids.id!='views' and measures_ids.id!='downloads';
+where measures_ids.id!='views' and measures_ids.id!='downloads'; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.result_apc_affiliations purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_apc_affiliations purge; /*EOS*/

 create table if not exists ${stats_db_name}.result_apc_affiliations STORED AS PARQUET as
-select distinct substr(rel.target,4) id, substr(rel.source,4) organization, o.legalname.value name,
+select /*+ COALESCE(100) */ distinct substr(rel.target,4) id, substr(rel.source,4) organization, o.legalname.value name,
 cast(rel.properties[0].value as double) apc_amount,
 rel.properties[1].value apc_currency
 from ${openaire_db_name}.relation rel
 join ${openaire_db_name}.organization o on o.id=rel.source
 join ${openaire_db_name}.result r on r.id=rel.target
-where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;
+where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
@ -1,27 +1,27 @@
-set mapred.job.queue.name=analytics;
+set mapred.job.queue.name=analytics; /*EOS*/

 -------------------------------------------
 --- Extra tables, mostly used by indicators

-DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge; /*EOS*/

 create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as
-select r.id, count(distinct p.id) as count
+select /*+ COALESCE(100) */ r.id, count(distinct p.id) as count
 from ${stats_db_name}.result r
 left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
 left outer join ${stats_db_name}.project p on p.id=rp.project
-group by r.id;
+group by r.id; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/

 create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
-select r.id, count(distinct p.funder) as count
+select /*+ COALESCE(100) */ r.id, count(distinct p.funder) as count
 from ${stats_db_name}.result r
 left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
 left outer join ${stats_db_name}.project p on p.id=rp.project
-group by r.id;
+group by r.id; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.project_resultcount purge;
+DROP TABLE IF EXISTS ${stats_db_name}.project_resultcount purge; /*EOS*/

 create table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as
 with rcount as (
@ -30,39 +30,39 @@ with rcount as (
    left outer join ${stats_db_name}.result_projects rp on rp.project=p.id
    left outer join ${stats_db_name}.result r on r.id=rp.id
    group by r.type, p.id )
-select rcount.pid, sum(case when rcount.type='publication' then rcount.count else 0 end) as publications,
+select /*+ COALESCE(100) */ rcount.pid, sum(case when rcount.type='publication' then rcount.count else 0 end) as publications,
    sum(case when rcount.type='dataset' then rcount.count else 0 end) as datasets,
    sum(case when rcount.type='software' then rcount.count else 0 end) as software,
    sum(case when rcount.type='other' then rcount.count else 0 end) as other
 from rcount
-group by rcount.pid;
+group by rcount.pid; /*EOS*/

-create or replace view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture;
-create or replace view ${stats_db_name}.rndgdpexpenditure as select * from stats_ext.rndgdpexpenditure;
-create or replace view ${stats_db_name}.doctoratestudents as select * from stats_ext.doctoratestudents;
-create or replace view ${stats_db_name}.totalresearchers as select * from stats_ext.totalresearchers;
-create or replace view ${stats_db_name}.totalresearchersft as select * from stats_ext.totalresearchersft;
-create or replace view ${stats_db_name}.hrrst as select * from stats_ext.hrrst;
-create or replace view ${stats_db_name}.graduatedoctorates as select * from stats_ext.graduatedoctorates;
+create or replace view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture; /*EOS*/
+create or replace view ${stats_db_name}.rndgdpexpenditure as select * from stats_ext.rndgdpexpenditure; /*EOS*/
+create or replace view ${stats_db_name}.doctoratestudents as select * from stats_ext.doctoratestudents; /*EOS*/
+create or replace view ${stats_db_name}.totalresearchers as select * from stats_ext.totalresearchers; /*EOS*/
+create or replace view ${stats_db_name}.totalresearchersft as select * from stats_ext.totalresearchersft; /*EOS*/
+create or replace view ${stats_db_name}.hrrst as select * from stats_ext.hrrst; /*EOS*/
+create or replace view ${stats_db_name}.graduatedoctorates as select * from stats_ext.graduatedoctorates; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.result_instance purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_instance purge; /*EOS*/

 create table if not exists ${stats_db_name}.result_instance stored as parquet as
-select distinct r.*
+select /*+ COALESCE(100) */ distinct r.*
 from (
         select substr(r.id, 4) as id, inst.accessright.classname as accessright, inst.accessright.openaccessroute as accessright_uw, substr(inst.collectedfrom.key, 4) as collectedfrom,
                substr(inst.hostedby.key, 4) as hostedby, inst.dateofacceptance.value as dateofacceptance, inst.license.value as license, p.qualifier.classname as pidtype, p.value as pid
         from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view outer explode(inst.pid) pids as p) r
-join ${stats_db_name}.result res on res.id=r.id;
+join ${stats_db_name}.result res on res.id=r.id; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.result_apc purge;
+DROP TABLE IF EXISTS ${stats_db_name}.result_apc purge; /*EOS*/

 create table if not exists ${stats_db_name}.result_apc STORED AS PARQUET as
-select distinct r.id, r.amount, r.currency
+select /*+ COALESCE(100) */ distinct r.id, r.amount, r.currency
 from (
         select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency
         from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
 join ${stats_db_name}.result res on res.id=r.id
-where r.amount is not null;
+where r.amount is not null; /*EOS*/

-create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset;
+create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
@ -1,7 +1,7 @@
 -- Sprint 1 ----
 drop table if exists ${stats_db_name}.indi_pub_green_oa purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
-select distinct p.id, coalesce(green_oa, 0) as green_oa
+select /*+ COALESCE(100) */ distinct p.id, coalesce(green_oa, 0) as green_oa
 from ${stats_db_name}.publication p
 left outer join (
    select p.id, 1 as green_oa
@ -12,7 +12,7 @@ left outer join (

 drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as
-select distinct p.id, coalesce(grey_lit, 0) as grey_lit
+select /*+ COALESCE(100) */ distinct p.id, coalesce(grey_lit, 0) as grey_lit
 from ${stats_db_name}.publication p
 left outer join (
    select p.id, 1 as grey_lit
@ -23,7 +23,7 @@ left outer join (

 drop table if exists ${stats_db_name}.indi_pub_doi_from_crossref purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as
-select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
+select /*+ COALESCE(100) */ distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
 from ${stats_db_name}.publication p
 left outer join (
    select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri
@ -33,7 +33,7 @@ left outer join (
 -- Sprint 2 ----
 drop table if exists ${stats_db_name}.indi_result_has_cc_licence purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as
-select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
+select /*+ COALESCE(100) */ distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
 from ${stats_db_name}.result r
 left outer join (
    select r.id, license.type as lic from ${stats_db_name}.result r
@ -42,7 +42,7 @@ left outer join (

 drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as
-select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
+select /*+ COALESCE(100) */ distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
 from ${stats_db_name}.result r
 left outer join (
    select r.id, lower(parse_url(license.type, "HOST")) as lic_host
@ -52,12 +52,12 @@ left outer join (

 drop table if exists ${stats_db_name}.indi_pub_has_abstract purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as
-select distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract
+select /*+ COALESCE(100) */ distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract
 from ${stats_db_name}.publication; /*EOS*/

 drop table if exists ${stats_db_name}.indi_result_with_orcid purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as
-select distinct r.id, coalesce(has_orcid, 0) as has_orcid
+select /*+ COALESCE(100) */ distinct r.id, coalesce(has_orcid, 0) as has_orcid
 from ${stats_db_name}.result r
 left outer join (
    select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp on r.id= tmp.id; /*EOS*/
@ -66,7 +66,7 @@ left outer join (
 ---- Sprint 3 ----
 drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as
-select distinct r.result as id, coalesce(fundref, 0) as fundref
+select /*+ COALESCE(100) */ distinct r.result as id, coalesce(fundref, 0) as fundref
 from ${stats_db_name}.project_results r
 left outer join (
    select distinct result, 1 as fundref from ${stats_db_name}.project_results where provenance='Harvested') tmp on r.result= tmp.result; /*EOS*/
@ -77,7 +77,7 @@ create table if not exists ${stats_db_name}.indi_result_org_collab stored as par
        SELECT ro.organization organization, ro.id, o.name
        from ${stats_db_name}.result_organization ro
        join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null)
-    select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations
+    select /*+ COALESCE(100) */ o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations
    from tmp as o1
    join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name
    group by o1.organization, o2.organization, o1.name, o2.name; /*EOS*/
@ -89,7 +89,7 @@ create table if not exists ${stats_db_name}.indi_result_org_country_collab store
        from ${stats_db_name}.result_organization ro
        join ${stats_db_name}.organization o on o.id=ro.organization
        where country <> 'UNKNOWN'  and o.name is not null)
-    select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations
+    select /*+ COALESCE(100) */ o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations
    from tmp as o1 join tmp as o2 on o1.id=o2.id
    where o1.id=o2.id and o1.country!=o2.country
    group by o1.organization, o1.id, o1.name, o2.country; /*EOS*/
@ -100,7 +100,7 @@ create table if not exists ${stats_db_name}.indi_project_collab_org stored as pa
        select o.id organization, o.name, ro.project as project
        from ${stats_db_name}.organization o
        join ${stats_db_name}.organization_projects ro on o.id=ro.id  where o.name is not null)
-    select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations
+    select /*+ COALESCE(100) */ o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations
    from tmp as o1
    join tmp as o2 on o1.project=o2.project
    where o1.organization<>o2.organization and o1.name<>o2.name
@ -112,7 +112,7 @@ create table if not exists ${stats_db_name}.indi_project_collab_org_country stor
        select o.id organization, o.name, o.country , ro.project as project
        from ${stats_db_name}.organization o
        join ${stats_db_name}.organization_projects ro on o.id=ro.id and o.country <> 'UNKNOWN' and o.name is not null)
-    select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations
+    select /*+ COALESCE(100) */ o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations
    from tmp as o1
    join tmp as o2 on o1.project=o2.project
    where o1.organization<>o2.organization and o1.country<>o2.country
@ -124,7 +124,7 @@ create table if not exists ${stats_db_name}.indi_funder_country_collab stored as
        join ${stats_db_name}.organization o on o.id=op.id
        join ${stats_db_name}.project p on p.id=op.project
        where country <> 'UNKNOWN')
-    select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations
+    select /*+ COALESCE(100) */ f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations
    from tmp as f1
    join tmp as f2 on f1.project=f2.project
    where f1.country<>f2.country
@ -136,7 +136,7 @@ create table if not exists ${stats_db_name}.indi_result_country_collab stored as
        select distinct country, ro.id as result  from ${stats_db_name}.organization o
        join ${stats_db_name}.result_organization ro on o.id=ro.organization
        where country <> 'UNKNOWN' and o.name is not null)
-    select o1.country country1, o2.country country2, count(o1.result) as collaborations
+    select /*+ COALESCE(100) */ o1.country country1, o2.country country2, count(o1.result) as collaborations
    from tmp as o1
    join tmp as o2 on o1.result=o2.result
    where o1.country<>o2.country
@ -146,7 +146,7 @@ create table if not exists ${stats_db_name}.indi_result_country_collab stored as
 ---- Sprint 4 ----
 drop table if exists ${stats_db_name}.indi_pub_diamond purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
-    select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
+    select /*+ COALESCE(100) */ distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
    from ${stats_db_name}.publication_datasources pd
    left outer join (
        select pd.id, 1 as in_diamond_journal
@ -157,7 +157,7 @@ create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet a

 drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as
-    select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
+    select /*+ COALESCE(100) */ distinct pd.id, coalesce(is_transformative, 0) as is_transformative
    from ${stats_db_name}.publication pd
    left outer join (
        select  pd.id, 1 as is_transformative
@ -168,7 +168,7 @@ create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as

 drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as
-    select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open
+    select /*+ COALESCE(100) */ distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open
    from ${stats_db_name}.result_instance ri
    left outer join (
        select ri.id, 1 as pub_closed_other_open
@ -182,14 +182,14 @@ create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as
 ---- Sprint 5 ----
 drop table if exists ${stats_db_name}.indi_result_no_of_copies purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as
-    select id, count(id) as number_of_copies
+    select /*+ COALESCE(100) */ id, count(id) as number_of_copies
    from ${stats_db_name}.result_instance
    group by id; /*EOS*/

 ---- Sprint 6 ----
 drop table if exists ${stats_db_name}.indi_pub_downloads purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_downloads stored as parquet as
-    SELECT result_id, sum(downloads) no_downloads
+    SELECT /*+ COALESCE(100) */ result_id, sum(downloads) no_downloads
    from openaire_prod_usage_stats.usage_stats
    join ${stats_db_name}.publication on result_id=id
    where downloads>0
@ -197,7 +197,7 @@ create table if not exists ${stats_db_name}.indi_pub_downloads stored as parquet

 drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored as parquet as
-    SELECT result_id, repository_id, sum(downloads) no_downloads
+    SELECT /*+ COALESCE(100) */ result_id, repository_id, sum(downloads) no_downloads
    from openaire_prod_usage_stats.usage_stats
    join ${stats_db_name}.publication on result_id=id
    where downloads>0
@ -205,14 +205,14 @@ create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored

 drop table if exists ${stats_db_name}.indi_pub_downloads_year purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as
-    SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads
+    SELECT /*+ COALESCE(100) */ result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads
    from openaire_prod_usage_stats.usage_stats us
    join ${stats_db_name}.publication on result_id=id where downloads>0
    GROUP BY result_id, substring(us.`date`, 1,4); /*EOS*/

 drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as
-    SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads
+    SELECT /*+ COALESCE(100) */ result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads
    from openaire_prod_usage_stats.usage_stats us
    join ${stats_db_name}.publication on result_id=id
    where downloads>0
@ -241,7 +241,7 @@ create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet a
            UNION ALL
            select id, issn_online as issn from ${stats_db_name}.datasource d left semi join gold_oa on gold_oa.issn=d.issn_online) foo
    )
-    SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
+    SELECT /*+ COALESCE(100) */ DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
    FROM ${stats_db_name}.publication pd
    left outer join (
            select pd.id, 1 as is_gold
@ -272,7 +272,7 @@ create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as
        FROM ${stats_db_name}.datasource
        WHERE issn_online IS NOT NULL ) as issn
        WHERE LENGTH(issn) > 7)
-    SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa
+    SELECT /*+ COALESCE(100) */ DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa
    FROM ${stats_db_name}.publication_datasources pd
    LEFT OUTER JOIN (
        SELECT pd.id, 1 as is_hybrid_oa from ${stats_db_name}.publication_datasources pd
@ -284,7 +284,7 @@ create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as

 drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
-select distinct p.id, coalesce(is_hybrid, 0) is_hybrid
+select /*+ COALESCE(100) */ distinct p.id, coalesce(is_hybrid, 0) is_hybrid
 from ${stats_db_name}.publication p
 left outer join (
    select p.id, 1 as is_hybrid
@ -313,7 +313,7 @@ create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet
        where  cast(year as int)>2003
        group by ro.organization)
 --return results_fair/all_results
-    select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
+    select /*+ COALESCE(100) */ allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
    from allresults
    join result_fair on result_fair.organization=allresults.organization; /*EOS*/

@ -336,7 +336,7 @@ select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name
 drop table if exists ${stats_db_name}.indi_org_fairness_pub_pr purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_org_fairness_pub_pr stored as parquet as
-select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
+select /*+ COALESCE(100) */ ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
 from allresults ar
         join result_fair rf on rf.organization=ar.organization; /*EOS*/

@ -357,7 +357,7 @@ CREATE TEMPORARY VIEW allresults as select year, ro.organization, count(distinct
 drop table if exists ${stats_db_name}.indi_org_fairness_pub_year purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_org_fairness_pub_year stored as parquet as
-select cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
+select /*+ COALESCE(100) */ cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
 from allresults
         join result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year; /*EOS*/

@ -381,7 +381,7 @@ CREATE TEMPORARY VIEW allresults as
 drop table if exists ${stats_db_name}.indi_org_fairness_pub purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_org_fairness_pub stored as parquet as
-select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
+select /*+ COALESCE(100) */ ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
 from allresults ar join result_fair rf
 on rf.organization=ar.organization; /*EOS*/

@ -404,7 +404,7 @@ CREATE TEMPORARY VIEW allresults as
 drop table if exists ${stats_db_name}.indi_org_fairness_year purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_org_fairness_year stored as parquet as
-    select cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
+    select /*+ COALESCE(100) */ cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
    from allresults
    join result_fair on result_fair.organization=allresults.organization and cast(result_fair.year as int)=cast(allresults.year as int); /*EOS*/

@ -427,7 +427,7 @@ CREATE TEMPORARY VIEW allresults as
 drop table if exists ${stats_db_name}.indi_org_findable_year purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_org_findable_year stored as parquet as
-select cast(allresults.year as int) year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
+select /*+ COALESCE(100) */ cast(allresults.year as int) year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
 from allresults
         join result_with_pid on result_with_pid.organization=allresults.organization and cast(result_with_pid.year as int)=cast(allresults.year as int); /*EOS*/

@ -450,7 +450,7 @@ select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name
 drop table if exists ${stats_db_name}.indi_org_findable purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_org_findable stored as parquet as
-select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
+select /*+ COALESCE(100) */ allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
 from allresults
         join result_with_pid on result_with_pid.organization=allresults.organization; /*EOS*/

@ -516,7 +516,7 @@ select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsof
 drop table if exists ${stats_db_name}.indi_org_openess purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_org_openess stored as parquet as
-select allpubsshare.organization,
+select /*+ COALESCE(100) */ allpubsshare.organization,
       (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end)
           +(case when d is null then 0 else 1 end))
           org_openess FROM allpubsshare
@ -593,7 +593,7 @@ select allsoftware.year, software_oa.organization, software_oa.no_oasoftware/all
 drop table if exists ${stats_db_name}.indi_org_openess_year purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_org_openess_year stored as parquet as
-select cast(allpubsshare.year as int) year, allpubsshare.organization,
+select /*+ COALESCE(100) */ cast(allpubsshare.year as int) year, allpubsshare.organization,
       (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end)
           +(case when d is null then 0 else 1 end))
           org_openess FROM allpubsshare
@ -617,7 +617,7 @@ DROP VIEW allsoftwaresshare; /*EOS*/
 drop table if exists ${stats_db_name}.indi_pub_has_preprint purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_pub_has_preprint stored as parquet as
-select distinct p.id, coalesce(has_preprint, 0) as has_preprint
+select /*+ COALESCE(100) */ distinct p.id, coalesce(has_preprint, 0) as has_preprint
 from ${stats_db_name}.publication_classifications p
         left outer join (
    select p.id, 1 as has_preprint
@ -627,7 +627,7 @@ from ${stats_db_name}.publication_classifications p
 drop table if exists ${stats_db_name}.indi_pub_in_subscribed purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_pub_in_subscribed stored as parquet as
-select distinct p.id, coalesce(is_subscription, 0) as is_subscription
+select /*+ COALESCE(100) */ distinct p.id, coalesce(is_subscription, 0) as is_subscription
 from ${stats_db_name}.publication p
         left outer join(
    select  p.id, 1 as is_subscription from ${stats_db_name}.publication p
@ -640,7 +640,7 @@ from ${stats_db_name}.publication p
 drop table if exists ${stats_db_name}.indi_result_with_pid purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_result_with_pid stored as parquet as
-select distinct p.id, coalesce(result_with_pid, 0) as result_with_pid
+select /*+ COALESCE(100) */ distinct p.id, coalesce(result_with_pid, 0) as result_with_pid
 from ${stats_db_name}.result p
         left outer join (
    select p.id, 1 as result_with_pid
@ -654,7 +654,7 @@ group by rf.id; /*EOS*/
 drop table if exists ${stats_db_name}.indi_pub_interdisciplinarity purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_pub_interdisciplinarity stored as parquet as
-select distinct p.id as id, coalesce(is_interdisciplinary, 0)
+select /*+ COALESCE(100) */ distinct p.id as id, coalesce(is_interdisciplinary, 0)
 as is_interdisciplinary
 from pub_fos_totals p
 left outer join (
@ -666,7 +666,7 @@ drop view pub_fos_totals; /*EOS*/
 drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge; /*EOS*/

 create table ${stats_db_name}.indi_pub_bronze_oa stored as parquet as
-select distinct p.id,coalesce(is_bronze_oa,0) is_bronze_oa
+select /*+ COALESCE(100) */ distinct p.id,coalesce(is_bronze_oa,0) is_bronze_oa
 from ${stats_db_name}.publication p
 left outer join (
    select p.id, 1 as is_bronze_oa
@ -689,7 +689,7 @@ where p.end_year is NOT NULL and r.year is not null; /*EOS*/
 drop table if exists ${stats_db_name}.indi_is_project_result_after purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_is_project_result_after stored as parquet as
-select pry.project_id, pry.acronym, pry.result_id,
+select /*+ COALESCE(100) */ pry.project_id, pry.acronym, pry.result_id,
 coalesce(is_project_result_after, 0) as is_project_result_after
 from project_year_result_year pry
 left outer join (select pry.project_id, pry.acronym, pry.result_id, 1 as is_project_result_after
@ -701,7 +701,7 @@ drop view project_year_result_year; /*EOS*/
 drop table if exists ${stats_db_name}.indi_is_funder_plan_s purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_is_funder_plan_s stored as parquet as
-select distinct f.id, f.name, coalesce(is_funder_plan_s, 0) as is_funder_plan_s
+select /*+ COALESCE(100) */ distinct f.id, f.name, coalesce(is_funder_plan_s, 0) as is_funder_plan_s
 from ${stats_db_name}.funder f
         left outer join (select id, name, 1 as is_funder_plan_s from ${stats_db_name}.funder
         join stats_ext.plan_s_short on c_o_alition_s_organisation_funder=name) tmp
@ -722,7 +722,7 @@ create table if not exists ${stats_db_name}.indi_funder_fairness stored as parqu
    join ${stats_db_name}.project p on p.id=rp.project
    where  cast(year as int)>2003
    group by p.funder)
-select allresults.funder, result_fair.no_result_fair/allresults.no_allresults funder_fairness
+select /*+ COALESCE(100) */ allresults.funder, result_fair.no_result_fair/allresults.no_allresults funder_fairness
 from allresults
         join result_fair on result_fair.funder=allresults.funder; /*EOS*/

@ -745,7 +745,7 @@ allresults as
    join ${stats_db_name}.result r on r.id=rc.id
    where  cast(year as int)>2003
    group by rc.ri_initiative)
-select allresults.ri_initiative, result_fair.no_result_fair/allresults.no_allresults ris_fairness
+select /*+ COALESCE(100) */ allresults.ri_initiative, result_fair.no_result_fair/allresults.no_allresults ris_fairness
 from allresults
         join result_fair on result_fair.ri_initiative=allresults.ri_initiative; /*EOS*/

@ -817,16 +817,14 @@ select software_oa.funder, software_oa.no_oasoftware/allsoftware.no_allsoftware
 drop table if exists ${stats_db_name}.indi_funder_openess purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_funder_openess stored as parquet as
-select allpubsshare.funder,
-       (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end)
-           +(case when d is null then 0 else 1 end))
-           funder_openess FROM allpubsshare
-                                left outer join (select funder,d from
-    alldatasetssshare) tmp1
-                                                on tmp1.funder=allpubsshare.funder
-                                left outer join (select funder,s from
-    allsoftwaresshare) tmp2
-                                                on tmp2.funder=allpubsshare.funder; /*EOS*/
+select /*+ COALESCE(100) */ allpubsshare.funder,
+   (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end)
+       +(case when d is null then 0 else 1 end)) funder_openess
+FROM allpubsshare
+    left outer join (select funder,d from alldatasetssshare) tmp1
+        on tmp1.funder=allpubsshare.funder
+    left outer join (select funder,s from allsoftwaresshare) tmp2
+        on tmp2.funder=allpubsshare.funder; /*EOS*/

 DROP VIEW pubs_oa; /*EOS*/
 DROP VIEW datasets_oa; /*EOS*/
@ -905,7 +903,7 @@ select software_oa.ri_initiative, software_oa.no_oasoftware/allsoftware.no_allso
 drop table if exists ${stats_db_name}.indi_ris_openess purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_ris_openess stored as parquet as
-select allpubsshare.ri_initiative,
+select /*+ COALESCE(100) */ allpubsshare.ri_initiative,
       (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end)
           +(case when d is null then 0 else 1 end))
 	ris_openess FROM allpubsshare
@ -943,7 +941,7 @@ with result_findable as
    join ${stats_db_name}.project p on p.id=rp.project
    where  cast(year as int)>2003
    group by p.funder)
-select allresults.funder, result_findable.no_result_findable/allresults.no_allresults funder_findable
+select /*+ COALESCE(100) */ allresults.funder, result_findable.no_result_findable/allresults.no_allresults funder_findable
 from allresults
         join result_findable on result_findable.funder=allresults.funder; /*EOS*/

@ -952,41 +950,43 @@ drop table if exists ${stats_db_name}.indi_ris_findable purge; /*EOS*/

 create table if not exists ${stats_db_name}.indi_ris_findable stored as parquet as
 with result_contexts as
-(select distinct rc.id, context.name ri_initiative from ${stats_db_name}.result_concepts rc
-join ${stats_db_name}.concept on concept.id=rc.concept
-join ${stats_db_name}.category on category.id=concept.category
-join ${stats_db_name}.context on context.id=category.context),
-result_findable as
-        (select rc.ri_initiative ri_initiative, count(distinct rc.id) no_result_findable from result_contexts rc
-    join ${stats_db_name}.result r on r.id=rc.id
-    join ${stats_db_name}.result_pids rp on rp.id=r.id
-    where cast(r.year as int)>2003
-    group by rc.ri_initiative),
-allresults as
-(select rc.ri_initiative ri_initiative, count(distinct rc.id) no_allresults from result_contexts rc
-    join ${stats_db_name}.result r on r.id=rc.id
-    where  cast(r.year as int)>2003
-    group by rc.ri_initiative)
-select allresults.ri_initiative, result_findable.no_result_findable/allresults.no_allresults ris_findable
+    (select distinct rc.id, context.name ri_initiative from ${stats_db_name}.result_concepts rc
+    join ${stats_db_name}.concept on concept.id=rc.concept
+    join ${stats_db_name}.category on category.id=concept.category
+    join ${stats_db_name}.context on context.id=category.context),
+    result_findable as
+            (select rc.ri_initiative ri_initiative, count(distinct rc.id) no_result_findable from result_contexts rc
+        join ${stats_db_name}.result r on r.id=rc.id
+        join ${stats_db_name}.result_pids rp on rp.id=r.id
+        where cast(r.year as int)>2003
+        group by rc.ri_initiative),
+    allresults as
+    (select rc.ri_initiative ri_initiative, count(distinct rc.id) no_allresults from result_contexts rc
+        join ${stats_db_name}.result r on r.id=rc.id
+        where  cast(r.year as int)>2003
+        group by rc.ri_initiative)
+select /*+ COALESCE(100) */ allresults.ri_initiative, result_findable.no_result_findable/allresults.no_allresults ris_findable
 from allresults
         join result_findable on result_findable.ri_initiative=allresults.ri_initiative; /*EOS*/

+drop table if exists ${stats_db_name}.indi_pub_publicly_funded purge; /*EOS*/
+
 create table if not exists ${stats_db_name}.indi_pub_publicly_funded stored as parquet as
 with org_names_pids as
-(select org.id,name, pid from ${stats_db_name}.organization org
-join ${stats_db_name}.organization_pids op on org.id=op.id),
-publicly_funded_orgs as
-(select distinct name from
-(select pf.name from stats_ext.insitutions_for_publicly_funded pf
-join ${stats_db_name}.fundref f on f.name=pf.name where f.type='government'
-union all
-select pf.name from stats_ext.insitutions_for_publicly_funded pf
-join ${stats_db_name}.project p on p.funder=pf.name
-union all
-select op.name from stats_ext.insitutions_for_publicly_funded pf
-join org_names_pids op on (op.name=pf.name or op.pid=pf.ror)
-and pf.publicly_funded='yes') foo)
-select distinct p.id, coalesce(publicly_funded, 0) as publicly_funded
+    (select org.id,name, pid from ${stats_db_name}.organization org
+    join ${stats_db_name}.organization_pids op on org.id=op.id),
+    publicly_funded_orgs as
+    (select distinct name from
+    (select pf.name from stats_ext.insitutions_for_publicly_funded pf
+    join ${stats_db_name}.fundref f on f.name=pf.name where f.type='government'
+    union all
+    select pf.name from stats_ext.insitutions_for_publicly_funded pf
+    join ${stats_db_name}.project p on p.funder=pf.name
+    union all
+    select op.name from stats_ext.insitutions_for_publicly_funded pf
+    join org_names_pids op on (op.name=pf.name or op.pid=pf.ror)
+    and pf.publicly_funded='yes') foo)
+select /*+ COALESCE(100) */ distinct p.id, coalesce(publicly_funded, 0) as publicly_funded
 from ${stats_db_name}.publication p
 left outer join (
 select distinct ro.id, 1 as publicly_funded from ${stats_db_name}.result_organization ro
@ -995,7 +995,7 @@ join publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id; /*EOS*/

 drop table if exists ${stats_db_name}.indi_pub_green_with_license purge; /*EOS*/
 create table ${stats_db_name}.indi_pub_green_with_license stored as parquet as
-select distinct p.id, coalesce(green_with_license, 0) as green_with_license
+select /*+ COALESCE(100) */ distinct p.id, coalesce(green_with_license, 0) as green_with_license
 from ${stats_db_name}.publication p
 left outer join (
    select distinct p.id, 1 as green_with_license from ${stats_db_name}.publication p
@ -1006,7 +1006,7 @@ left outer join (
 drop table if exists ${stats_db_name}.result_country purge; /*EOS*/

 create table ${stats_db_name}.result_country stored as parquet as
-select distinct id, country
+select /*+ COALESCE(100) */ distinct id, country
 from (
    select ro.id, o.country
    from ${stats_db_name}.result_organization ro
@ -1021,7 +1021,7 @@ where rc.country is not null; /*EOS*/

 drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/
 create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as
-select distinct r.id, coalesce(oa_with_license,0) as oa_with_license
+select /*+ COALESCE(100) */ distinct r.id, coalesce(oa_with_license,0) as oa_with_license
 from ${stats_db_name}.result r
 left outer join (select distinct r.id, 1 as oa_with_license from ${stats_db_name}.result r
 join ${stats_db_name}.result_licenses rl on rl.id=r.id where r.bestlicence='Open Access') tmp on r.id=tmp.id; /*EOS*/
@ -1029,9 +1029,9 @@ join ${stats_db_name}.result_licenses rl on rl.id=r.id where r.bestlicence='Open
 drop table if exists ${stats_db_name}.indi_result_oa_without_license purge; /*EOS*/
 create table ${stats_db_name}.indi_result_oa_without_license stored as parquet as
 with without_license as
-(select distinct id from ${stats_db_name}.indi_result_oa_with_license
-where oa_with_license=0)
-select distinct r.id, coalesce(oa_without_license,0) as oa_without_license
+    (select distinct id from ${stats_db_name}.indi_result_oa_with_license
+    where oa_with_license=0)
+select /*+ COALESCE(100) */ distinct r.id, coalesce(oa_without_license,0) as oa_without_license
 from ${stats_db_name}.result r
 left outer join (select distinct r.id, 1 as oa_without_license
 from ${stats_db_name}.result r
@ -1042,7 +1042,7 @@ drop table if exists ${stats_db_name}.indi_result_under_transformative purge; /*
 create table ${stats_db_name}.indi_result_under_transformative stored as parquet as
 with transformative_dois as (
    select distinct doi from stats_ext.transformative_facts)
-select distinct r.id, coalesce(under_transformative,0) as under_transformative
+select /*+ COALESCE(100) */ distinct r.id, coalesce(under_transformative,0) as under_transformative
 from ${stats_db_name}.result r
 left outer join (
    select distinct rp.id, 1 as under_transformative
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql
@ -1,30 +1,30 @@
-set mapred.job.queue.name=analytics;
+set mapred.job.queue.name=analytics; /*EOS*/

 ----------------------------------------------------
 -- Shortcuts for various definitions in stats db ---
 ----------------------------------------------------

 -- Peer reviewed:
-drop table if exists ${stats_db_name}.result_peerreviewed purge;
+drop table if exists ${stats_db_name}.result_peerreviewed purge; /*EOS*/

 create table IF NOT EXISTS ${stats_db_name}.result_peerreviewed STORED AS PARQUET as
-select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
+select /*+ COALESCE(100) */ r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
 from ${stats_db_name}.result r
 left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
-left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
+left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id; /*EOS*/

 -- Green OA:
-drop table if exists ${stats_db_name}.result_greenoa purge;
+drop table if exists ${stats_db_name}.result_greenoa purge; /*EOS*/

 create table IF NOT EXISTS ${stats_db_name}.result_greenoa STORED AS PARQUET as
-select r.id, case when green.green_oa=1 then true else false end as green
+select /*+ COALESCE(100) */ r.id, case when green.green_oa=1 then true else false end as green
 from ${stats_db_name}.result r
-left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
+left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id; /*EOS*/

 -- GOLD OA:
-drop table if exists ${stats_db_name}.result_gold purge;
+drop table if exists ${stats_db_name}.result_gold purge; /*EOS*/

 create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as
-select r.id, case when gold.is_gold=1 then true else false end as gold
+select /*+ COALESCE(100) */ r.id, case when gold.is_gold=1 then true else false end as gold
 from ${stats_db_name}.result r
-         left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
+         left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql
@ -1,58 +1,26 @@
-set mapred.job.queue.name=analytics;
+set mapred.job.queue.name=analytics; /*EOS*/

-- replace the creation of the result view to include the boolean fields from the previous tables (green, gold,
+-- replace the creation of the result view with a table, which will include the boolean fields from the previous tables (green, gold,
 -- peer reviewed)
-drop table if exists ${stats_db_name}.result_tmp;

-CREATE TABLE ${stats_db_name}.result_tmp (
-    id STRING,
-    title STRING,
-    publisher STRING,
-    journal STRING,
-    `date` STRING,
-    `year` INT,
-    bestlicence STRING,
-    access_mode STRING,
-    embargo_end_date STRING,
-    delayed BOOLEAN,
-    authors INT,
-    source STRING,
-    abstract BOOLEAN,
-    type STRING ,
-    peer_reviewed BOOLEAN,
-    green BOOLEAN,
-    gold BOOLEAN)
-clustered by (id) into 100 buckets stored as orc tblproperties('transactional'='true');
+drop view if exists ${stats_db_name}.result; /*EOS*/
+drop table if exists ${stats_db_name}.result; /*EOS*/

-insert into ${stats_db_name}.result_tmp
-select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
-FROM ${stats_db_name}.publication r
+-- Rebuild result as the union of the four product tables, enriched with the peer-reviewed / green / gold flags.
+-- year and access_mode are aliased explicitly: a CTAS takes its column names from the select list, and the table
+-- this statement replaces (result_tmp) declared them as "year INT" and "access_mode STRING".
+CREATE TABLE ${stats_db_name}.result stored as parquet as
+SELECT /*+ COALESCE(100) */ r.id, r.title, r.publisher, r.journal, r.`date`, CAST(DATE_FORMAT(r.`date`, 'yyyy') AS INT) AS `year`, r.bestlicence, r.bestlicence AS access_mode, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
+FROM (
+    -- The product tables are read without an alias, so their columns are referenced unqualified and only once each.
+    (SELECT id, title, publisher, journal, `date`, bestlicence, embargo_end_date, delayed, authors, source, abstract, type FROM ${stats_db_name}.publication)
+    UNION ALL
+    (SELECT id, title, publisher, journal, `date`, bestlicence, embargo_end_date, delayed, authors, source, abstract, type FROM ${stats_db_name}.dataset)
+    UNION ALL
+    (SELECT id, title, publisher, journal, `date`, bestlicence, embargo_end_date, delayed, authors, source, abstract, type FROM ${stats_db_name}.software)
+    UNION ALL
+    (SELECT id, title, publisher, journal, `date`, bestlicence, embargo_end_date, delayed, authors, source, abstract, type FROM ${stats_db_name}.otherresearchproduct)
+    ) r
 LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
 LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
-LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
-
-insert into ${stats_db_name}.result_tmp
-select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
-FROM ${stats_db_name}.dataset r
-LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
-LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
-LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
-
-insert into ${stats_db_name}.result_tmp
-select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
-FROM ${stats_db_name}.software r
-LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
-LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
-LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
-
-insert into ${stats_db_name}.result_tmp
-select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
-FROM ${stats_db_name}.otherresearchproduct r
-LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
-LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
-LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
-
-drop table if exists ${stats_db_name}.result;
-drop view if exists ${stats_db_name}.result;
-create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp;
-drop table ${stats_db_name}.result_tmp;
+LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql
@ -1,4 +1,4 @@
-set mapred.job.queue.name=analytics;
+set mapred.job.queue.name=analytics; /*EOS*/

 --------------------------------------------------------------
 --------------------------------------------------------------
@ -7,65 +7,65 @@ set mapred.job.queue.name=analytics;
 --------------------------------------------------------------

 -- Publication temporary table
-DROP TABLE IF EXISTS ${stats_db_name}.publication_tmp purge;
-CREATE TABLE ${stats_db_name}.publication_tmp
-(
-    id               STRING,
-    title            STRING,
-    publisher        STRING,
-    journal          STRING,
-    date             STRING,
-    year             STRING,
-    bestlicence      STRING,
-    embargo_end_date STRING,
-    delayed          BOOLEAN,
-    authors          INT,
-    source           STRING,
-    abstract         BOOLEAN,
-    type             STRING
-)
-    clustered by (id) into 100 buckets stored as orc tblproperties ('transactional' = 'true');
+DROP TABLE IF EXISTS ${stats_db_name}.publication purge; /*EOS*/

-INSERT INTO ${stats_db_name}.publication_tmp
-SELECT substr(p.id, 4)                                            as id,
-       p.title[0].value                                           as title,
-       p.publisher.value                                          as publisher,
-       p.journal.name                                             as journal,
-       p.dateofacceptance.value                                   as date,
-       date_format(p.dateofacceptance.value, 'yyyy')              as year,
-       p.bestaccessright.classname                                as bestlicence,
-       p.embargoenddate.value                                     as embargo_end_date,
-       false                                                      as delayed,
-       size(p.author)                                             as authors,
-       concat_ws('\u003B', p.source.value)                        as source,
-       case when size(p.description) > 0 then true else false end as abstract,
-       'publication'                                              as type
-from ${openaire_db_name}.publication p
-where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
+-- Publication table: one row per publication that is neither deleted by inference nor invisible.
+-- The delayed flag is true when the acceptance date is later than the end date of at least one project the
+-- publication is linked to through a resultProject / isProducedBy relation; without such a project it is false.
+CREATE TABLE ${stats_db_name}.publication STORED AS PARQUET AS
+WITH project_links AS (
+    SELECT p.id AS pub_id, CASE WHEN (to_date(p.dateofacceptance.value) > to_date(prj.enddate.value)) THEN true ELSE false END AS delayed
+    FROM ${openaire_db_name}.publication p
+        JOIN ${openaire_db_name}.relation rel ON reltype = 'resultProject' AND relclass = 'isProducedBy' AND rel.source = p.id
+        JOIN ${openaire_db_name}.project prj ON prj.id = rel.target
+    WHERE p.datainfo.deletedbyinference = false AND p.datainfo.invisible = false
+      AND rel.datainfo.deletedbyinference = false AND rel.datainfo.invisible = false
+      AND prj.datainfo.deletedbyinference = false AND prj.datainfo.invisible = false
+),
+-- Collapse to one flag per publication: max() keeps true as soon as one linked project produced true.
+delay_flags AS (SELECT pub_id, max(delayed) AS delayed FROM project_links GROUP BY pub_id)
+SELECT /*+ COALESCE(100) */
+    substr(p.id, 4) AS id,
+    p.title[0].value AS title,
+    p.publisher.value AS publisher,
+    p.journal.name AS journal,
+    p.dateofacceptance.value AS date,
+    date_format(p.dateofacceptance.value, 'yyyy') AS year,
+    p.bestaccessright.classname AS bestlicence,
+    p.embargoenddate.value AS embargo_end_date,
+    coalesce(delay_flags.delayed, false) AS delayed,
+    size(p.author) AS authors,
+    concat_ws('\u003B', p.source.value) AS source,
+    CASE WHEN size(p.description) > 0 THEN true ELSE false END AS abstract,
+    'publication' AS type
+FROM ${openaire_db_name}.publication p
+    LEFT OUTER JOIN delay_flags ON p.id = delay_flags.pub_id
+WHERE p.datainfo.deletedbyinference = false AND p.datainfo.invisible = false; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge;
+
+DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge; /*EOS*/

 CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS
-SELECT substr(p.id, 4) as id, instancetype.classname as type
+SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, instancetype.classname as type
 from ${openaire_db_name}.publication p
         LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
-where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
+where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge; /*EOS*/

 CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS
-SELECT substr(p.id, 4) as id, case
+SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
    when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
    when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
    when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
 from ${openaire_db_name}.publication p
         LATERAL VIEW explode(p.context) contexts as context
-where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
+where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge; /*EOS*/

 CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
-SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
+SELECT /*+ COALESCE(100) */ p.id, case when d.id is null then 'other' else p.datasource end as datasource
 FROM (
         SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
         from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance
@ -73,44 +73,44 @@ FROM (
         LEFT OUTER JOIN (
    SELECT substr(d.id, 4) id
    from ${openaire_db_name}.datasource d
-    WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
+    WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge; /*EOS*/

 CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
-select substr(p.id, 4) as id, p.language.classname as language
+select /*+ COALESCE(100) */ substr(p.id, 4) as id, p.language.classname as language
 FROM ${openaire_db_name}.publication p
-where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
+where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge; /*EOS*/

 CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
-SELECT substr(p.id, 4) AS id, oids.ids AS oid
+SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
 FROM ${openaire_db_name}.publication p
         LATERAL VIEW explode(p.originalid) oids AS ids
-where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
+where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge; /*EOS*/

 CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
-SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
+SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
 FROM ${openaire_db_name}.publication p
         LATERAL VIEW explode(p.pid) pids AS ppid
-where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
+where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge; /*EOS*/

 CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
-select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
+select /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
 FROM ${openaire_db_name}.publication p
         LATERAL VIEW explode(p.subject) subjects AS subject
-where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
+where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

-DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge;
+DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge; /*EOS*/

 CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
-SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
+SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
 FROM ${openaire_db_name}.publication p
         lateral view explode(p.extrainfo) citations AS citation
 WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
-  and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
+  and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@ -1,3 +1,5 @@
+set mapred.job.queue.name=analytics; /*EOS*/
+
 create view if not exists TARGET.category as select * from SOURCE.category;
 create view if not exists TARGET.concept as select * from SOURCE.concept;
 create view if not exists TARGET.context as select * from SOURCE.context;
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql
@ -1,3 +1,5 @@
+set mapred.job.queue.name=analytics; /*EOS*/
+
 drop database if exists TARGET cascade;
 create database if not exists TARGET;

@ -81,11 +83,17 @@ create table TARGET.result stored as parquet as
             'openorgs____::8839b55dae0c84d56fd533f52d5d483a',   -- Leibniz Institute of Ecological Urban and Regional Development
             'openorgs____::526468206bca24c1c90da6a312295cf4',	-- Cyprus University of Technology
             'openorgs____::b5ca9d4340e26454e367e2908ef3872f',	-- Alma Mater Studiorum University of Bologna
-             'openorgs____::a6340e6ecf60f6bba163659df985b0f2',  -- TU Dresden
-             'openorgs____::64badd35233ba2cd4946368ef2f4cf57',  --	University of Vienna
+             'openorgs____::a6340e6ecf60f6bba163659df985b0f2',	-- TU Dresden
+             'openorgs____::64badd35233ba2cd4946368ef2f4cf57',  -- University of Vienna
             'openorgs____::7501d66d2297a963ebfb075c43fff88e',  -- Royal Institute of Technology
-             'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf',  -- Sorbonne University
-             'openorgs____::b316f25380d106aac402f5ae8653910d'  --	Centre for Research on Ecology and Forestry Applications
+             'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf',	-- Sorbonne University
+             'openorgs____::b316f25380d106aac402f5ae8653910d',  -- Centre for Research on Ecology and Forestry Applications
+             'openorgs____::45a2076eee3013e0e85625ce61bcd272',  -- Institut d'Investigació Sanitària Illes Balears
+             'openorgs____::00b20b0a743a96169e6cf135e6e2bd7c',  -- Universidad Publica De Navarra
+             'openorgs____::0f398605c2459294d125ff23473a97dc',  -- Aalto University
+             'openorgs____::25b1fa62c7fd8e409d3a83c07e04b2d4',  -- WHU-Otto Beisheim School of Management
+             'openorgs____::d6eec313417f11205db4e736a34c0db6',  -- KEMPELENOV INSTITUT INTELIGENTNYCH TECHNOLOGII
+             'openorgs____::c2dfb90e797a2dc52f0084c549289d0c'  -- National Research Institute for Agriculture, Food and Environment
        ) )) foo;

 create view if not exists TARGET.category as select * from SOURCE.category;
@ -256,7 +264,6 @@ create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * f

 create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 create table TARGET.result_instance stored as parquet as select * from SOURCE.result_instance orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 create table TARGET.indi_pub_publicly_funded stored as parquet as select * from SOURCE.indi_pub_publicly_funded orig where exists (select 1 from TARGET.result r where r.id=orig.id);

 create table TARGET.indi_is_project_result_after stored as parquet as select * from SOURCE.indi_is_project_result_after orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql
@ -1,3 +1,5 @@
+set mapred.job.queue.name=analytics;
+
 drop database if exists TARGET cascade;
 create database if not exists TARGET;

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Miriam Baglioni	420f43fc2f	[affRo] added option to run on crossref	2024-10-24 11:49:13 +02:00
Miriam Baglioni	595883fef0	merging with branch beta	2024-10-21 08:56:08 +02:00
Miriam Baglioni	f8988af98d	[affMatchings] adding choice to run the algo on oalexdata and get specific branch instead of release of affro	2024-10-18 13:58:14 +02:00
Giambattista Bloisi	56b05cde0b	Revert the changes for IgnoreUndefined management in tree evaluation	2024-10-11 10:35:15 +02:00
Claudio Atzori	62ff843334	adopting dhp-schemas:8.0.1 to support Auhtor's rawAffiliationString(s). Improved graph2hive implementation	2024-10-08 16:22:54 +02:00
Claudio Atzori	d5867a1992	merged #490	2024-10-08 15:39:59 +02:00
Claudio Atzori	e5df68772d	[graph provision] fixed serialisation of the usage counts as measures in the XML records	2024-10-02 09:35:21 +02:00
Miriam Baglioni	7e6d12fa77	[UsageCount] fixed error (cherry picked from commit `9c9a9562ae`)	2024-10-01 15:55:07 +02:00
Miriam Baglioni	191fc3a461	[UsageCount] add check in case the datasource is not matched against those present in the graph (cherry picked from commit `b42bdd5fb3`)	2024-10-01 15:54:31 +02:00
Claudio Atzori	10696f2a44	reverted procedure for creating the UsageCounts actionset	2024-10-01 15:54:13 +02:00
Claudio Atzori	5734b80861	Merge pull request 'datasource table creation split in steps' (#489 ) from antonis.lempesis/dnet-hadoop:beta into beta Reviewed-on: #489	2024-09-30 16:34:38 +02:00
Antonis Lempesis	f3c179658a	datasource table creation split in steps	2024-09-30 17:12:21 +03:00
Miriam Baglioni	b18ad035c1	Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta	2024-09-30 15:10:44 +02:00
Miriam Baglioni	e430826e00	[ImportOC] fix to move original folder instead of extracted ones	2024-09-30 15:10:10 +02:00
Giambattista Bloisi	c45cae447a	Fix: invert the "natural" order when ordering by id lexicographically	2024-09-26 17:08:02 +02:00
Claudio Atzori	3fcafc7ed6	Merge pull request 'Latest institutions in monitor dbs' (#472 ) from antonis.lempesis/dnet-hadoop:beta into beta Reviewed-on: #472	2024-09-26 09:49:01 +02:00
Miriam Baglioni	599e56dbc6	Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta	2024-09-25 17:28:23 +02:00
Claudio Atzori	6397141e56	code formatting	2024-09-25 15:27:32 +02:00
Claudio Atzori	e354f9853a	[OpenCitations] move the extracted contents under a backup path to avoid needing to re-download it in case of errors	2024-09-25 15:27:02 +02:00
Claudio Atzori	535a7b99f1	the metadata collection plugins using the HttpConnector2 class shall now retry instead of failing in case of UnknownHostException	2024-09-25 11:35:34 +02:00
Sandro La Bruzzo	6a097abc89	as described on ticket #9525 1. Changed the mapping applied to Crossref records: anything that has a relationship "is-review-of" must be mapped as publication of type "Review". 2. Force the hostedby of Crossref records with DOI prefix 10.3410 and 10.12703 to the H1 Connect data source.	2024-09-25 11:32:54 +02:00
Michele Artini	9754521847	Merge pull request 'fixed a bug with id' (#486 ) from osfPreprints_plugin into beta Reviewed-on: #486	2024-09-25 10:02:24 +02:00
Michele Artini	54f8b4da39	Merge pull request 'fixed a bug with 'null' string' (#484 ) from osfPreprints_plugin into beta Reviewed-on: #484	2024-09-24 15:19:54 +02:00
Miriam Baglioni	4d3e079590	Merge remote-tracking branch 'origin/beta' into beta	2024-09-24 14:26:29 +02:00
Michele Artini	e941adbe2b	fixed a bug with topic ENRICH/MORE/SUBJECT/ARXIV	2024-09-24 08:57:37 +02:00
Michele Artini	fdbe629f49	removed the deletedByInference=true filter	2024-09-23 15:27:28 +02:00
Antonis Lempesis	619aa34a15	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into beta	2024-09-23 15:25:59 +03:00
Antonis Lempesis	dbea7a4072	removed duplicate line	2024-09-23 14:57:11 +03:00
Antonis Lempesis	c9241dba0d	Merge pull request 'convert_hive_to_spark_actions' (#1 ) from convert_hive_to_spark_actions into beta Reviewed-on: antonis.lempesis/dnet-hadoop#1	2024-09-23 13:53:28 +02:00
Michele Artini	755a5aefcf	Merge pull request 'osfPreprints_plugin' (#482 ) from osfPreprints_plugin into beta Reviewed-on: #482	2024-09-23 10:21:34 +02:00
Michele Artini	db6f137cf9	Merge pull request 'osfPreprints_plugin' (#480 ) from osfPreprints_plugin into beta Reviewed-on: #480	2024-09-20 09:56:50 +02:00
Serafeim Chatzopoulos	50401a872f	Add affRo algorithm as an external library	2024-09-09 16:13:26 +03:00
Antonis Lempesis	37ad259296	cleanup	2024-09-05 16:02:44 +03:00
Antonis Lempesis	b64c144abf	added new institutions	2024-09-05 16:00:09 +03:00
Serafeim Chatzopoulos	37c04cbad7	Add affro workflow	2024-08-28 12:41:47 +03:00
Miriam Baglioni	468f2aa5a5	[AffiliationAffRo]align beta with new affiliation from publisher webpage introduced in production. AffRo collectedfrom OpenAIRE to discriminate against WebCrawl	2024-08-12 18:10:46 +02:00
Miriam Baglioni	89fcf4086c	[Person]fix issue in affiliation relation id construction for person (missing ::)	2024-08-12 18:04:43 +02:00
Miriam Baglioni	8c185a7b1a	resolving conflicts	2024-08-05 17:14:11 +02:00
Miriam Baglioni	985ca15264	[openaire-affiliation]removes matchings without DOI	2024-08-05 12:10:40 +02:00
Antonis Lempesis	d0590e0e49	added latest institutions	2024-07-23 15:17:15 +03:00
Antonis Lempesis	7d2c0a3723	added new institutions	2024-07-23 15:10:17 +03:00
Lampros Smyrnaios	e9686365a2	Improve performance of creating the "result_fos" table, by using a temp-table to cache data, which is requested multiple times.	2024-07-03 20:24:36 +03:00
Lampros Smyrnaios	ce0aee21cc	Improve performance of transferring the stats-DBs to another cluster and querying the DBs' tables, by ordering Spark to create up to 100 files per table, instead of thousands.	2024-07-03 20:15:33 +03:00
Lampros Smyrnaios	7b7dd32ad5	- Fix placement of some "set mapred.job.queue.name=analytics" statements and remove their unused "/EOS/" indicator. - Add stacktrace-info to failed actions.	2024-07-03 19:53:24 +03:00
Lampros Smyrnaios	7ce051d766	- Update the remaining hive-actions to spark-actions. - Update the version of shell-actions. - Fix missing "/EOS/" indicators.	2024-07-03 19:49:19 +03:00
Lampros Smyrnaios	aa4d7d5e20	Prioritize the rest of the stats-queries over other tasks on the cluster, by putting them in the "analytics" queue.	2024-07-03 19:14:25 +03:00
Lampros Smyrnaios	54e11b6a43	Improve performance and efficiency by rewriting the creation process of "publication", "project", "dataset", "datasource", "software", "otherresearchproduct" and "result" tables, to be performed in a single query, for each one.	2024-07-03 13:03:15 +03:00
Lampros Smyrnaios	fe2275a9b0	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions # Conflicts: # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql	2024-06-25 20:17:47 +03:00
Lampros Smyrnaios	a644a6f4fe	Catch Spark-sql errors and show a log with the statement that failed.	2024-05-29 12:10:11 +03:00
Lampros Smyrnaios	888637773c	Add missing "/EOS/" comments.	2024-05-27 12:34:49 +03:00
Lampros Smyrnaios	e0ac494859	Merge branch 'beta' into convert_hive_to_spark_actions # Conflicts: # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql # dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql	2024-05-27 12:27:40 +03:00
Lampros Smyrnaios	3c17183d10	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions	2024-04-23 17:18:16 +03:00
Lampros Smyrnaios	69a9ac7393	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions	2024-04-22 17:07:11 +03:00
Lampros Smyrnaios	342223f75c	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions	2024-04-19 13:18:34 +03:00
Lampros Smyrnaios	2616971e2b	dhp-stats-update: remove leftover duplicate line	2024-04-18 16:18:16 +03:00
Lampros Smyrnaios	ba533d9f34	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions	2024-04-18 15:47:56 +03:00
Lampros Smyrnaios	d46b78b659	dhp-stats-update: - Set Steps 2-7 and 9 to limit the amount of files generated by Spark, from 8000, down to 100, to improve file-transfer and querying performance. - Allow the workflow to run up to Step10. The Step11 seems to have some issues even when using hive-action.	2024-04-18 15:40:27 +03:00
Lampros Smyrnaios	6f2ebb2a52	Revert Step8 and Step11 to use Hive again, since their "UPDATE" statements are not supported by Spark.	2024-04-18 15:35:03 +03:00
Lampros Smyrnaios	ca091c0f1e	dhp-stats-update: - Fix not passing some parameters to some Spark actions. - Allow the workflow to run up to Step7. The first 7 steps seem to work out of the box.	2024-04-17 14:03:59 +03:00
Lampros Smyrnaios	0b897f2f66	Fix and add missing "DROP TABLE" statements, in "dhp-stats-update" sql-scripts.	2024-04-16 18:17:54 +03:00
Lampros Smyrnaios	db33f7727c	Update "dhp-stats-update" workflow to use "spark"-actions, instead of "hive" ones. Note: Currently the code is set to only test the "Step1".	2024-04-15 16:22:40 +03:00