implemented methods to extract fulltext link from an API call

code adapted as per Michele's recommendations
code refactored
2024-09-11 14:57:38 +02:00 · 2024-09-04 15:29:13 +02:00 · 2024-08-01 09:52:19 +02:00 · 2024-08-01 01:23:04 +02:00 · 2024-07-31 18:05:11 +02:00 · 2024-07-15 16:31:04 +02:00
109 changed files with 3390 additions and 3682 deletions
--- a/.gitignore
+++ b/.gitignore
@ -27,4 +27,3 @@ spark-warehouse
 /**/.factorypath
 /**/.scalafmt.conf
 /.java-version
-/dhp-shade-package/dependency-reduced-pom.xml
--- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java
@ -80,15 +80,7 @@ class WritePredefinedProjectPropertiesTest {
 		mojo.outputFile = testFolder;

 		// execute
-		try {
-			mojo.execute();
-			Assertions.assertTrue(false); // not reached
-		} catch (Exception e) {
-			Assertions
-				.assertTrue(
-					MojoExecutionException.class.isAssignableFrom(e.getClass()) ||
-						IllegalArgumentException.class.isAssignableFrom(e.getClass()));
-		}
+		Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute());
 	}

 	@Test
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@ -70,7 +70,10 @@
 			<groupId>com.ibm.icu</groupId>
 			<artifactId>icu4j</artifactId>
 		</dependency>
-
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-common</artifactId>
+		</dependency>
 		<dependency>
 			<groupId>com.github.sisyphsu</groupId>
 			<artifactId>dateparser</artifactId>
@ -160,7 +163,7 @@

 		<dependency>
 			<groupId>eu.dnetlib.dhp</groupId>
-			<artifactId>dhp-schemas</artifactId>
+			<artifactId>${dhp-schemas.artifact}</artifactId>
 		</dependency>

 		<dependency>
@ -169,23 +172,4 @@
 		</dependency>
 	</dependencies>

-	<!-- dependencies required on JDK9+ because J2EE has been removed -->
-	<profiles>
-		<profile>
-			<id>spark-34</id>
-			<dependencies>
-				<dependency>
-					<groupId>javax.xml.bind</groupId>
-					<artifactId>jaxb-api</artifactId>
-					<version>2.2.11</version>
-				</dependency>
-				<dependency>
-					<groupId>com.sun.xml.ws</groupId>
-					<artifactId>jaxws-ri</artifactId>
-					<version>2.3.3</version>
-					<type>pom</type>
-				</dependency>
-			</dependencies>
-		</profile>
-	</profiles>
 </project>
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/DbClient.java
@ -7,12 +7,12 @@ import java.sql.*;
 import java.util.function.Consumer;

 import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 public class DbClient implements Closeable {

-	private static final Log log = LogFactory.getLog(DbClient.class);
+	private static final Logger log = LoggerFactory.getLogger(DbClient.class);

 	private final Connection connection;

@ -37,6 +37,8 @@ public class DbClient implements Closeable {
 		try (final Statement stmt = connection.createStatement()) {
 			stmt.setFetchSize(100);

+			log.info("running SQL:\n\n{}\n\n", sql);
+
 			try (final ResultSet rs = stmt.executeQuery(sql)) {
 				while (rs.next()) {
 					consumer.accept(rs);
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java
@ -38,7 +38,7 @@ public class PacePerson {
 					PacePerson.class
 						.getResourceAsStream(
 							"/eu/dnetlib/dhp/common/name_particles.txt")));
-		} catch (Exception e) {
+		} catch (IOException e) {
 			throw new RuntimeException(e);
 		}
 	}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
@ -1,53 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import okhttp3.MediaType;
-import okhttp3.RequestBody;
-import okhttp3.internal.Util;
-import okio.BufferedSink;
-import okio.Okio;
-import okio.Source;
-
-public class InputStreamRequestBody extends RequestBody {
-
-	private final InputStream inputStream;
-	private final MediaType mediaType;
-	private final long lenght;
-
-	public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
-
-		return new InputStreamRequestBody(inputStream, mediaType, len);
-	}
-
-	private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
-		this.inputStream = inputStream;
-		this.mediaType = mediaType;
-		this.lenght = len;
-	}
-
-	@Override
-	public MediaType contentType() {
-		return mediaType;
-	}
-
-	@Override
-	public long contentLength() {
-
-		return lenght;
-
-	}
-
-	@Override
-	public void writeTo(BufferedSink sink) throws IOException {
-		Source source = null;
-		try {
-			source = Okio.source(inputStream);
-			sink.writeAll(source);
-		} finally {
-			Util.closeQuietly(source);
-		}
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
@ -1,8 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-public class MissingConceptDoiException extends Throwable {
-	public MissingConceptDoiException(String message) {
-		super(message);
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
@ -1,363 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.*;
-import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.http.HttpHeaders;
-import org.apache.http.entity.ContentType;
-import org.jetbrains.annotations.NotNull;
-
-import com.google.gson.Gson;
-
-import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
-import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
-import okhttp3.*;
-
-public class ZenodoAPIClient implements Serializable {
-
-	String urlString;
-	String bucket;
-
-	String deposition_id;
-	String access_token;
-
-	public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
-
-	private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
-
-	public String getUrlString() {
-		return urlString;
-	}
-
-	public void setUrlString(String urlString) {
-		this.urlString = urlString;
-	}
-
-	public String getBucket() {
-		return bucket;
-	}
-
-	public void setBucket(String bucket) {
-		this.bucket = bucket;
-	}
-
-	public void setDeposition_id(String deposition_id) {
-		this.deposition_id = deposition_id;
-	}
-
-	public ZenodoAPIClient(String urlString, String access_token) {
-
-		this.urlString = urlString;
-		this.access_token = access_token;
-	}
-
-	/**
-	 * Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
-	 *
-	 * @return response code
-	 * @throws IOException
-	 */
-	public int newDeposition() throws IOException {
-		String json = "{}";
-
-		URL url = new URL(urlString);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setRequestMethod("POST");
-		conn.setDoOutput(true);
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-		conn.disconnect();
-
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
-		this.bucket = newSubmission.getLinks().getBucket();
-		this.deposition_id = newSubmission.getId();
-
-		return responseCode;
-	}
-
-	/**
-	 * Upload files in Zenodo.
-	 *
-	 * @param is the inputStream for the file to upload
-	 * @param file_name the name of the file as it will appear on Zenodo
-	 * @return the response code
-	 */
-	public int uploadIS(InputStream is, String file_name) throws IOException {
-
-		URL url = new URL(bucket + "/" + file_name);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("PUT");
-
-		byte[] buf = new byte[8192];
-		int length;
-		try (OutputStream os = conn.getOutputStream()) {
-			while ((length = is.read(buf)) != -1) {
-				os.write(buf, 0, length);
-			}
-
-		}
-		int responseCode = conn.getResponseCode();
-		if (!checkOKStatus(responseCode)) {
-			throw new IOException("Unexpected code " + responseCode + getBody(conn));
-		}
-
-		return responseCode;
-	}
-
-	@NotNull
-	private String getBody(HttpURLConnection conn) throws IOException {
-		String body = "{}";
-		try (BufferedReader br = new BufferedReader(
-			new InputStreamReader(conn.getInputStream(), "utf-8"))) {
-			StringBuilder response = new StringBuilder();
-			String responseLine = null;
-			while ((responseLine = br.readLine()) != null) {
-				response.append(responseLine.trim());
-			}
-
-			body = response.toString();
-
-		}
-		return body;
-	}
-
-	/**
-	 * Associates metadata information to the current deposition
-	 *
-	 * @param metadata the metadata
-	 * @return response code
-	 * @throws IOException
-	 */
-	public int sendMretadata(String metadata) throws IOException {
-
-		URL url = new URL(urlString + "/" + deposition_id);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("PUT");
-
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = metadata.getBytes("utf-8");
-			os.write(input, 0, input.length);
-
-		}
-
-		final int responseCode = conn.getResponseCode();
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + getBody(conn));
-
-		return responseCode;
-
-	}
-
-	private boolean checkOKStatus(int responseCode) {
-
-		if (HttpURLConnection.HTTP_OK != responseCode ||
-			HttpURLConnection.HTTP_CREATED != responseCode)
-			return true;
-		return false;
-	}
-
-	/**
-	 * To publish the current deposition. It works for both new deposition or new version of an old deposition
-	 *
-	 * @return response code
-	 * @throws IOException
-	 */
-	@Deprecated
-	public int publish() throws IOException {
-
-		String json = "{}";
-
-		OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
-
-		RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
-
-		Request request = new Request.Builder()
-			.url(urlString + "/" + deposition_id + "/actions/publish")
-			.addHeader("Authorization", "Bearer " + access_token)
-			.post(body)
-			.build();
-
-		try (Response response = httpClient.newCall(request).execute()) {
-
-			if (!response.isSuccessful())
-				throw new IOException("Unexpected code " + response + response.body().string());
-
-			return response.code();
-
-		}
-	}
-
-	/**
-	 * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
-	 * for the new version.
-	 *
-	 * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
-	 *            part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
-	 *            concept_rec_id = 656930
-	 * @return response code
-	 */
-	public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
-		setDepositionId(concept_rec_id, 1);
-		String json = "{}";
-
-		URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("POST");
-
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-		String latest_draft = zenodoModel.getLinks().getLatest_draft();
-		deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
-		bucket = getBucket(latest_draft);
-
-		return responseCode;
-
-	}
-
-	/**
-	 * To finish uploading a version or new deposition not published
-	 * It sets the deposition_id and the bucket to be used
-	 *
-	 *
-	 * @param deposition_id the deposition id of the not yet published upload
-	 *            concept_rec_id = 656930
-	 * @return response code
-	 * @throws IOException
-	 * @throws MissingConceptDoiException
-	 */
-	public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
-
-		this.deposition_id = deposition_id;
-
-		String json = "{}";
-
-		URL url = new URL(urlString + "/" + deposition_id);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setRequestMethod("POST");
-		conn.setDoOutput(true);
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-		conn.disconnect();
-
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-		bucket = zenodoModel.getLinks().getBucket();
-
-		return responseCode;
-
-	}
-
-	private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
-
-		ZenodoModelList zenodoModelList = new Gson()
-			.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
-
-		for (ZenodoModel zm : zenodoModelList) {
-			if (zm.getConceptrecid().equals(concept_rec_id)) {
-				deposition_id = zm.getId();
-				return;
-			}
-		}
-		if (zenodoModelList.size() == 0)
-			throw new MissingConceptDoiException(
-				"The concept record id specified was missing in the list of depositions");
-		setDepositionId(concept_rec_id, page + 1);
-
-	}
-
-	private String getPrevDepositions(String page) throws IOException {
-
-		HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
-		urlBuilder.addQueryParameter("page", page);
-
-		URL url = new URL(urlBuilder.build().toString());
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("GET");
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		return body;
-
-	}
-
-	private String getBucket(String inputUurl) throws IOException {
-
-		URL url = new URL(inputUurl);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("GET");
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-
-		return zenodoModel.getLinks().getBucket();
-
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
@ -1,14 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Community {
-	private String identifier;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
@ -1,47 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Creator {
-	private String affiliation;
-	private String name;
-	private String orcid;
-
-	public String getAffiliation() {
-		return affiliation;
-	}
-
-	public void setAffiliation(String affiliation) {
-		this.affiliation = affiliation;
-	}
-
-	public String getName() {
-		return name;
-	}
-
-	public void setName(String name) {
-		this.name = name;
-	}
-
-	public String getOrcid() {
-		return orcid;
-	}
-
-	public void setOrcid(String orcid) {
-		this.orcid = orcid;
-	}
-
-	public static Creator newInstance(String name, String affiliation, String orcid) {
-		Creator c = new Creator();
-		if (name != null) {
-			c.name = name;
-		}
-		if (affiliation != null) {
-			c.affiliation = affiliation;
-		}
-		if (orcid != null) {
-			c.orcid = orcid;
-		}
-
-		return c;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
@ -1,44 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class File implements Serializable {
-	private String checksum;
-	private String filename;
-	private long filesize;
-	private String id;
-
-	public String getChecksum() {
-		return checksum;
-	}
-
-	public void setChecksum(String checksum) {
-		this.checksum = checksum;
-	}
-
-	public String getFilename() {
-		return filename;
-	}
-
-	public void setFilename(String filename) {
-		this.filename = filename;
-	}
-
-	public long getFilesize() {
-		return filesize;
-	}
-
-	public void setFilesize(long filesize) {
-		this.filesize = filesize;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
@ -1,23 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Grant implements Serializable {
-	private String id;
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public static Grant newInstance(String id) {
-		Grant g = new Grant();
-		g.id = id;
-
-		return g;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
@ -1,92 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Links implements Serializable {
-
-	private String bucket;
-
-	private String discard;
-
-	private String edit;
-	private String files;
-	private String html;
-	private String latest_draft;
-	private String latest_draft_html;
-	private String publish;
-
-	private String self;
-
-	public String getBucket() {
-		return bucket;
-	}
-
-	public void setBucket(String bucket) {
-		this.bucket = bucket;
-	}
-
-	public String getDiscard() {
-		return discard;
-	}
-
-	public void setDiscard(String discard) {
-		this.discard = discard;
-	}
-
-	public String getEdit() {
-		return edit;
-	}
-
-	public void setEdit(String edit) {
-		this.edit = edit;
-	}
-
-	public String getFiles() {
-		return files;
-	}
-
-	public void setFiles(String files) {
-		this.files = files;
-	}
-
-	public String getHtml() {
-		return html;
-	}
-
-	public void setHtml(String html) {
-		this.html = html;
-	}
-
-	public String getLatest_draft() {
-		return latest_draft;
-	}
-
-	public void setLatest_draft(String latest_draft) {
-		this.latest_draft = latest_draft;
-	}
-
-	public String getLatest_draft_html() {
-		return latest_draft_html;
-	}
-
-	public void setLatest_draft_html(String latest_draft_html) {
-		this.latest_draft_html = latest_draft_html;
-	}
-
-	public String getPublish() {
-		return publish;
-	}
-
-	public void setPublish(String publish) {
-		this.publish = publish;
-	}
-
-	public String getSelf() {
-		return self;
-	}
-
-	public void setSelf(String self) {
-		this.self = self;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
@ -1,153 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class Metadata implements Serializable {
-
-	private String access_right;
-	private List<Community> communities;
-	private List<Creator> creators;
-	private String description;
-	private String doi;
-	private List<Grant> grants;
-	private List<String> keywords;
-	private String language;
-	private String license;
-	private PrereserveDoi prereserve_doi;
-	private String publication_date;
-	private List<String> references;
-	private List<RelatedIdentifier> related_identifiers;
-	private String title;
-	private String upload_type;
-	private String version;
-
-	public String getUpload_type() {
-		return upload_type;
-	}
-
-	public void setUpload_type(String upload_type) {
-		this.upload_type = upload_type;
-	}
-
-	public String getVersion() {
-		return version;
-	}
-
-	public void setVersion(String version) {
-		this.version = version;
-	}
-
-	public String getAccess_right() {
-		return access_right;
-	}
-
-	public void setAccess_right(String access_right) {
-		this.access_right = access_right;
-	}
-
-	public List<Community> getCommunities() {
-		return communities;
-	}
-
-	public void setCommunities(List<Community> communities) {
-		this.communities = communities;
-	}
-
-	public List<Creator> getCreators() {
-		return creators;
-	}
-
-	public void setCreators(List<Creator> creators) {
-		this.creators = creators;
-	}
-
-	public String getDescription() {
-		return description;
-	}
-
-	public void setDescription(String description) {
-		this.description = description;
-	}
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public List<Grant> getGrants() {
-		return grants;
-	}
-
-	public void setGrants(List<Grant> grants) {
-		this.grants = grants;
-	}
-
-	public List<String> getKeywords() {
-		return keywords;
-	}
-
-	public void setKeywords(List<String> keywords) {
-		this.keywords = keywords;
-	}
-
-	public String getLanguage() {
-		return language;
-	}
-
-	public void setLanguage(String language) {
-		this.language = language;
-	}
-
-	public String getLicense() {
-		return license;
-	}
-
-	public void setLicense(String license) {
-		this.license = license;
-	}
-
-	public PrereserveDoi getPrereserve_doi() {
-		return prereserve_doi;
-	}
-
-	public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
-		this.prereserve_doi = prereserve_doi;
-	}
-
-	public String getPublication_date() {
-		return publication_date;
-	}
-
-	public void setPublication_date(String publication_date) {
-		this.publication_date = publication_date;
-	}
-
-	public List<String> getReferences() {
-		return references;
-	}
-
-	public void setReferences(List<String> references) {
-		this.references = references;
-	}
-
-	public List<RelatedIdentifier> getRelated_identifiers() {
-		return related_identifiers;
-	}
-
-	public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
-		this.related_identifiers = related_identifiers;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
@ -1,25 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class PrereserveDoi implements Serializable {
-	private String doi;
-	private String recid;
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public String getRecid() {
-		return recid;
-	}
-
-	public void setRecid(String recid) {
-		this.recid = recid;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
@ -1,43 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class RelatedIdentifier implements Serializable {
-	private String identifier;
-	private String relation;
-	private String resource_type;
-	private String scheme;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-
-	public String getRelation() {
-		return relation;
-	}
-
-	public void setRelation(String relation) {
-		this.relation = relation;
-	}
-
-	public String getResource_type() {
-		return resource_type;
-	}
-
-	public void setResource_type(String resource_type) {
-		this.resource_type = resource_type;
-	}
-
-	public String getScheme() {
-		return scheme;
-	}
-
-	public void setScheme(String scheme) {
-		this.scheme = scheme;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
@ -1,118 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class ZenodoModel implements Serializable {
-
-	private String conceptrecid;
-	private String created;
-
-	private List<File> files;
-	private String id;
-	private Links links;
-	private Metadata metadata;
-	private String modified;
-	private String owner;
-	private String record_id;
-	private String state;
-	private boolean submitted;
-	private String title;
-
-	public String getConceptrecid() {
-		return conceptrecid;
-	}
-
-	public void setConceptrecid(String conceptrecid) {
-		this.conceptrecid = conceptrecid;
-	}
-
-	public String getCreated() {
-		return created;
-	}
-
-	public void setCreated(String created) {
-		this.created = created;
-	}
-
-	public List<File> getFiles() {
-		return files;
-	}
-
-	public void setFiles(List<File> files) {
-		this.files = files;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public Links getLinks() {
-		return links;
-	}
-
-	public void setLinks(Links links) {
-		this.links = links;
-	}
-
-	public Metadata getMetadata() {
-		return metadata;
-	}
-
-	public void setMetadata(Metadata metadata) {
-		this.metadata = metadata;
-	}
-
-	public String getModified() {
-		return modified;
-	}
-
-	public void setModified(String modified) {
-		this.modified = modified;
-	}
-
-	public String getOwner() {
-		return owner;
-	}
-
-	public void setOwner(String owner) {
-		this.owner = owner;
-	}
-
-	public String getRecord_id() {
-		return record_id;
-	}
-
-	public void setRecord_id(String record_id) {
-		this.record_id = record_id;
-	}
-
-	public String getState() {
-		return state;
-	}
-
-	public void setState(String state) {
-		this.state = state;
-	}
-
-	public boolean isSubmitted() {
-		return submitted;
-	}
-
-	public void setSubmitted(boolean submitted) {
-		this.submitted = submitted;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
@ -1,7 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.util.ArrayList;
-
-public class ZenodoModelList extends ArrayList<ZenodoModel> {
-}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
@ -12,7 +12,9 @@ import java.util.concurrent.TimeUnit;

 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.commons.lang3.time.DateUtils;
 import org.apache.http.HttpHeaders;
+import org.joda.time.Instant;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@ -119,7 +119,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 							.getContext()
 							.stream()
 							.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
-							.collect(Collectors.toList()));
+							.collect(Collectors.toCollection(ArrayList::new)));
 			}
 			return (T) res;
 		} else {
@ -1003,4 +1003,41 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 			.orElse(null);
 	}

+	/**
+	 * Implements bad and ugly things that we should get rid of ASAP.
+	 *
+	 * @param value
+	 * @return
+	 * @param <T>
+	 */
+	public static <T extends Oaf> T dedicatedUglyHacks(T value) {
+		if (value instanceof OafEntity) {
+			if (value instanceof Result) {
+				final Result r = (Result) value;
+
+				// Fix for AMS Acta
+				Optional
+					.ofNullable(r.getInstance())
+					.map(
+						instance -> instance
+							.stream()
+							.filter(
+								i -> Optional
+									.ofNullable(i.getHostedby())
+									.map(KeyValue::getKey)
+									.map(dsId -> dsId.equals("10|re3data_____::4cc76bed7ce2fb95fd8e7a2dfde16016"))
+									.orElse(false)))
+					.ifPresent(instance -> instance.forEach(i -> {
+						if (Optional
+							.ofNullable(i.getPid())
+							.map(pid -> pid.stream().noneMatch(p -> p.getValue().startsWith("10.6092/unibo/amsacta")))
+							.orElse(false)) {
+							i.setHostedby(UNKNOWN_REPOSITORY);
+						}
+					}));
+			}
+		}
+		return value;
+	}
+
 }
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
@ -432,7 +432,10 @@ public class MergeUtils {

 		// merge datainfo for same context id
 		merge.setContext(mergeLists(merge.getContext(), enrich.getContext(), trust, Context::getId, (r, l) -> {
-			r.getDataInfo().addAll(l.getDataInfo());
+			ArrayList<DataInfo> di = new ArrayList<>();
+			di.addAll(r.getDataInfo());
+			di.addAll(l.getDataInfo());
+			r.setDataInfo(di);
 			return r;
 		}));

--- a/dhp-common/src/main/resources/eu/dnetlib/scholexplorer/relation/relations.json
+++ b/dhp-common/src/main/resources/eu/dnetlib/scholexplorer/relation/relations.json
@ -154,13 +154,5 @@
  "unknown":{
    "original":"Unknown",
    "inverse":"Unknown"
-  },
-  "isamongtopnsimilardocuments": {
-    "original": "IsAmongTopNSimilarDocuments",
-    "inverse": "HasAmongTopNSimilarDocuments"
-  },
-  "hasamongtopnsimilardocuments": {
-    "original": "HasAmongTopNSimilarDocuments",
-    "inverse": "IsAmongTopNSimilarDocuments"
  }
 }
--- a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala
+++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala
@ -65,13 +65,12 @@ abstract class AbstractScalaApplication(
    val conf: SparkConf = new SparkConf()
    val master = parser.get("master")
    log.info(s"Creating Spark session: Master: $master")
-    val b = SparkSession
+    SparkSession
      .builder()
      .config(conf)
      .appName(getClass.getSimpleName)
-    if (master != null)
-      b.master(master)
-    b.getOrCreate()
+      .master(master)
+      .getOrCreate()
  }

  def reportTotalSize(targetPath: String, outputBasePath: String): Unit = {
--- a/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala
+++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala
@ -65,11 +65,7 @@ object ScholixUtils extends Serializable {
  }

  def generateScholixResourceFromResult(r: Result): ScholixResource = {
-    val sum = ScholixUtils.resultToSummary(r)
-    if (sum != null)
    generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r))
-    else
-      null
  }

  val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] =
@ -157,14 +153,6 @@ object ScholixUtils extends Serializable {

  }

-  def invRel(rel: String): String = {
-    val semanticRelation = relations.getOrElse(rel.toLowerCase, null)
-    if (semanticRelation != null)
-      semanticRelation.inverse
-    else
-      null
-  }
-
  def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = {
    if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) {
      val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { d =>
@ -389,7 +377,10 @@ object ScholixUtils extends Serializable {
    if (persistentIdentifiers.isEmpty)
      return null
    s.setLocalIdentifier(persistentIdentifiers.asJava)
-//    s.setTypology(r.getResulttype.getClassid)
+    if (r.isInstanceOf[Publication])
+      s.setTypology(Typology.publication)
+    else
+      s.setTypology(Typology.dataset)

    s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)

--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
@ -1,109 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.commons.io.IOUtils;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-
-@Disabled
-class ZenodoAPIClientTest {
-
-	private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
-	private final String ACCESS_TOKEN = "";
-
-	private final String CONCEPT_REC_ID = "657113";
-
-	private final String depositionId = "674915";
-
-	@Test
-	void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-
-		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
-
-		Assertions.assertEquals(200, client.sendMretadata(metadata));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewDeposition() throws IOException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(201, client.newDeposition());
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-
-		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
-
-		Assertions.assertEquals(200, client.sendMretadata(metadata));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionNewName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionOldName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-}
--- a/dhp-pace-core/pom.xml
+++ b/dhp-pace-core/pom.xml
@ -24,7 +24,7 @@
 				<executions>
 					<execution>
 						<id>scala-compile-first</id>
-						<phase>process-resources</phase>
+						<phase>initialize</phase>
 						<goals>
 							<goal>add-source</goal>
 							<goal>compile</goal>
@ -59,6 +59,14 @@
 			<groupId>edu.cmu</groupId>
 			<artifactId>secondstring</artifactId>
 		</dependency>
+		<dependency>
+			<groupId>com.google.guava</groupId>
+			<artifactId>guava</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>com.google.code.gson</groupId>
+			<artifactId>gson</artifactId>
+		</dependency>
 		<dependency>
 			<groupId>org.apache.commons</groupId>
 			<artifactId>commons-lang3</artifactId>
@ -83,6 +91,10 @@
 			<groupId>com.fasterxml.jackson.core</groupId>
 			<artifactId>jackson-databind</artifactId>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.commons</groupId>
+			<artifactId>commons-math3</artifactId>
+		</dependency>
 		<dependency>
 			<groupId>com.jayway.jsonpath</groupId>
 			<artifactId>json-path</artifactId>
@ -101,90 +113,4 @@
 		</dependency>
 	</dependencies>

-	<profiles>
-		<profile>
-			<id>spark-24</id>
-			<activation>
-				<activeByDefault>true</activeByDefault>
-			</activation>
-
-			<build>
-				<plugins>
-					<plugin>
-						<groupId>org.codehaus.mojo</groupId>
-						<artifactId>build-helper-maven-plugin</artifactId>
-						<version>3.4.0</version>
-						<executions>
-							<execution>
-								<phase>generate-sources</phase>
-								<goals>
-									<goal>add-source</goal>
-								</goals>
-								<configuration>
-									<sources>
-										<source>src/main/spark-2</source>
-									</sources>
-								</configuration>
-							</execution>
-						</executions>
-					</plugin>
-				</plugins>
-			</build>
-		</profile>
-
-		<profile>
-			<id>spark-34</id>
-
-			<build>
-				<plugins>
-					<plugin>
-						<groupId>org.codehaus.mojo</groupId>
-						<artifactId>build-helper-maven-plugin</artifactId>
-						<version>3.4.0</version>
-						<executions>
-							<execution>
-								<phase>generate-sources</phase>
-								<goals>
-									<goal>add-source</goal>
-								</goals>
-								<configuration>
-									<sources>
-										<source>src/main/spark-2</source>
-									</sources>
-								</configuration>
-							</execution>
-						</executions>
-					</plugin>
-				</plugins>
-			</build>
-		</profile>
-
-		<profile>
-			<id>spark-35</id>
-
-			<build>
-				<plugins>
-					<plugin>
-						<groupId>org.codehaus.mojo</groupId>
-						<artifactId>build-helper-maven-plugin</artifactId>
-						<version>3.4.0</version>
-						<executions>
-							<execution>
-								<phase>generate-sources</phase>
-								<goals>
-									<goal>add-source</goal>
-								</goals>
-								<configuration>
-									<sources>
-										<source>src/main/spark-35</source>
-									</sources>
-								</configuration>
-							</execution>
-						</executions>
-					</plugin>
-				</plugins>
-			</build>
-		</profile>
-	</profiles>
-
 </project>
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala
@ -3,7 +3,7 @@ package eu.dnetlib.pace.model
 import com.jayway.jsonpath.{Configuration, JsonPath}
 import eu.dnetlib.pace.common.AbstractPaceFunctions
 import eu.dnetlib.pace.config.{DedupConfig, Type}
-import eu.dnetlib.pace.util.{MapDocumentUtil, SparkCompatUtils}
+import eu.dnetlib.pace.util.MapDocumentUtil
 import org.apache.commons.lang3.StringUtils
 import org.apache.spark.sql.catalyst.encoders.RowEncoder
 import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
@ -52,7 +52,7 @@ case class SparkModel(conf: DedupConfig) {
  val orderingFieldPosition: Int = schema.fieldIndex(orderingFieldName)

  val parseJsonDataset: (Dataset[String] => Dataset[Row]) = df => {
-    df.map(r => rowFromJson(r))(SparkCompatUtils.encoderFor(schema))
+    df.map(r => rowFromJson(r))(RowEncoder(schema))
  }

  def rowFromJson(json: String): Row = {
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java
@ -1,3 +1,4 @@
+
 package eu.dnetlib.pace.tree;

 import java.util.Map;
--- a/dhp-pace-core/src/main/spark-2/eu/dnetlib/pace/util/SparkCompatUtils.scala
+++ b/dhp-pace-core/src/main/spark-2/eu/dnetlib/pace/util/SparkCompatUtils.scala
@ -1,12 +0,0 @@
-package eu.dnetlib.pace.util
-
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
-import org.apache.spark.sql.types.StructType
-
-object SparkCompatUtils {
-
-  def encoderFor(schema: StructType): ExpressionEncoder[Row] = {
-    RowEncoder(schema)
-  }
-}
--- a/dhp-pace-core/src/main/spark-35/eu/dnetlib/pace/util/SparkCompatUtils.scala
+++ b/dhp-pace-core/src/main/spark-35/eu/dnetlib/pace/util/SparkCompatUtils.scala
@ -1,12 +0,0 @@
-package eu.dnetlib.pace.util
-
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
-import org.apache.spark.sql.types.StructType
-
-object SparkCompatUtils {
-
-  def encoderFor(schema: StructType): ExpressionEncoder[Row] = {
-    ExpressionEncoder(schema)
-  }
-}
--- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
+++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
@ -11,7 +11,6 @@ import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;

 import eu.dnetlib.pace.model.Person;
-import jdk.nashorn.internal.ir.annotations.Ignore;

 public class UtilTest {

--- a/dhp-shade-package/pom.xml
+++ b/dhp-shade-package/pom.xml
@ -1,169 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-    <parent>
-        <groupId>eu.dnetlib.dhp</groupId>
-        <artifactId>dhp</artifactId>
-        <version>1.2.5-SNAPSHOT</version>
-        <relativePath>../pom.xml</relativePath>
-
-    </parent>
-
-    <artifactId>dhp-shade-package</artifactId>
-    <packaging>jar</packaging>
-
-    <distributionManagement>
-        <site>
-            <id>DHPSite</id>
-            <url>${dhp.site.stage.path}/dhp-common</url>
-        </site>
-    </distributionManagement>
-
-    <description>This module create a jar of all module dependencies</description>
-
-
-    <dependencies>
-
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-actionmanager</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-<!--        <dependency>-->
-<!--            <groupId>eu.dnetlib.dhp</groupId>-->
-<!--            <artifactId>dhp-aggregation</artifactId>-->
-<!--            <version>${project.version}</version>-->
-<!--        </dependency>-->
-<!--        <dependency>-->
-<!--            <groupId>eu.dnetlib.dhp</groupId>-->
-<!--            <artifactId>dhp-blacklist</artifactId>-->
-<!--            <version>${project.version}</version>-->
-<!--        </dependency>-->
-<!--        <dependency>-->
-<!--            <groupId>eu.dnetlib.dhp</groupId>-->
-<!--            <artifactId>dhp-broker-events</artifactId>-->
-<!--            <version>${project.version}</version>-->
-<!--        </dependency>-->
-<!--        <dependency>-->
-<!--            <groupId>eu.dnetlib.dhp</groupId>-->
-<!--            <artifactId>dhp-dedup-openaire</artifactId>-->
-<!--            <version>${project.version}</version>-->
-<!--        </dependency>-->
-<!--        <dependency>-->
-<!--            <groupId>eu.dnetlib.dhp</groupId>-->
-<!--            <artifactId>dhp-enrichment</artifactId>-->
-<!--            <version>${project.version}</version>-->
-<!--        </dependency>-->
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-graph-mapper</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-graph-provision</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-impact-indicators</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-stats-actionsets</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-stats-hist-snaps</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-stats-monitor-irish</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-stats-promote</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-stats-update</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-swh</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-usage-raw-data-update</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-usage-stats-build</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-    </dependencies>
-
-
-    <build>
-        <plugins>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-shade-plugin</artifactId>
-                <executions>
-                    <execution>
-                        <phase>package</phase>
-                        <goals>
-                            <goal>shade</goal>
-                        </goals>
-                        <configuration>
-                            <transformers>
-                                <transformer
-                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
-                                    <mainClass>eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels</mainClass>
-                                </transformer>
-                                <!-- This is needed if you have dependencies that use Service Loader. Most Google Cloud client libraries do. -->
-                                <transformer
-                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
-                                <transformer
-                                        implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
-                                    <resource>META-INF/cxf/bus-extensions.txt</resource>
-                                </transformer>
-                            </transformers>
-                            <filters>
-                                <filter>
-                                    <artifact>*:*</artifact>
-                                    <excludes>
-                                        <exclude>META-INF/maven/**</exclude>
-                                        <exclude>META-INF/*.SF</exclude>
-                                        <exclude>META-INF/*.DSA</exclude>
-                                        <exclude>META-INF/*.RSA</exclude>
-                                    </excludes>
-                                </filter>
-                            </filters>
-                            <relocations>
-                                <relocation>
-                                    <pattern>com</pattern>
-                                    <shadedPattern>repackaged.com.google.common</shadedPattern>
-                                    <includes>
-                                        <include>com.google.common.**</include>
-                                    </includes>
-                                </relocation>
-                            </relocations>
-                        </configuration>
-                    </execution>
-                </executions>
-            </plugin>
-        </plugins>
-    </build>
-
-</project>
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java
@ -42,6 +42,9 @@ public class Constants {
 	public static final String NULL = "NULL";
 	public static final String NA = "N/A";

+	public static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
+	public static final String WEB_CRAWL_NAME = "Web Crawl";
+
 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

 	private Constants() {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
@ -41,9 +41,9 @@ public class PrepareAffiliationRelations implements Serializable {
 	private static final Logger log = LoggerFactory.getLogger(PrepareAffiliationRelations.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	private static final String ID_PREFIX = "50|doi_________::";
-	public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference";
-	public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!";
-	public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref";
+	public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:openaireinference";
+	public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by OpenAIRE";
+	public static final String BIP_INFERENCE_PROVENANCE = "openaire:affiliation";

 	public static <I extends Result> void main(String[] args) throws Exception {

@ -71,6 +71,9 @@ public class PrepareAffiliationRelations implements Serializable {
 		final String dataciteInputPath = parser.get("dataciteInputPath");
 		log.info("dataciteInputPath: {}", dataciteInputPath);

+		final String webcrawlInputPath = parser.get("webCrawlInputPath");
+		log.info("webcrawlInputPath: {}", webcrawlInputPath);
+
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

@ -102,10 +105,16 @@ public class PrepareAffiliationRelations implements Serializable {
 				JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations(
 					spark, dataciteInputPath, collectedFromDatacite);

+				List<KeyValue> collectedFromWebCrawl = OafMapperUtils
+					.listKeyValues(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME);
+				JavaPairRDD<Text, Text> webCrawlRelations = prepareAffiliationRelations(
+					spark, webcrawlInputPath, collectedFromWebCrawl);
+
 				crossrefRelations
 					.union(pubmedRelations)
 					.union(openAPCRelations)
 					.union(dataciteRelations)
+					.union(webCrawlRelations)
 					.saveAsHadoopFile(
 						outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
@ -10,7 +10,6 @@ import java.util.stream.Collectors;

 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.BZip2Codec;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
@ -84,7 +83,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
 				resultsRDD
 					.union(projectsRDD)
 					.saveAsHadoopFile(
-						outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
+						outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
 			});
 	}

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java
@ -21,6 +21,7 @@ import org.slf4j.LoggerFactory;

 import com.fasterxml.jackson.databind.ObjectMapper;

+import eu.dnetlib.dhp.actionmanager.Constants;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
@ -29,6 +30,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
 import eu.dnetlib.dhp.schema.oaf.utils.PidType;
+import io.netty.util.Constant;
 import scala.Tuple2;

 /**
@ -44,8 +46,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
 	private static final String PMID_PREFIX = "50|pmid________::";

 	private static final String PMCID_PREFIX = "50|pmc_________::";
-	private static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
-	private static final String WEB_CRAWL_NAME = "Web Crawl";
+
 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

 	public static void main(String[] args) throws Exception {
@ -104,8 +105,8 @@ public class CreateActionSetFromWebEntries implements Serializable {
 				final String ror = ROR_PREFIX
 					+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
 				ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
-				ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
-				ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));
+//				ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
+//				ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));

 				return ret
 					.iterator();
@ -139,11 +140,17 @@ public class CreateActionSetFromWebEntries implements Serializable {
 				"institution", functions
 					.explode(
 						functions.col("institutions")))
+
 			.selectExpr(
-				"id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror",
+				"id", "doi", "institution.ror as ror",
 				"institution.country_code as country_code", "publication_year")
 			.distinct();

+//			.selectExpr(
+//				"id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror",
+//				"institution.country_code as country_code", "publication_year")
+//			.distinct();
+
 	}

 	private static Dataset<Row> readBlackList(SparkSession spark, String inputPath) {
@ -214,7 +221,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
 						ModelConstants.IS_AUTHOR_INSTITUTION_OF,
 						Arrays
 							.asList(
-								OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)),
+								OafMapperUtils.keyValue(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME)),
 						OafMapperUtils
 							.dataInfo(
 								false, null, false, false,
@ -233,7 +240,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
 						ModelConstants.HAS_AUTHOR_INSTITUTION,
 						Arrays
 							.asList(
-								OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)),
+								OafMapperUtils.keyValue(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME)),
 						OafMapperUtils
 							.dataInfo(
 								false, null, false, false,
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
@ -1,7 +1,6 @@

 package eu.dnetlib.dhp.collection.plugin.rest;

-import java.util.Map;
 import java.util.Optional;
 import java.util.Spliterator;
 import java.util.Spliterators;
@ -10,8 +9,6 @@ import java.util.stream.StreamSupport;

 import org.apache.commons.lang3.StringUtils;

-import com.google.gson.Gson;
-
 import eu.dnetlib.dhp.collection.ApiDescriptor;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
 import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
@ -50,9 +47,6 @@ public class RestCollectorPlugin implements CollectorPlugin {
 		final String entityXpath = api.getParams().get("entityXpath");
 		final String authMethod = api.getParams().get("authMethod");
 		final String authToken = api.getParams().get("authToken");
-		final String requestHeaderMap = api.getParams().get("requestHeaderMap");
-		Gson gson = new Gson();
-		Map requestHeaders = gson.fromJson(requestHeaderMap, Map.class);
 		final String resultSizeValue = Optional
 			.ofNullable(api.getParams().get("resultSizeValue"))
 			.filter(StringUtils::isNotBlank)
@ -70,6 +64,9 @@ public class RestCollectorPlugin implements CollectorPlugin {
 		if (StringUtils.isBlank(resultFormatValue)) {
 			throw new CollectorException("Param 'resultFormatValue' is null or empty");
 		}
+		if (StringUtils.isBlank(queryParams)) {
+			throw new CollectorException("Param 'queryParams' is null or empty");
+		}
 		if (StringUtils.isBlank(entityXpath)) {
 			throw new CollectorException("Param 'entityXpath' is null or empty");
 		}
@ -95,8 +92,7 @@ public class RestCollectorPlugin implements CollectorPlugin {
 			entityXpath,
 			authMethod,
 			authToken,
-			resultOutputFormat,
-			requestHeaders);
+			resultOutputFormat);

 		return StreamSupport
 			.stream(
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
@ -9,11 +9,8 @@ import java.net.URL;
 import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.Iterator;
-import java.util.Map;
 import java.util.Queue;
 import java.util.concurrent.PriorityBlockingQueue;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;

 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
@ -21,18 +18,22 @@ import javax.xml.transform.TransformerConfigurationException;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
-import javax.xml.xpath.*;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;

 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.http.HttpHeaders;
+import org.apache.http.entity.ContentType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;

-import com.google.common.collect.Maps;
-
 import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils;
 import eu.dnetlib.dhp.common.collection.CollectorException;
 import eu.dnetlib.dhp.common.collection.HttpClientParams;
@ -47,23 +48,20 @@ import eu.dnetlib.dhp.common.collection.HttpClientParams;
 *
 */
 public class RestIterator implements Iterator<String> {
+
 	private static final Logger log = LoggerFactory.getLogger(RestIterator.class);
 	public static final String UTF_8 = "UTF-8";
 	private static final int MAX_ATTEMPTS = 5;

 	private final HttpClientParams clientParams;

-	private final String AUTHBASIC = "basic";
-
-	private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
-	private static final String EMPTY_XML = XML_HEADER + "<" + JsonUtils.XML_WRAP_TAG + "></" + JsonUtils.XML_WRAP_TAG
-		+ ">";
+	private final String BASIC = "basic";

 	private final String baseUrl;
 	private final String resumptionType;
 	private final String resumptionParam;
 	private final String resultFormatValue;
-	private String queryParams = "";
+	private String queryParams;
 	private final int resultSizeValue;
 	private int resumptionInt = 0; // integer resumption token (first record to harvest)
 	private int resultTotal = -1;
@ -91,11 +89,6 @@ public class RestIterator implements Iterator<String> {
 	 */
 	private final String resultOutputFormat;

-	/*
-	 * Can be used to set additional request headers, like for content negotiation
-	 */
-	private Map<String, String> requestHeaders;
-
 	/**
 	 * RestIterator class compatible to version 1.3.33
 	 */
@ -114,8 +107,7 @@ public class RestIterator implements Iterator<String> {
 		final String entityXpath,
 		final String authMethod,
 		final String authToken,
-		final String resultOutputFormat,
-		final Map<String, String> requestHeaders) {
+		final String resultOutputFormat) {

 		this.clientParams = clientParams;
 		this.baseUrl = baseUrl;
@ -127,7 +119,6 @@ public class RestIterator implements Iterator<String> {
 		this.authMethod = authMethod;
 		this.authToken = authToken;
 		this.resultOutputFormat = resultOutputFormat;
-		this.requestHeaders = requestHeaders != null ? requestHeaders : Maps.newHashMap();

 		this.queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue
 			: "";
@ -157,12 +148,7 @@ public class RestIterator implements Iterator<String> {
 	}

 	private void initQueue() {
-		if (queryParams.equals("") && querySize.equals("") && queryFormat.equals("")) {
-			query = baseUrl;
-		} else {
-			query = baseUrl + "?" + queryParams + querySize + queryFormat;
-		}
-
+		this.query = this.baseUrl + "?" + this.queryParams + this.querySize + this.queryFormat;
 		log.info("REST calls starting with {}", this.query);
 	}

@ -176,23 +162,11 @@ public class RestIterator implements Iterator<String> {
 	 */
 	@Override
 	public boolean hasNext() {
-		synchronized (this.recordQueue) {
-			while (this.recordQueue.isEmpty() && !this.query.isEmpty()) {
-				try {
-					this.query = downloadPage(this.query, 0);
-				} catch (final CollectorException e) {
-					log.debug("CollectorPlugin.next()-Exception: {}", e);
-					throw new RuntimeException(e);
-				}
-			}
-
-			if (!this.recordQueue.isEmpty()) {
-				return true;
-			}
-
+		if (this.recordQueue.isEmpty() && this.query.isEmpty()) {
 			disconnect();
 			return false;
 		}
+		return true;
 	}

 	/*
@ -202,6 +176,14 @@ public class RestIterator implements Iterator<String> {
 	@Override
 	public String next() {
 		synchronized (this.recordQueue) {
+			while (this.recordQueue.isEmpty() && !this.query.isEmpty()) {
+				try {
+					this.query = downloadPage(this.query, 0);
+				} catch (final CollectorException e) {
+					log.debug("CollectorPlugin.next()-Exception: {}", e);
+					throw new RuntimeException(e);
+				}
+			}
 			return this.recordQueue.poll();
 		}
 	}
@ -227,8 +209,9 @@ public class RestIterator implements Iterator<String> {

 		try {
 			String resultJson;
-			String resultXml = XML_HEADER;
+			String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 			String nextQuery = "";
+			final String emptyXml = resultXml + "<" + JsonUtils.XML_WRAP_TAG + "></" + JsonUtils.XML_WRAP_TAG + ">";
 			Node resultNode = null;
 			NodeList nodeList = null;
 			String qUrlArgument = "";
@ -243,48 +226,37 @@ public class RestIterator implements Iterator<String> {
 				}
 			}

-			// find pagination page start number in queryParam and remove before start the first query
-			if ((resumptionType.toLowerCase().equals("pagination") || resumptionType.toLowerCase().equals("page"))
-				&& (query.contains("paginationStart="))) {
-
-				final Matcher m = Pattern.compile("paginationStart=([0-9]+)").matcher(query);
-				m.find(); // guaranteed to be true for this regex
-
-				String[] pageVal = m.group(0).split("=");
-				pagination = Integer.parseInt(pageVal[1]);
-
-				// remove page start number from query and queryParams
-				queryParams = queryParams.replaceFirst("&?paginationStart=[0-9]+", "");
-				query = query.replaceFirst("&?paginationStart=[0-9]+", "");
-
-			}
-
 			try {
 				log.info("requesting URL [{}]", query);

 				final URL qUrl = new URL(query);
 				log.debug("authMethod: {}", this.authMethod);
-				if (this.authMethod == "bearer") {
-					log.trace("RestIterator.downloadPage():: authMethod before inputStream: " + resultXml);
-					requestHeaders.put("Authorization", "Bearer " + authToken);
-					// requestHeaders.put("Content-Type", "application/json");
-				} else if (AUTHBASIC.equalsIgnoreCase(this.authMethod)) {
-					log.trace("RestIterator.downloadPage():: authMethod before inputStream: " + resultXml);
-					requestHeaders.put("Authorization", "Basic " + authToken);
-					// requestHeaders.put("accept", "application/xml");
-				}
-				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
+				if ("bearer".equalsIgnoreCase(this.authMethod)) {
+					log.trace("authMethod before inputStream: {}", resultXml);
+					final HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
+					conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + this.authToken);
+					conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
 					conn.setRequestMethod("GET");
-				this.setRequestHeader(conn);
-				resultStream = conn.getInputStream();
+					theHttpInputStream = conn.getInputStream();
+				} else if (this.BASIC.equalsIgnoreCase(this.authMethod)) {
+					log.trace("authMethod before inputStream: {}", resultXml);
+					final HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
+					conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + this.authToken);
+					conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
+					conn.setRequestMethod("GET");
+					theHttpInputStream = conn.getInputStream();
+				} else {
+					theHttpInputStream = qUrl.openStream();
+				}

+				this.resultStream = theHttpInputStream;
 				if ("json".equals(this.resultOutputFormat)) {
 					resultJson = IOUtils.toString(this.resultStream, StandardCharsets.UTF_8);
 					resultXml = JsonUtils.convertToXML(resultJson);
 					this.resultStream = IOUtils.toInputStream(resultXml, UTF_8);
 				}

-				if (!isEmptyXml(resultXml)) {
+				if (!(emptyXml).equalsIgnoreCase(resultXml)) {
 					resultNode = (Node) this.xpath
 						.evaluate("/", new InputSource(this.resultStream), XPathConstants.NODE);
 					nodeList = (NodeList) this.xprEntity.evaluate(resultNode, XPathConstants.NODESET);
@ -293,7 +265,8 @@ public class RestIterator implements Iterator<String> {
 						final StringWriter sw = new StringWriter();
 						this.transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
 						final String toEnqueue = sw.toString();
-						if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue) || isEmptyXml(toEnqueue)) {
+						if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue)
+							|| emptyXml.equalsIgnoreCase(toEnqueue)) {
 							log
 								.warn(
 									"The following record resulted in empty item for the feeding queue: {}", resultXml);
@ -321,7 +294,6 @@ public class RestIterator implements Iterator<String> {
 							throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2");
 						}
 						qUrlArgument = qUrl.getQuery();
-
 						final String[] arrayQUrlArgument = qUrlArgument.split("&");
 						for (final String arrayUrlArgStr : arrayQUrlArgument) {
 							if (arrayUrlArgStr.startsWith(this.resumptionParam)) {
@ -335,7 +307,7 @@ public class RestIterator implements Iterator<String> {
 							}
 						}

-						if (isEmptyXml(resultXml)
+						if (((emptyXml).equalsIgnoreCase(resultXml))
 							|| ((nodeList != null) && (nodeList.getLength() < this.resultSizeValue))) {
 							// resumptionStr = "";
 							if (nodeList != null) {
@ -354,13 +326,13 @@ public class RestIterator implements Iterator<String> {

 					case "pagination":
 					case "page": // pagination, iterate over page numbers
-						if (nodeList != null && nodeList.getLength() > 0) {
+						this.pagination += 1;
+						if (nodeList != null) {
 							this.discoverResultSize += nodeList.getLength();
 						} else {
 							this.resultTotal = this.discoverResultSize;
 							this.pagination = this.discoverResultSize;
 						}
-						this.pagination += 1;
 						this.resumptionInt = this.pagination;
 						this.resumptionStr = Integer.toString(this.resumptionInt);
 						break;
@ -408,8 +380,7 @@ public class RestIterator implements Iterator<String> {
 			try {
 				if (this.resultTotal == -1) {
 					this.resultTotal = Integer.parseInt(this.xprResultTotalPath.evaluate(resultNode));
-					if ("page".equalsIgnoreCase(this.resumptionType)
-						&& !this.AUTHBASIC.equalsIgnoreCase(this.authMethod)) {
+					if ("page".equalsIgnoreCase(this.resumptionType) && !this.BASIC.equalsIgnoreCase(this.authMethod)) {
 						this.resultTotal += 1;
 					} // to correct the upper bound
 					log.info("resultTotal was -1 is now: " + this.resultTotal);
@ -438,10 +409,6 @@ public class RestIterator implements Iterator<String> {

 	}

-	private boolean isEmptyXml(String s) {
-		return EMPTY_XML.equalsIgnoreCase(s);
-	}
-
 	private boolean isInteger(final String s) {
 		boolean isValidInteger = false;
 		try {
@ -466,22 +433,6 @@ public class RestIterator implements Iterator<String> {
 		}
 	}

-	/**
-	 * setRequestHeader
-	 *
-	 * setRequestProperty: Sets the general request property. If a property with the key already exists, overwrite its value with the new value.
-	 * @param conn
-	 */
-	private void setRequestHeader(HttpURLConnection conn) {
-		if (requestHeaders != null) {
-			for (String key : requestHeaders.keySet()) {
-				conn.setRequestProperty(key, requestHeaders.get(key));
-			}
-			log.debug("Set Request Header with: " + requestHeaders);
-		}
-
-	}
-
 	public String getResultFormatValue() {
 		return this.resultFormatValue;
 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XMLIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XMLIterator.java
@ -8,10 +8,7 @@ import java.io.StringWriter;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
-import java.util.Arrays;
 import java.util.Iterator;
-import java.util.List;
-import java.util.stream.Collectors;

 import javax.xml.stream.XMLEventFactory;
 import javax.xml.stream.XMLEventReader;
@ -22,7 +19,6 @@ import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.events.StartElement;
 import javax.xml.stream.events.XMLEvent;

-import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;

@ -62,23 +58,13 @@ public class XMLIterator implements Iterator<String> {

 	private String element;

-	private List<String> elements;
-
 	private InputStream inputStream;

 	public XMLIterator(final String element, final InputStream inputStream) {
 		super();
 		this.element = element;
-		if (element.contains(",")) {
-			elements = Arrays
-				.stream(element.split(","))
-				.filter(StringUtils::isNoneBlank)
-				.map(String::toLowerCase)
-				.collect(Collectors.toList());
-		}
 		this.inputStream = inputStream;
 		this.parser = getParser();
-
 		try {
 			this.current = findElement(parser);
 		} catch (XMLStreamException e) {
@ -127,7 +113,7 @@ public class XMLIterator implements Iterator<String> {
 				final XMLEvent event = parser.nextEvent();

 				// TODO: replace with depth tracking instead of close tag tracking.
-				if (event.isEndElement() && isCheckTag(event.asEndElement().getName().getLocalPart())) {
+				if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals(element)) {
 					writer.add(event);
 					break;
 				}
@ -156,48 +142,31 @@ public class XMLIterator implements Iterator<String> {
 		XMLEvent peek = parser.peek();
 		if (peek != null && peek.isStartElement()) {
 			String name = peek.asStartElement().getName().getLocalPart();
-			if (isCheckTag(name))
+			if (element.equals(name)) {
 				return peek;
 			}
+		}

 		while (parser.hasNext()) {
-			XMLEvent event = parser.nextEvent();
+			final XMLEvent event = parser.nextEvent();
 			if (event != null && event.isStartElement()) {
 				String name = event.asStartElement().getName().getLocalPart();
-				if (isCheckTag(name))
+				if (element.equals(name)) {
 					return event;
 				}
 			}
+		}
 		return null;
 	}

 	private XMLEventReader getParser() {
 		try {
-			XMLInputFactory xif = inputFactory.get();
-			xif.setProperty(XMLInputFactory.SUPPORT_DTD, false);
-			return xif.createXMLEventReader(sanitize(inputStream));
+			return inputFactory.get().createXMLEventReader(sanitize(inputStream));
 		} catch (XMLStreamException e) {
 			throw new RuntimeException(e);
 		}
 	}

-	private boolean isCheckTag(final String tagName) {
-		if (elements != null) {
-			final String found = elements
-				.stream()
-				.filter(e -> e.equalsIgnoreCase(tagName))
-				.findFirst()
-				.orElse(null);
-			if (found != null)
-				return true;
-		} else {
-			if (element.equalsIgnoreCase(tagName)) {
-				return true;
-			}
-		}
-		return false;
-	}
-
 	private Reader sanitize(final InputStream in) {
 		final CharsetDecoder charsetDecoder = Charset.forName(UTF_8).newDecoder();
 		charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE);
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DataFetcher.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DataFetcher.java
@ -0,0 +1,134 @@
+package eu.dnetlib.dhp.transformation.xslt;
+
+import java.io.Serializable;
+import net.sf.saxon.s9api.*;
+
+import org.apache.commons.io.IOUtils;
+import org.json.JSONObject;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class fetches JSON from a provided link and returns
+ * a Dublin Core. This functionality is particularly needed for OSF Preprints
+ */
+
+
+public class DataFetcher implements ExtensionFunction, Serializable {
+
+    /**
+     * This method fetches JSON object from a given URL
+     * @param url a url in the metadata for fetching authors in JSON format
+     * @return
+     * @throws IOException
+     */
+    static JSONObject getJson(URL url) throws IOException {
+
+        String json = IOUtils.toString(url);
+        return new JSONObject(json);
+    }
+
+    /**
+     * This method extracts authors from a given JSON
+     *
+     * @param jsonObject
+     * @return
+     */
+    static List<String> getAuthorsFromJson(JSONObject jsonObject) {
+        List<String> authors = new ArrayList<>();
+        // count of authors
+        int countOfAuthors = jsonObject.getJSONArray("data").length();
+        for (int i = 0; i < countOfAuthors; i++) {
+
+            authors.add(jsonObject
+                    .getJSONArray("data")
+                    .getJSONObject(i)
+                    .getJSONObject("embeds")
+                    .getJSONObject("users")
+                    .getJSONObject("data")
+                    .getJSONObject("attributes")
+                    .getString("full_name"));
+        }
+        return authors;
+    }
+
+    /**
+     * This method transforms list of authors into Dublin Core
+     * @param authors
+     * @return Dublin Core list of authors
+     */
+    static List<String> transformListToDublinCore(List<String> authors) {
+
+        List<String> dublinCoreAuthors = new ArrayList<>();
+        for (String author : authors){
+
+            //splitting full name into first and last names according to OpenAIRE v3 guidelines at:
+            // https://guidelines.openaire.eu/en/latest/literature/field_creator.html
+            // “surname”, “initials” (“first name”) “prefix”.
+            String[] parts = author.split(" ");
+            String firstName = parts[0];
+            String lastName = parts[1];
+            char initialOfFirstName = firstName.charAt(0);
+
+            dublinCoreAuthors.add(
+                    "<dc:creator>" + lastName + ", " + initialOfFirstName +  ". (" + firstName + ")" + "</dc:creator>");
+        }
+        return dublinCoreAuthors;
+    }
+
+     /**
+     * This is a public method which fetches authors and transform them into Dublin Core
+     */
+    public static String getAndTransformAuthors(URL url) throws IOException{
+        return String.join(", ", transformListToDublinCore(getAuthorsFromJson(getJson(url))));
+    }
+
+
+    /**
+     * This method extracts link to fulltext from a given JSON
+     *
+     * @return
+     */
+    static private String getLinkToFulltextFromJson(JSONObject jsonObject) throws MalformedURLException {
+
+        // note: Link to JSON containing fulltextlink is in "primary_file" attribute.
+        // And in the resultant JSON,  “links->download” contains the URL to fulltext
+
+        return jsonObject
+                .getJSONObject("data")
+                .getJSONObject("links")
+                .getString("download");
+    }
+
+    /**
+     * This is a public method which fetches link to full text and returns it as a suitable format
+     */
+    public static String getFullTextLinkAndTransform (URL url )throws IOException{
+
+        return getLinkToFulltextFromJson(getJson(url));
+    }
+
+
+    @Override
+    public QName getName() {
+        return null;
+    }
+
+    @Override
+    public SequenceType getResultType() {
+        return null;
+    }
+
+    @Override
+    public SequenceType[] getArgumentTypes() {
+        return new SequenceType[0];
+    }
+
+    @Override
+    public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
+        return null;
+    }
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
@ -55,6 +55,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
 		processor.registerExtensionFunction(new DateCleaner());
 		processor.registerExtensionFunction(new PersonCleaner());

+		processor.registerExtensionFunction(new DataFetcher());
+
 		final XsltCompiler comp = processor.newXsltCompiler();
 		QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
 		comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json
@ -28,7 +28,13 @@
    "paramLongName": "dataciteInputPath",
    "paramDescription": "the path to get the input data from Datacite",
    "paramRequired": true
-  },
+  },{
+  "paramName": "wip",
+  "paramLongName": "webCrawlInputPath",
+  "paramDescription": "the path to get the input data from Web Crawl",
+  "paramRequired": true
+}
+,
  {
    "paramName": "o",
    "paramLongName": "outputPath",
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
@ -35,5 +35,6 @@ crossrefInputPath=/data/bip-affiliations/crossref-data.json
 pubmedInputPath=/data/bip-affiliations/pubmed-data.json
 openapcInputPath=/data/bip-affiliations/openapc-data.json
 dataciteInputPath=/data/bip-affiliations/datacite-data.json
+webCrawlInputPath=/data/bip-affiliations/webCrawl/

 outputPath=/tmp/crossref-affiliations-output-v5
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
@ -17,6 +17,10 @@
            <name>dataciteInputPath</name>
            <description>the path where to find the inferred affiliation relations from Datacite</description>
        </property>
+        <property>
+            <name>webCrawlInputPath</name>
+            <description>the path where to find the inferred affiliation relations from webCrawl</description>
+        </property>
        <property>
            <name>outputPath</name>
            <description>the path where to store the actionset</description>
@ -112,7 +116,7 @@
            <arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
            <arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg>
            <arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg>
-
+            <arg>--webCrawlInputPath</arg><arg>${webCrawlInputPath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml
@ -1,4 +1,4 @@
-    <workflow-app name="Transform_BioEntity_Workflow" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="Transform_BioEntity_Workflow" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
@ -8,19 +8,40 @@
            <name>database</name>
            <description>the PDB Database Working Path</description>
        </property>
-
        <property>
-            <name>targetPath</name>
-            <description>the Target Working dir path</description>
+            <name>mdStoreOutputId</name>
+            <description>the identifier of the cleaned MDStore</description>
+        </property>
+        <property>
+            <name>mdStoreManagerURI</name>
+            <description>the path of the cleaned mdstore</description>
        </property>
    </parameters>

-    <start to="ConvertDB"/>
+    <start to="StartTransaction"/>
+

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

+    <action name="StartTransaction">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>NEW_VERSION</arg>
+            <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+            <capture-output/>
+        </java>
+        <ok to="ConvertDB"/>
+        <error to="RollBack"/>
+    </action>
    <action name="ConvertDB">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
@ -41,11 +62,48 @@
            <arg>--master</arg><arg>yarn</arg>
            <arg>--dbPath</arg><arg>${sourcePath}</arg>
            <arg>--database</arg><arg>${database}</arg>
-            <arg>--targetPath</arg><arg>${targetPath}</arg>
+            <arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
        </spark>
+        <ok to="CommitVersion"/>
+        <error to="RollBack"/>
+
+    </action>
+        <action name="CommitVersion">
+            <java>
+                <configuration>
+                    <property>
+                        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                        <value>true</value>
+                    </property>
+                </configuration>
+                <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+                <arg>--action</arg><arg>COMMIT</arg>
+                <arg>--namenode</arg><arg>${nameNode}</arg>
+                <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+                <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+            </java>
            <ok to="End"/>
            <error to="Kill"/>
        </action>
+
+        <action name="RollBack">
+            <java>
+                <configuration>
+                    <property>
+                        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                        <value>true</value>
+                    </property>
+                </configuration>
+                <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+                <arg>--action</arg><arg>ROLLBACK</arg>
+                <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+                <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+            </java>
+            <ok to="Kill"/>
+            <error to="Kill"/>
+        </action>
+
+
        <end name="End"/>

 </workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json
@ -2,5 +2,5 @@
  {"paramName":"mt",  "paramLongName":"master",       "paramDescription": "should be local or yarn",                  "paramRequired": true},
  {"paramName":"db",  "paramLongName":"database",     "paramDescription": "should be PDB or UNIPROT",                 "paramRequired": true},
  {"paramName":"p",   "paramLongName":"dbPath",       "paramDescription": "the path of the database to transform",    "paramRequired": true},
-  {"paramName":"t",   "paramLongName":"targetPath",   "paramDescription": "the OAF target path ",                     "paramRequired": true}
+  {"paramName":"mo",   "paramLongName":"mdstoreOutputVersion",     "paramDescription": "the oaf path ",                "paramRequired": true}
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json
@ -1,5 +1,20 @@
 [
-  {"paramName":"mt",  "paramLongName":"master",     "paramDescription": "should be local or yarn",                  "paramRequired": true},
-  {"paramName":"s",   "paramLongName":"sourcePath","paramDescription": "the source Path",                              "paramRequired": true},
-  {"paramName":"t",   "paramLongName":"targetPath","paramDescription": "the  oaf path ",  "paramRequired": true}
+  {
+    "paramName": "mt",
+    "paramLongName": "master",
+    "paramDescription": "should be local or yarn",
+    "paramRequired": true
+  },
+  {
+    "paramName": "s",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the source Path",
+    "paramRequired": true
+  },
+  {
+    "paramName": "mo",
+    "paramLongName": "mdstoreOutputVersion",
+    "paramDescription": "the oaf path ",
+    "paramRequired": true
+  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml
@ -9,34 +9,26 @@
            <description>the Working Path</description>
        </property>
        <property>
-            <name>targetPath</name>
-            <description>the OAF MDStore Path</description>
+            <name>mdStoreOutputId</name>
+            <description>the identifier of the cleaned MDStore</description>
        </property>
        <property>
-            <name>sparkDriverMemory</name>
-            <description>memory for driver process</description>
-        </property>
-        <property>
-            <name>sparkExecutorMemory</name>
-            <description>memory for individual executor</description>
-        </property>
-        <property>
-            <name>sparkExecutorCores</name>
-            <description>number of cores used by single executor</description>
+            <name>mdStoreManagerURI</name>
+            <description>the path of the cleaned mdstore</description>
        </property>
        <property>
            <name>resumeFrom</name>
-            <value>DownloadEBILinks</value>
+            <value>CreateEBIDataSet</value>
            <description>node to start</description>
        </property>
    </parameters>

-    <start to="resume_from"/>
+    <start to="StartTransaction"/>

    <decision name="resume_from">
        <switch>
            <case to="DownloadEBILinks">${wf:conf('resumeFrom') eq 'DownloadEBILinks'}</case>
-            <case to="CreateEBIDataSet">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
+            <case to="StartTransaction">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
            <default to="DownloadEBILinks"/>
        </switch>
    </decision>
@ -77,9 +69,29 @@
            <move source="${sourcePath}/ebi_links_dataset" target="${sourcePath}/ebi_links_dataset_old"/>
            <move source="${workingPath}/links_final" target="${sourcePath}/ebi_links_dataset"/>
        </fs>
-        <ok to="CreateEBIDataSet"/>
+        <ok to="StartTransaction"/>
        <error to="Kill"/>
    </action>
+
+    <action name="StartTransaction">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>NEW_VERSION</arg>
+            <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+            <capture-output/>
+        </java>
+        <ok to="CreateEBIDataSet"/>
+        <error to="RollBack"/>
+    </action>
+
+
    <action name="CreateEBIDataSet">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
@ -95,11 +107,49 @@
                ${sparkExtraOPT}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/ebi_links_dataset</arg>
-            <arg>--targetPath</arg><arg>${targetPath}</arg>
+            <arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
            <arg>--master</arg><arg>yarn</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
+
+
+    <action name="CommitVersion">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>COMMIT</arg>
+            <arg>--namenode</arg><arg>${nameNode}</arg>
+            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+        </java>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="RollBack">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>ROLLBACK</arg>
+            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+        </java>
+        <ok to="Kill"/>
+        <error to="Kill"/>
+    </action>
+
    <end name="End"/>
+
 </workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala
@ -1025,7 +1025,6 @@ case object Crossref2Oaf {
            tp._1 match {
              case "electronic" => journal.setIssnOnline(tp._2)
              case "print"      => journal.setIssnPrinted(tp._2)
-              case _            =>
            }
          })
        }
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
@ -231,7 +231,7 @@ object BioDBToOAF {
  def uniprotToOAF(input: String): List[Oaf] = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json = parse(input)
-    val pid = (json \ "pid").extract[String]
+    val pid = (json \ "pid").extract[String].trim()

    val d = new Dataset

--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala
@ -2,12 +2,15 @@ package eu.dnetlib.dhp.sx.bio

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.collection.CollectionUtils
+import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
 import eu.dnetlib.dhp.schema.oaf.Oaf
 import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
+import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}

 object SparkTransformBioDatabaseToOAF {

@ -25,8 +28,13 @@ object SparkTransformBioDatabaseToOAF {

    val dbPath: String = parser.get("dbPath")
    log.info("dbPath: {}", database)
-    val targetPath: String = parser.get("targetPath")
-    log.info("targetPath: {}", database)
+
+    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
+    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
+
+    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
+    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
+    log.info("outputBasePath: {}", outputBasePath)

    val spark: SparkSession =
      SparkSession
@ -43,24 +51,28 @@ object SparkTransformBioDatabaseToOAF {
      case "UNIPROT" =>
        CollectionUtils.saveDataset(
          spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))),
-          targetPath
+          s"$outputBasePath/$MDSTORE_DATA_PATH"
        )
      case "PDB" =>
        CollectionUtils.saveDataset(
          spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))),
-          targetPath
+          s"$outputBasePath/$MDSTORE_DATA_PATH"
        )
      case "SCHOLIX" =>
        CollectionUtils.saveDataset(
          spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)),
-          targetPath
+          s"$outputBasePath/$MDSTORE_DATA_PATH"
        )
      case "CROSSREF_LINKS" =>
        CollectionUtils.saveDataset(
          spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))),
-          targetPath
+          s"$outputBasePath/$MDSTORE_DATA_PATH"
        )
    }
+
+    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
+    val mdStoreSize = df.count
+    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
  }

 }
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala
@ -2,9 +2,12 @@ package eu.dnetlib.dhp.sx.bio.ebi

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.collection.CollectionUtils
+import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
-import eu.dnetlib.dhp.schema.oaf.Oaf
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
+import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
 import eu.dnetlib.dhp.sx.bio.pubmed._
+import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}
 import eu.dnetlib.dhp.utils.ISLookupClientFactory
 import org.apache.commons.io.IOUtils
 import org.apache.hadoop.conf.Configuration
@ -14,13 +17,13 @@ import org.apache.http.client.methods.HttpGet
 import org.apache.http.impl.client.HttpClientBuilder
 import org.apache.spark.SparkConf
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql._
 import org.apache.spark.sql.expressions.Aggregator
+import org.apache.spark.sql._
 import org.slf4j.{Logger, LoggerFactory}

-import java.io.{ByteArrayInputStream, InputStream}
-import java.nio.charset.Charset
-import javax.xml.stream.XMLInputFactory
+import java.io.InputStream
+import scala.io.Source
+import scala.xml.pull.XMLEventReader

 object SparkCreateBaselineDataFrame {

@ -83,7 +86,7 @@ object SparkCreateBaselineDataFrame {
          if (response.getStatusLine.getStatusCode > 400) {
            tries -= 1
          } else
-            return IOUtils.toString(response.getEntity.getContent, Charset.defaultCharset())
+            return IOUtils.toString(response.getEntity.getContent)
        } catch {
          case e: Throwable =>
            println(s"Error on requesting ${r.getURI}")
@ -155,8 +158,7 @@ object SparkCreateBaselineDataFrame {
      IOUtils.toString(
        SparkEBILinksToOaf.getClass.getResourceAsStream(
          "/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json"
-        ),
-        Charset.defaultCharset()
+        )
      )
    )
    parser.parseArgument(args)
@ -165,11 +167,15 @@ object SparkCreateBaselineDataFrame {
    val workingPath = parser.get("workingPath")
    log.info("workingPath: {}", workingPath)

-    val targetPath = parser.get("targetPath")
-    log.info("targetPath: {}", targetPath)
+    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
+    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
+
+    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
+    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
+    log.info("outputBasePath: {}", outputBasePath)

    val hdfsServerUri = parser.get("hdfsServerUri")
-    log.info("hdfsServerUri: {}", targetPath)
+    log.info("hdfsServerUri: {}", hdfsServerUri)

    val skipUpdate = parser.get("skipUpdate")
    log.info("skipUpdate: {}", skipUpdate)
@ -195,11 +201,10 @@ object SparkCreateBaselineDataFrame {
    if (!"true".equalsIgnoreCase(skipUpdate)) {
      downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri)
      val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline", 2000)
-      val inputFactory = XMLInputFactory.newInstance
      val ds: Dataset[PMArticle] = spark.createDataset(
        k.filter(i => i._1.endsWith(".gz"))
          .flatMap(i => {
-            val xml = inputFactory.createXMLEventReader(new ByteArrayInputStream(i._2.getBytes()))
+            val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes()))
            new PMParser(xml)
          })
      )
@ -218,8 +223,11 @@ object SparkCreateBaselineDataFrame {
        .map(a => PubMedToOaf.convert(a, vocabularies))
        .as[Oaf]
        .filter(p => p != null),
-      targetPath
+      s"$outputBasePath/$MDSTORE_DATA_PATH"
    )

+    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
+    val mdStoreSize = df.count
+    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
@ -9,6 +9,9 @@ import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql._
 import org.slf4j.{Logger, LoggerFactory}
+import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
+import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}

 object SparkEBILinksToOaf {

@ -32,8 +35,13 @@ object SparkEBILinksToOaf {
    import spark.implicits._
    val sourcePath = parser.get("sourcePath")
    log.info(s"sourcePath  -> $sourcePath")
-    val targetPath = parser.get("targetPath")
-    log.info(s"targetPath  -> $targetPath")
+    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
+    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
+
+    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
+    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
+    log.info("outputBasePath: {}", outputBasePath)
+
    implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])

    val ebLinks: Dataset[EBILinkItem] = spark.read
@ -46,7 +54,10 @@ object SparkEBILinksToOaf {
        .flatMap(j => BioDBToOAF.parse_ebi_links(j.links))
        .filter(p => BioDBToOAF.EBITargetLinksFilter(p))
        .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)),
-      targetPath
+      s"$outputBasePath/$MDSTORE_DATA_PATH"
    )
+    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
+    val mdStoreSize = df.count
+    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala
@ -1,8 +1,7 @@
 package eu.dnetlib.dhp.sx.bio.pubmed

 import scala.xml.MetaData
-import javax.xml.stream.XMLEventReader
-import scala.xml.pull.{EvElemEnd, EvElemStart, EvText}
+import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader}

 /** @param xml
  */
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
@ -88,6 +88,7 @@ public class PrepareAffiliationRelationsTest {
 					"-pubmedInputPath", crossrefAffiliationRelationPath,
 					"-openapcInputPath", crossrefAffiliationRelationPath,
 					"-dataciteInputPath", crossrefAffiliationRelationPath,
+					"-webCrawlInputPath", crossrefAffiliationRelationPath,
 					"-outputPath", outputPath
 				});

@ -104,7 +105,7 @@ public class PrepareAffiliationRelationsTest {
 //            );
 //        }
 		// count the number of relations
-		assertEquals(80, tmp.count());
+		assertEquals(120, tmp.count());

 		Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
 		dataset.createOrReplaceTempView("result");
@ -115,7 +116,7 @@ public class PrepareAffiliationRelationsTest {
 		// verify that we have equal number of bi-directional relations
 		Assertions
 			.assertEquals(
-				40, execVerification
+				60, execVerification
 					.filter(
 						"relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
 					.collectAsList()
@ -123,7 +124,7 @@ public class PrepareAffiliationRelationsTest {

 		Assertions
 			.assertEquals(
-				40, execVerification
+				60, execVerification
 					.filter(
 						"relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
 					.collectAsList()
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java
@ -15,7 +15,10 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.*;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java
@ -119,9 +119,7 @@ public class ReadCOCITest {
 					workingDir.toString() + "/COCI",
 					"-outputPath",
 					workingDir.toString() + "/COCI_json/",
-					"-inputFile", "input1;input2;input3;input4;input5",
-					"-format",
-					"COCI"
+					"-inputFile", "input1;input2;input3;input4;input5"
 				});

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipMultipleNodeTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipMultipleNodeTest.java
@ -1,64 +0,0 @@
-
-package eu.dnetlib.dhp.collection.plugin.file;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Objects;
-import java.util.stream.Stream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.junit.jupiter.api.*;
-import org.junit.jupiter.api.extension.ExtendWith;
-import org.mockito.junit.jupiter.MockitoExtension;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
-import eu.dnetlib.dhp.common.collection.CollectorException;
-
-@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
-@ExtendWith(MockitoExtension.class)
-public class FileGZipMultipleNodeTest {
-
-	private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPluginTest.class);
-
-	private final ApiDescriptor api = new ApiDescriptor();
-
-	private FileGZipCollectorPlugin plugin;
-
-	private static final String SPLIT_ON_ELEMENT = "incollection,article";
-
-	@BeforeEach
-	public void setUp() throws IOException {
-
-		final String gzipFile = Objects
-			.requireNonNull(
-				this
-					.getClass()
-					.getResource("/eu/dnetlib/dhp/collection/plugin/file/dblp.gz"))
-			.getFile();
-
-		api.setBaseUrl(gzipFile);
-
-		HashMap<String, String> params = new HashMap<>();
-		params.put("splitOnElement", SPLIT_ON_ELEMENT);
-
-		api.setParams(params);
-
-		FileSystem fs = FileSystem.get(new Configuration());
-		plugin = new FileGZipCollectorPlugin(fs);
-	}
-
-	@Test
-	void test() throws CollectorException {
-
-		final Stream<String> stream = plugin.collect(api, new AggregatorReport());
-
-		stream.limit(10).forEach(s -> {
-			Assertions.assertTrue(s.length() > 0);
-			log.info(s);
-		});
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java
@ -36,11 +36,11 @@ public class OsfPreprintCollectorTest {
 	private final String resultTotalXpath = "/*/*[local-name()='links']/*[local-name()='meta']/*[local-name()='total']";

 	private final String resumptionParam = "page";
-	private final String resumptionType = "scan";
-	private final String resumptionXpath = "substring-before(substring-after(/*/*[local-name()='links']/*[local-name()='next'], 'page='), '&')";
+	private final String resumptionType = "page";
+	private final String resumptionXpath = "/*/*[local-name()='links']/*[local-name()='next']";

-	private final String resultSizeParam = "page[size]";
-	private final String resultSizeValue = "100";
+	private final String resultSizeParam = "";
+	private final String resultSizeValue = "";

 	private final String resultFormatParam = "format";
 	private final String resultFormatValue = "json";
@ -74,7 +74,7 @@ public class OsfPreprintCollectorTest {
 		final AtomicInteger i = new AtomicInteger(0);
 		final Stream<String> stream = this.rcp.collect(this.api, new AggregatorReport());

-		stream.limit(2000).forEach(s -> {
+		stream.limit(200).forEach(s -> {
 			Assertions.assertTrue(s.length() > 0);
 			i.incrementAndGet();
 			log.info(s);
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java
@ -4,11 +4,6 @@

 package eu.dnetlib.dhp.collection.plugin.rest;

-import java.io.IOException;
-import java.io.InputStream;
-import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
-import java.net.URL;
 import java.util.HashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Stream;
@ -17,8 +12,6 @@ import org.junit.jupiter.api.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import com.google.gson.Gson;
-
 import eu.dnetlib.dhp.collection.ApiDescriptor;
 import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
 import eu.dnetlib.dhp.common.collection.CollectorException;
@ -32,18 +25,18 @@ class RestCollectorPluginTest {

 	private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class);

-	private final String baseUrl = "https://ddh-openapi.worldbank.org/search";
-	private final String resumptionType = "discover";
-	private final String resumptionParam = "skip";
-	private final String entityXpath = "//*[local-name()='data']";
-	private final String resumptionXpath = "";
-	private final String resultTotalXpath = "//*[local-name()='count']";
-	private final String resultFormatParam = "";
+	private final String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
+	private final String resumptionType = "count";
+	private final String resumptionParam = "from";
+	private final String entityXpath = "//hits/hits";
+	private final String resumptionXpath = "//hits";
+	private final String resultTotalXpath = "//hits/total";
+	private final String resultFormatParam = "format";
 	private final String resultFormatValue = "json";
-	private final String resultSizeParam = "top";
+	private final String resultSizeParam = "size";
 	private final String resultSizeValue = "10";
 	// private String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";
-	private final String query = "";
+	private final String query = "q=%28sources%3AengrXiv+AND+type%3Apreprint%29";
 	// private String query = "=(sources:engrXiv AND type:preprint)";

 	private final String protocolDescriptor = "rest_json2xml";
@ -63,7 +56,6 @@ class RestCollectorPluginTest {
 		params.put("resultSizeValue", resultSizeValue);
 		params.put("queryParams", query);
 		params.put("entityXpath", entityXpath);
-		params.put("requestHeaderMap", "{\"User-Agent\": \"OpenAIRE DEV\"}");

 		api.setBaseUrl(baseUrl);
 		api.setParams(params);
@ -86,19 +78,4 @@ class RestCollectorPluginTest {
 		log.info("{}", i.intValue());
 		Assertions.assertTrue(i.intValue() > 0);
 	}
-
-	@Disabled
-	@Test
-	void testUrl() throws IOException {
-		String url_s = "https://ddh-openapi.worldbank.org/search?&top=10";
-		URL url = new URL(url_s);
-		final HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestMethod("GET");
-		conn.setRequestProperty("User-Agent", "OpenAIRE");
-		Gson gson = new Gson();
-		System.out.println("Request header");
-		System.out.println(gson.toJson(conn.getHeaderFields()));
-		InputStream inputStream = conn.getInputStream();
-
-	}
 }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java
@ -44,7 +44,7 @@ public class RestIteratorTest {

 		final RestIterator iterator = new RestIterator(clientParams, baseUrl, resumptionType, resumptionParam,
 			resumptionXpath, resultTotalXpath, resultFormatParam, resultFormatValue, resultSizeParam, resultSizeValue,
-			query, entityXpath, authMethod, authToken, resultOffsetParam, null);
+			query, entityXpath, authMethod, authToken, resultOffsetParam);
 		int i = 20;
 		while (iterator.hasNext() && i > 0) {
 			String result = iterator.next();
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/xslt/DataFetcherTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/xslt/DataFetcherTest.java
@ -0,0 +1,68 @@
+package eu.dnetlib.dhp.transformation.xslt;
+
+import org.json.JSONObject;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class DataFetcherTest {
+
+    @BeforeEach
+    void setUp() {
+    }
+
+    @AfterEach
+    void tearDown() {
+    }
+
+    @Test
+    void getJson() throws IOException, URISyntaxException {
+        URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
+        JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);
+
+        String x = testJsonObj
+                .getJSONArray("data")
+                .getJSONObject(0)
+                .getJSONObject("embeds")
+                .getJSONObject("users")
+                .getJSONObject("data")
+                .getJSONObject("attributes")
+                .getString("full_name");
+        System.out.println(x);
+        System.out.println(testJsonObj.getJSONArray("data").length());
+        testJsonObj.getJSONArray("data").forEach(System.out::println);
+    }
+
+    @Test
+    void getAuthorsFromJson() throws IOException, URISyntaxException {
+        URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
+        JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);
+        List<String> authors = DataFetcher.getAuthorsFromJson(testJsonObj);
+        System.out.println(authors);
+        System.out.println(DataFetcher.transformListToDublinCore(authors));
+    }
+
+    @Test
+    void getAndTransformAuthors() throws IOException, URISyntaxException {
+        URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
+        System.out.println(DataFetcher.getAndTransformAuthors(contributorsUrl));
+    }
+
+
+    @Test
+    void getLinkToFulltextFromJson() throws URISyntaxException, IOException {
+        URL linkToFullTextDocument = new URI("https://api.osf.io/v2/files/5de7c96f84c479000c7928af/?format=json").toURL();
+        System.out.println(DataFetcher.getFullTextLinkAndTransform(linkToFullTextDocument));
+
+
+    }
+}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json
@ -5,3 +5,5 @@
 {"DOI":"10.1061\/(asce)0733-9372(2002)128:7(575)","Matchings":[{"RORid":"https:\/\/ror.org\/04j198w64","Confidence":0.82}]}
 {"DOI":"10.1061\/(asce)0733-9372(2002)128:7(588)","Matchings":[{"RORid":"https:\/\/ror.org\/03m8km719","Confidence":0.8660254038},{"RORid":"https:\/\/ror.org\/02aze4h65","Confidence":0.87}]}
 {"DOI":"10.1161\/hy0202.103001","Matchings":[{"RORid":"https:\/\/ror.org\/057xtrt18","Confidence":0.7071067812}]}
+{"DOI": "10.1080/13669877.2015.1042504", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/03265fv13"}]}
+{"DOI": "10.1007/3-540-47984-8_14", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/00a0n9e72"}]}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/issn_pub.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/issn_pub.json
@ -789,6 +789,10 @@
      "value": "2227-9717",
      "type": "electronic"
    },
+    {
+      "value": "VALUE",
+      "type": "PIPPO"
+    },
    {
      "value": "1063-4584",
      "type": "pu"
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/plugin/file/dblp.gz
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/plugin/file/dblp.gz
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump
@ -1,15 +1,44 @@
-{"pdb": "1CW0", "title": "crystal structure analysis of very short patch repair (vsr) endonuclease in complex with a duplex dna", "authors": ["S.E.Tsutakawa", "H.Jingami", "K.Morikawa"], "doi": "10.1016/S0092-8674(00)81550-0", "pmid": "10612397"}
-{"pdb": "2CWW", "title": "crystal structure of thermus thermophilus ttha1280, a putative sam- dependent rna methyltransferase, in complex with s-adenosyl-l- homocysteine", "authors": ["A.A.Pioszak", "K.Murayama", "N.Nakagawa", "A.Ebihara", "S.Kuramitsu", "M.Shirouzu", "S.Yokoyama", "Riken Structural Genomics/proteomics Initiative (Rsgi)"], "doi": "10.1107/S1744309105029842", "pmid": "16511182"}
-{"pdb": "6CWE", "title": "structure of alpha-gsa[8,6p] bound by cd1d and in complex with the va14vb8.2 tcr", "authors": ["J.Wang", "D.Zajonc"], "doi": null, "pmid": null}
-{"pdb": "5CWS", "title": "crystal structure of the intact chaetomium thermophilum nsp1-nup49- nup57 channel nucleoporin heterotrimer bound to its nic96 nuclear pore complex attachment site", "authors": ["C.J.Bley", "S.Petrovic", "M.Paduch", "V.Lu", "A.A.Kossiakoff", "A.Hoelz"], "doi": "10.1126/SCIENCE.AAC9176", "pmid": "26316600"}
-{"pdb": "5CWE", "title": "structure of cyp107l2 from streptomyces avermitilis with lauric acid", "authors": ["T.-V.Pham", "S.-H.Han", "J.-H.Kim", "D.-H.Kim", "L.-W.Kang"], "doi": null, "pmid": null}
-{"pdb": "7CW4", "title": "acetyl-coa acetyltransferase from bacillus cereus atcc 14579", "authors": ["J.Hong", "K.J.Kim"], "doi": "10.1016/J.BBRC.2020.09.048", "pmid": "32972748"}
-{"pdb": "2CWP", "title": "crystal structure of metrs related protein from pyrococcus horikoshii", "authors": ["K.Murayama", "M.Kato-Murayama", "M.Shirouzu", "S.Yokoyama", "Riken StructuralGenomics/proteomics Initiative (Rsgi)"], "doi": null, "pmid": null}
-{"pdb": "2CW7", "title": "crystal structure of intein homing endonuclease ii", "authors": ["H.Matsumura", "H.Takahashi", "T.Inoue", "H.Hashimoto", "M.Nishioka", "S.Fujiwara", "M.Takagi", "T.Imanaka", "Y.Kai"], "doi": "10.1002/PROT.20858", "pmid": "16493661"}
-{"pdb": "1CWU", "title": "brassica napus enoyl acp reductase a138g mutant complexed with nad+ and thienodiazaborine", "authors": ["A.Roujeinikova", "J.B.Rafferty", "D.W.Rice"], "doi": "10.1074/JBC.274.43.30811", "pmid": "10521472"}
-{"pdb": "3CWN", "title": "escherichia coli transaldolase b mutant f178y", "authors": ["T.Sandalova", "G.Schneider", "A.Samland"], "doi": "10.1074/JBC.M803184200", "pmid": "18687684"}
-{"pdb": "1CWL", "title": "human cyclophilin a complexed with 4 4-hydroxy-meleu cyclosporin", "authors": ["V.Mikol", "J.Kallen", "P.Taylor", "M.D.Walkinshaw"], "doi": "10.1006/JMBI.1998.2108", "pmid": "9769216"}
-{"pdb": "3CW2", "title": "crystal structure of the intact archaeal translation initiation factor 2 from sulfolobus solfataricus .", "authors": ["E.A.Stolboushkina", "S.V.Nikonov", "A.D.Nikulin", "U.Blaesi", "D.J.Manstein", "R.V.Fedorov", "M.B.Garber", "O.S.Nikonov"], "doi": "10.1016/J.JMB.2008.07.039", "pmid": "18675278"}
-{"pdb": "3CW9", "title": "4-chlorobenzoyl-coa ligase/synthetase in the thioester-forming conformation, bound to 4-chlorophenacyl-coa", "authors": ["A.S.Reger", "J.Cao", "R.Wu", "D.Dunaway-Mariano", "A.M.Gulick"], "doi": "10.1021/BI800696Y", "pmid": "18620418"}
-{"pdb": "3CWU", "title": "crystal structure of an alka host/guest complex 2'-fluoro-2'-deoxy-1, n6-ethenoadenine:thymine base pair", "authors": ["B.R.Bowman", "S.Lee", "S.Wang", "G.L.Verdine"], "doi": "10.1016/J.STR.2008.04.012", "pmid": "18682218"}
-{"pdb": "5CWF", "title": "crystal structure of de novo designed helical repeat protein dhr8", "authors": ["G.Bhabha", "D.C.Ekiert"], "doi": "10.1038/NATURE16162", "pmid": "26675729"}
+{"classification": "Signaling protein", "pdb": "5NM4", "deposition_date": "2017-04-05", "title": "A2a adenosine receptor room-temperature structure determined by serial Femtosecond crystallography", "Keywords": ["Oom-temperature", " serial crystallography", " signaling protein"], "authors": ["T.weinert", "R.cheng", "D.james", "D.gashi", "P.nogly", "K.jaeger", "M.hennig", "", "J.standfuss"], "pmid": "28912485", "doi": "10.1038/S41467-017-00630-4"}
+{"classification": "Oxidoreductase/oxidoreductase inhibitor", "pdb": "4KN3", "deposition_date": "2013-05-08", "title": "Structure of the y34ns91g double mutant of dehaloperoxidase from Amphitrite ornata with 2,4,6-trichlorophenol", "Keywords": ["Lobin", " oxygen storage", " peroxidase", " oxidoreductase", " oxidoreductase-", "Oxidoreductase inhibitor complex"], "authors": ["C.wang", "L.lovelace", "L.lebioda"], "pmid": "23952341", "doi": "10.1021/BI400627W"}
+{"classification": "Transport protein", "pdb": "8HKM", "deposition_date": "2022-11-27", "title": "Ion channel", "Keywords": ["On channel", " transport protein"], "authors": ["D.h.jiang", "J.t.zhang"], "pmid": "37494189", "doi": "10.1016/J.CELREP.2023.112858"}
+{"classification": "Signaling protein", "pdb": "6JT1", "deposition_date": "2019-04-08", "title": "Structure of human soluble guanylate cyclase in the heme oxidised State", "Keywords": ["Oluble guanylate cyclase", " signaling protein"], "authors": ["L.chen", "Y.kang", "R.liu", "J.-x.wu"], "pmid": "31514202", "doi": "10.1038/S41586-019-1584-6"}
+{"classification": "Immune system", "pdb": "7OW6", "deposition_date": "2021-06-16", "title": "Crystal structure of a tcr in complex with hla-a*11:01 bound to kras G12d peptide (vvvgadgvgk)", "Keywords": ["La", " kras", " tcr", " immune system"], "authors": ["V.karuppiah", "R.a.robinson"], "doi": "10.1038/S41467-022-32811-1"}
+{"classification": "Biosynthetic protein", "pdb": "5EQ8", "deposition_date": "2015-11-12", "title": "Crystal structure of medicago truncatula histidinol-phosphate Phosphatase (mthpp) in complex with l-histidinol", "Keywords": ["Istidine biosynthesis", " metabolic pathways", " dimer", " plant", "", "Biosynthetic protein"], "authors": ["M.ruszkowski", "Z.dauter"], "pmid": "26994138", "doi": "10.1074/JBC.M115.708727"}
+{"classification": "De novo protein", "pdb": "8CWA", "deposition_date": "2022-05-18", "title": "Solution nmr structure of 8-residue rosetta-designed cyclic peptide D8.21 in cdcl3 with cis/trans switching (tc conformation, 53%)", "Keywords": ["Yclic peptide", " non natural amino acids", " cis/trans", " switch peptides", "", "De novo design", "Membrane permeability", "De novo protein"], "authors": ["T.a.ramelot", "R.tejero", "G.t.montelione"], "pmid": "36041435", "doi": "10.1016/J.CELL.2022.07.019"}
+{"classification": "Hydrolase", "pdb": "3R6M", "deposition_date": "2011-03-21", "title": "Crystal structure of vibrio parahaemolyticus yeaz", "Keywords": ["Ctin/hsp70 nucleotide-binding fold", " bacterial resuscitation", " viable", "But non-culturable state", "Resuscitation promoting factor", "Ygjd", "", "Yjee", "Vibrio parahaemolyticus", "Hydrolase"], "authors": ["A.roujeinikova", "I.aydin"], "pmid": "21858042", "doi": "10.1371/JOURNAL.PONE.0023245"}
+{"classification": "Hydrolase", "pdb": "2W5J", "deposition_date": "2008-12-10", "title": "Structure of the c14-rotor ring of the proton translocating Chloroplast atp synthase", "Keywords": ["Ydrolase", " chloroplast", " atp synthase", " lipid-binding", " cf(0)", " membrane", "", "Transport", "Formylation", "Energy transduction", "Hydrogen ion transport", "", "Ion transport", "Transmembrane", "Membrane protein"], "authors": ["M.vollmar", "D.schlieper", "M.winn", "C.buechner", "G.groth"], "pmid": "19423706", "doi": "10.1074/JBC.M109.006916"}
+{"classification": "De novo protein", "pdb": "4GLU", "deposition_date": "2012-08-14", "title": "Crystal structure of the mirror image form of vegf-a", "Keywords": ["-protein", " covalent dimer", " cysteine knot protein", " growth factor", " de", "Novo protein"], "authors": ["K.mandal", "M.uppalapati", "D.ault-riche", "J.kenney", "J.lowitz", "S.sidhu", "", "S.b.h.kent"], "pmid": "22927390", "doi": "10.1073/PNAS.1210483109"}
+{"classification": "Hydrolase/hydrolase inhibitor", "pdb": "3WYL", "deposition_date": "2014-09-01", "title": "Crystal structure of the catalytic domain of pde10a complexed with 5- Methoxy-3-(1-phenyl-1h-pyrazol-5-yl)-1-(3-(trifluoromethyl)phenyl) Pyridazin-4(1h)-one", "Keywords": ["Ydrolase-hydrolase inhibitor complex"], "authors": ["H.oki", "Y.hayano"], "pmid": "25384088", "doi": "10.1021/JM5013648"}
+{"classification": "Isomerase", "pdb": "5BOR", "deposition_date": "2015-05-27", "title": "Structure of acetobacter aceti pure-s57c, sulfonate form", "Keywords": ["Cidophile", " pure", " purine biosynthesis", " isomerase"], "authors": ["K.l.sullivan", "T.j.kappock"]}
+{"classification": "Hydrolase", "pdb": "1X0C", "deposition_date": "2005-03-17", "title": "Improved crystal structure of isopullulanase from aspergillus niger Atcc 9642", "Keywords": ["Ullulan", " glycoside hydrolase family 49", " glycoprotein", " hydrolase"], "authors": ["M.mizuno", "T.tonozuka", "A.yamamura", "Y.miyasaka", "H.akeboshi", "S.kamitori", "", "A.nishikawa", "Y.sakano"], "pmid": "18155243", "doi": "10.1016/J.JMB.2007.11.098"}
+{"classification": "Oxidoreductase", "pdb": "7CUP", "deposition_date": "2020-08-23", "title": "Structure of 2,5-dihydroxypridine dioxygenase from pseudomonas putida Kt2440", "Keywords": ["On-heme dioxygenase", " oxidoreductase"], "authors": ["G.q.liu", "H.z.tang"]}
+{"classification": "Ligase", "pdb": "1VCN", "deposition_date": "2004-03-10", "title": "Crystal structure of t.th. hb8 ctp synthetase complex with sulfate Anion", "Keywords": ["Etramer", " riken structural genomics/proteomics initiative", " rsgi", "", "Structural genomics", "Ligase"], "authors": ["M.goto", "Riken structural genomics/proteomics initiative (rsgi)"], "pmid": "15296735", "doi": "10.1016/J.STR.2004.05.013"}
+{"classification": "Transferase/transferase inhibitor", "pdb": "6C9V", "deposition_date": "2018-01-28", "title": "Mycobacterium tuberculosis adenosine kinase bound to (2r,3s,4r,5r)-2- (hydroxymethyl)-5-(6-(4-phenylpiperazin-1-yl)-9h-purin-9-yl) Tetrahydrofuran-3,4-diol", "Keywords": ["Ucleoside analog", " complex", " inhibitor", " structural genomics", " psi-2", "", "Protein structure initiative", "Tb structural genomics consortium", "", "Tbsgc", "Transferase-transferase inhibitor complex"], "authors": ["R.a.crespo", "Tb structural genomics consortium (tbsgc)"], "pmid": "31002508", "doi": "10.1021/ACS.JMEDCHEM.9B00020"}
+{"classification": "De novo protein", "pdb": "4LPY", "deposition_date": "2013-07-16", "title": "Crystal structure of tencon variant g10", "Keywords": ["Ibronectin type iii fold", " alternate scaffold", " de novo protein"], "authors": ["A.teplyakov", "G.obmolova", "G.l.gilliland"], "pmid": "24375666", "doi": "10.1002/PROT.24502"}
+{"classification": "Isomerase", "pdb": "2Y88", "deposition_date": "2011-02-03", "title": "Crystal structure of mycobacterium tuberculosis phosphoribosyl Isomerase (variant d11n) with bound prfar", "Keywords": ["Romatic amino acid biosynthesis", " isomerase", " tim-barrel", " histidine", "Biosynthesis", "Tryptophan biosynthesis"], "authors": ["J.kuper", "A.v.due", "A.geerlof", "M.wilmanns"], "pmid": "21321225", "doi": "10.1073/PNAS.1015996108"}
+{"classification": "Unknown function", "pdb": "1SR0", "deposition_date": "2004-03-22", "title": "Crystal structure of signalling protein from sheep(sps-40) at 3.0a Resolution using crystal grown in the presence of polysaccharides", "Keywords": ["Ignalling protein", " involution", " unknown function"], "authors": ["D.b.srivastava", "A.s.ethayathulla", "N.singh", "J.kumar", "S.sharma", "T.p.singh"]}
+{"classification": "Dna binding protein", "pdb": "3RH2", "deposition_date": "2011-04-11", "title": "Crystal structure of a tetr-like transcriptional regulator (sama_0099) From shewanella amazonensis sb2b at 2.42 a resolution", "Keywords": ["Na/rna-binding 3-helical bundle", " structural genomics", " joint center", "For structural genomics", "Jcsg", "Protein structure initiative", "Psi-", "Biology", "Dna binding protein"], "authors": ["Joint center for structural genomics (jcsg)"]}
+{"classification": "Transferase", "pdb": "2WK5", "deposition_date": "2009-06-05", "title": "Structural features of native human thymidine phosphorylase And in complex with 5-iodouracil", "Keywords": ["Lycosyltransferase", " developmental protein", " angiogenesis", "", "5-iodouracil", "Growth factor", "Enzyme kinetics", "", "Differentiation", "Disease mutation", "Thymidine", "Phosphorylase", "Chemotaxis", "Transferase", "Mutagenesis", "", "Polymorphism"], "authors": ["E.mitsiki", "A.c.papageorgiou", "S.iyer", "N.thiyagarajan", "S.h.prior", "", "D.sleep", "C.finnis", "K.r.acharya"], "pmid": "19555658", "doi": "10.1016/J.BBRC.2009.06.104"}
+{"classification": "Hydrolase", "pdb": "3P9Y", "deposition_date": "2010-10-18", "title": "Crystal structure of the drosophila melanogaster ssu72-pctd complex", "Keywords": ["Hosphatase", " cis proline", " lmw ptp-like fold", " rna polymerase ii ctd", "", "Hydrolase"], "authors": ["J.w.werner-allen", "P.zhou"], "pmid": "21159777", "doi": "10.1074/JBC.M110.197129"}
+{"classification": "Recombination/dna", "pdb": "6OEO", "deposition_date": "2019-03-27", "title": "Cryo-em structure of mouse rag1/2 nfc complex (dna1)", "Keywords": ["(d)j recombination", " dna transposition", " rag", " scid", " recombination", "", "Recombination-dna complex"], "authors": ["X.chen", "Y.cui", "Z.h.zhou", "W.yang", "M.gellert"], "pmid": "32015552", "doi": "10.1038/S41594-019-0363-2"}
+{"classification": "Hydrolase", "pdb": "4ECA", "deposition_date": "1997-02-21", "title": "Asparaginase from e. coli, mutant t89v with covalently bound aspartate", "Keywords": ["Ydrolase", " acyl-enzyme intermediate", " threonine amidohydrolase"], "authors": ["G.j.palm", "J.lubkowski", "A.wlodawer"], "pmid": "8706862", "doi": "10.1016/0014-5793(96)00660-6"}
+{"classification": "Transcription/protein binding", "pdb": "3UVX", "deposition_date": "2011-11-30", "title": "Crystal structure of the first bromodomain of human brd4 in complex With a diacetylated histone 4 peptide (h4k12ack16ac)", "Keywords": ["Romodomain", " bromodomain containing protein 4", " cap", " hunk1", " mcap", "", "Mitotic chromosome associated protein", "Peptide complex", "Structural", "Genomics consortium", "Sgc", "Transcription-protein binding complex"], "authors": ["P.filippakopoulos", "S.picaud", "T.keates", "E.ugochukwu", "F.von delft", "", "C.h.arrowsmith", "A.m.edwards", "J.weigelt", "C.bountra", "S.knapp", "Structural", "Genomics consortium (sgc)"], "pmid": "22464331", "doi": "10.1016/J.CELL.2012.02.013"}
+{"classification": "Membrane protein", "pdb": "1TLZ", "deposition_date": "2004-06-10", "title": "Tsx structure complexed with uridine", "Keywords": ["Ucleoside transporter", " beta barrel", " uridine", " membrane", "Protein"], "authors": ["J.ye", "B.van den berg"], "pmid": "15272310", "doi": "10.1038/SJ.EMBOJ.7600330"}
+{"classification": "Dna binding protein", "pdb": "7AZD", "deposition_date": "2020-11-16", "title": "Dna polymerase sliding clamp from escherichia coli with peptide 20 Bound", "Keywords": ["Ntibacterial drug", " dna binding protein"], "authors": ["C.monsarrat", "G.compain", "C.andre", "I.martiel", "S.engilberge", "V.olieric", "", "P.wolff", "K.brillet", "M.landolfo", "C.silva da veiga", "J.wagner", "G.guichard", "", "D.y.burnouf"], "pmid": "34806883", "doi": "10.1021/ACS.JMEDCHEM.1C00918"}
+{"classification": "Transferase", "pdb": "5N3K", "deposition_date": "2017-02-08", "title": "Camp-dependent protein kinase a from cricetulus griseus in complex With fragment like molecule o-guanidino-l-homoserine", "Keywords": ["Ragment", " complex", " transferase", " serine threonine kinase", " camp", "", "Kinase", "Pka"], "authors": ["C.siefker", "A.heine", "G.klebe"]}
+{"classification": "Biosynthetic protein", "pdb": "8H52", "deposition_date": "2022-10-11", "title": "Crystal structure of helicobacter pylori carboxyspermidine Dehydrogenase in complex with nadp", "Keywords": ["Arboxyspermidine dehydrogenase", " biosynthetic protein"], "authors": ["K.y.ko", "S.c.park", "S.y.cho", "S.i.yoon"], "pmid": "36283333", "doi": "10.1016/J.BBRC.2022.10.049"}
+{"classification": "Metal binding protein", "pdb": "6DYC", "deposition_date": "2018-07-01", "title": "Co(ii)-bound structure of the engineered cyt cb562 variant, ch3", "Keywords": ["Esigned protein", " 4-helix bundle", " electron transport", " metal binding", "Protein"], "authors": ["F.a.tezcan", "J.rittle"], "pmid": "30778140", "doi": "10.1038/S41557-019-0218-9"}
+{"classification": "Protein fibril", "pdb": "6A6B", "deposition_date": "2018-06-27", "title": "Cryo-em structure of alpha-synuclein fiber", "Keywords": ["Lpha-syn fiber", " parkinson disease", " protein fibril"], "authors": ["Y.w.li", "C.y.zhao", "F.luo", "Z.liu", "X.gui", "Z.luo", "X.zhang", "D.li", "C.liu", "X.li"], "pmid": "30065316", "doi": "10.1038/S41422-018-0075-X"}
+{"classification": "Dna", "pdb": "7D5E", "deposition_date": "2020-09-25", "title": "Left-handed g-quadruplex containing two bulges", "Keywords": ["-quadruplex", " bulge", " dna", " left-handed"], "authors": ["P.das", "A.maity", "K.h.ngo", "F.r.winnerdy", "B.bakalar", "Y.mechulam", "E.schmitt", "", "A.t.phan"], "pmid": "33503265", "doi": "10.1093/NAR/GKAA1259"}
+{"classification": "Transferase", "pdb": "3RSY", "deposition_date": "2011-05-02", "title": "Cellobiose phosphorylase from cellulomonas uda in complex with sulfate And glycerol", "Keywords": ["H94", " alpha barrel", " cellobiose phosphorylase", " disaccharide", "Phosphorylase", "Transferase"], "authors": ["A.van hoorebeke", "J.stout", "W.soetaert", "J.van beeumen", "T.desmet", "S.savvides"]}
+{"classification": "Oxidoreductase", "pdb": "7MCI", "deposition_date": "2021-04-02", "title": "Mofe protein from azotobacter vinelandii with a sulfur-replenished Cofactor", "Keywords": ["Zotobacter vinelandii", " mofe-protein", " nitrogenase", " oxidoreductase"], "authors": ["W.kang", "C.lee", "Y.hu", "M.w.ribbe"], "doi": "10.1038/S41929-022-00782-7"}
+{"classification": "Dna", "pdb": "1XUW", "deposition_date": "2004-10-26", "title": "Structural rationalization of a large difference in rna affinity Despite a small difference in chemistry between two 2'-o-modified Nucleic acid analogs", "Keywords": ["Na mimetic methylcarbamate amide analog", " dna"], "authors": ["R.pattanayek", "L.sethaphong", "C.pan", "M.prhavc", "T.p.prakash", "M.manoharan", "", "M.egli"], "pmid": "15547979", "doi": "10.1021/JA044637K"}
+{"classification": "Lyase", "pdb": "7C0D", "deposition_date": "2020-05-01", "title": "Crystal structure of azospirillum brasilense l-2-keto-3-deoxyarabonate Dehydratase (hydroxypyruvate-bound form)", "Keywords": ["-2-keto-3-deoxyarabonate dehydratase", " lyase"], "authors": ["Y.watanabe", "S.watanabe"], "pmid": "32697085", "doi": "10.1021/ACS.BIOCHEM.0C00515"}
+{"classification": "Signaling protein", "pdb": "5LYK", "deposition_date": "2016-09-28", "title": "Crystal structure of intracellular b30.2 domain of btn3a1 bound to Citrate", "Keywords": ["30.2", " butyrophilin", " signaling protein"], "authors": ["F.mohammed", "A.t.baker", "M.salim", "B.e.willcox"], "pmid": "28862425", "doi": "10.1021/ACSCHEMBIO.7B00694"}
+{"classification": "Toxin", "pdb": "4IZL", "deposition_date": "2013-01-30", "title": "Structure of the n248a mutant of the panton-valentine leucocidin s Component from staphylococcus aureus", "Keywords": ["I-component leucotoxin", " staphylococcus aureus", " s component", "Leucocidin", "Beta-barrel pore forming toxin", "Toxin"], "authors": ["L.maveyraud", "B.j.laventie", "G.prevost", "L.mourey"], "pmid": "24643034", "doi": "10.1371/JOURNAL.PONE.0092094"}
+{"classification": "Dna", "pdb": "6F3C", "deposition_date": "2017-11-28", "title": "The cytotoxic [pt(h2bapbpy)] platinum complex interacting with the Cgtacg hexamer", "Keywords": ["Rug-dna complex", " four-way junction", " dna"], "authors": ["M.ferraroni", "C.bazzicalupi", "P.gratteri", "F.papi"], "pmid": "31046177", "doi": "10.1002/ANIE.201814532"}
+{"classification": "Signaling protein/inhibitor", "pdb": "4L5M", "deposition_date": "2013-06-11", "title": "Complexe of arno sec7 domain with the protein-protein interaction Inhibitor n-(4-hydroxy-2,6-dimethylphenyl)benzenesulfonamide at ph6.5", "Keywords": ["Ec-7domain", " signaling protein-inhibitor complex"], "authors": ["F.hoh", "J.rouhana"], "pmid": "24112024", "doi": "10.1021/JM4009357"}
+{"classification": "Signaling protein", "pdb": "5I6J", "deposition_date": "2016-02-16", "title": "Crystal structure of srgap2 f-barx", "Keywords": ["Rgap2", " f-bar", " fx", " signaling protein"], "authors": ["M.sporny", "J.guez-haddad", "M.n.isupov", "Y.opatowsky"], "pmid": "28333212", "doi": "10.1093/MOLBEV/MSX094"}
+{"classification": "Metal binding protein", "pdb": "1Q80", "deposition_date": "2003-08-20", "title": "Solution structure and dynamics of nereis sarcoplasmic calcium binding Protein", "Keywords": ["Ll-alpha", " metal binding protein"], "authors": ["G.rabah", "R.popescu", "J.a.cox", "Y.engelborghs", "C.t.craescu"], "pmid": "15819893", "doi": "10.1111/J.1742-4658.2005.04629.X"}
+{"classification": "Transferase", "pdb": "1TW1", "deposition_date": "2004-06-30", "title": "Beta-1,4-galactosyltransferase mutant met344his (m344h-gal-t1) complex With udp-galactose and magnesium", "Keywords": ["Et344his mutation; closed conformation; mn binding", " transferase"], "authors": ["B.ramakrishnan", "E.boeggeman", "P.k.qasba"], "pmid": "15449940", "doi": "10.1021/BI049007+"}
+{"classification": "Rna", "pdb": "2PN4", "deposition_date": "2007-04-23", "title": "Crystal structure of hepatitis c virus ires subdomain iia", "Keywords": ["Cv", " ires", " subdoamin iia", " rna", " strontium", " hepatitis"], "authors": ["Q.zhao", "Q.han", "C.r.kissinger", "P.a.thompson"], "pmid": "18391410", "doi": "10.1107/S0907444908002011"}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump
@ -1,6 +1,36 @@
-{"pid": "Q6GZX4", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 41."}], "title": "Putative transcription factor 001R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": "Q6GZX3", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 42."}], "title": "Uncharacterized protein 002L;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": "Q197F8", "dates": [{"date": "16-JUN-2009", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "11-JUL-2006", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 27."}], "title": "Uncharacterized protein 002R;", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus."], "references": [{"PubMed": "16912294"}, {" DOI": "10.1128/jvi.00464-06"}]}
-{"pid": "Q197F7", "dates": [{"date": "16-JUN-2009", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "11-JUL-2006", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 23."}], "title": "Uncharacterized protein 003L;", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus."], "references": [{"PubMed": "16912294"}, {" DOI": "10.1128/jvi.00464-06"}]}
-{"pid": "Q6GZX2", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 36."}], "title": "Uncharacterized protein 3R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
-{"pid": "Q6GZX1", "dates": [{"date": "28-JUN-2011", "date_info": " integrated into UniProtKB/Swiss-Prot."}, {"date": "19-JUL-2004", "date_info": " sequence version 1."}, {"date": "12-AUG-2020", "date_info": " entry version 34."}], "title": "Uncharacterized protein 004R;", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3).", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus."], "references": [{"PubMed": "15165820"}, {" DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZX4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 43"}], "title": "Putative transcription factor 001R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZX3", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 45"}], "title": "Uncharacterized protein 002L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q197F8", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-02-23", "date_info": "entry version 29"}], "title": "Uncharacterized protein 002R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q197F7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2020-08-12", "date_info": "entry version 23"}], "title": "Uncharacterized protein 003L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q6GZX2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 3R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZX1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 38"}], "title": "Uncharacterized protein 004R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q197F5", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-10-12", "date_info": "entry version 32"}], "title": "Uncharacterized protein 005L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q6GZX0", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 47"}], "title": "Uncharacterized protein 005R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q91G88", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2023-06-28", "date_info": "entry version 53"}], "title": "Putative KilA-N domain-containing protein 006L", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
+{"pid": " Q6GZW9", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 34"}], "title": "Uncharacterized protein 006R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZW8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 32"}], "title": "Uncharacterized protein 007R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q197F3", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 28"}], "title": "Uncharacterized protein 007R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q197F2", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-02-23", "date_info": "entry version 22"}], "title": "Uncharacterized protein 008L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q6GZW6", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 67"}], "title": "Putative helicase 009L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q91G85", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 38"}], "title": "Uncharacterized protein 009R", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
+{"pid": " Q6GZW5", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 010R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q197E9", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 28"}], "title": "Uncharacterized protein 011L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q6GZW4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 011R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZW3", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 012L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q197E7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 37"}], "title": "Uncharacterized protein IIV3-013L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q6GZW2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 30"}], "title": "Uncharacterized protein 013R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZW1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 014R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZW0", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 50"}], "title": "Uncharacterized protein 015R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZV8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 017L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZV7", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 33"}], "title": "Uncharacterized protein 018L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZV6", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 87"}], "title": "Putative serine/threonine-protein kinase 019R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZV5", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 40"}], "title": "Uncharacterized protein 020R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZV4", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 35"}], "title": "Uncharacterized protein 021L", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q197D8", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-12-14", "date_info": "entry version 35"}], "title": "Transmembrane protein 022L", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q6GZV2", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 33"}], "title": "Uncharacterized protein 023R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q197D7", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2023-02-22", "date_info": "entry version 25"}], "title": "Uncharacterized protein 023R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q6GZV1", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 37"}], "title": "Uncharacterized protein 024R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q197D5", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2006-07-11", "date_info": "sequence version 1"}, {"date": "2022-10-12", "date_info": "entry version 24"}], "title": "Uncharacterized protein 025R", "organism_species": "Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Chloriridovirus"], "references": [{"PubMed": "16912294"}, {"DOI": "10.1128/jvi.00464-06"}]}
+{"pid": " Q91G70", "dates": [{"date": "2009-06-16", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2001-12-01", "date_info": "sequence version 1"}, {"date": "2020-08-12", "date_info": "entry version 32"}], "title": "Uncharacterized protein 026R", "organism_species": "Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Betairidovirinae", "Iridovirus"], "references": [{"PubMed": "17239238"}, {"DOI": "10.1186/1743-422x-4-11"}]}
+{"pid": " Q6GZU9", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 49"}], "title": "Uncharacterized protein 027R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
+{"pid": " Q6GZU8", "dates": [{"date": "2011-06-28", "date_info": "integrated into UniProtKB/Swiss-Prot"}, {"date": "2004-07-19", "date_info": "sequence version 1"}, {"date": "2023-09-13", "date_info": "entry version 55"}], "title": "Uncharacterized protein 028R", "organism_species": "Frog virus 3 (isolate Goorha) (FV-3)", "subjects": ["Viruses", "Varidnaviria", "Bamfordvirae", "Nucleocytoviricota", "Megaviricetes", "Pimascovirales", "Iridoviridae", "Alphairidovirinae", "Ranavirus"], "references": [{"PubMed": "15165820"}, {"DOI": "10.1016/j.virol.2004.02.019"}]}
--- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala
+++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala
@ -2,9 +2,7 @@ package eu.dnetlib.dhp.collection.crossref

 import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
-import eu.dnetlib.dhp.collection.crossref.Crossref2Oaf.TransformationType
-import org.apache.commons.io.IOUtils
-import org.junit.jupiter.api.{BeforeEach, Test}
+import org.junit.jupiter.api.BeforeEach
 import org.junit.jupiter.api.extension.ExtendWith
 import org.mockito.junit.jupiter.MockitoExtension
 import org.slf4j.{Logger, LoggerFactory}
@ -20,13 +18,4 @@ class CrossrefMappingTest extends AbstractVocabularyTest {
    super.setUpVocabulary()
  }

-  @Test
-  def mappingRecord(): Unit = {
-    val input =
-      IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8")
-
-    println(Crossref2Oaf.convert(input, vocabularies, TransformationType.All))
-
-  }
-
 }
--- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala
+++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala
@ -16,7 +16,6 @@ import org.mockito.junit.jupiter.MockitoExtension

 import java.io.{BufferedReader, InputStream, InputStreamReader}
 import java.util.zip.GZIPInputStream
-import javax.xml.stream.XMLInputFactory
 import scala.collection.JavaConverters._
 import scala.collection.mutable.ListBuffer
 import scala.io.Source
@ -50,8 +49,10 @@ class BioScholixTest extends AbstractVocabularyTest {

  @Test
  def testEBIData() = {
-    val inputFactory = XMLInputFactory.newInstance
-    val xml = inputFactory.createXMLEventReader(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
+    val inputXML = Source
+      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
+      .mkString
+    val xml = new XMLEventReader(Source.fromBytes(inputXML.getBytes()))
    new PMParser(xml).foreach(s => println(mapper.writeValueAsString(s)))
  }

@ -90,10 +91,9 @@ class BioScholixTest extends AbstractVocabularyTest {

  @Test
  def testParsingPubmedXML(): Unit = {
-    val inputFactory = XMLInputFactory.newInstance
-
-    val xml = inputFactory.createXMLEventReader(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
-
+    val xml = new XMLEventReader(
+      Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
+    )
    val parser = new PMParser(xml)
    parser.foreach(checkPMArticle)
  }
@ -156,9 +156,9 @@ class BioScholixTest extends AbstractVocabularyTest {
  @Test
  def testPubmedMapping(): Unit = {

-    val inputFactory = XMLInputFactory.newInstance
-    val xml = inputFactory.createXMLEventReader(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
-
+    val xml = new XMLEventReader(
+      Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
+    )
    val parser = new PMParser(xml)
    val results = ListBuffer[Oaf]()
    parser.foreach(x => results += PubMedToOaf.convert(x, vocabularies))
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcidTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcidTest.java
@ -1,4 +1,3 @@
-
 package eu.dnetlib.dhp.broker.oa.matchers.simple;

 import static org.junit.jupiter.api.Assertions.assertEquals;
@ -17,8 +16,7 @@ class EnrichMissingAuthorOrcidTest {
 	final EnrichMissingAuthorOrcid matcher = new EnrichMissingAuthorOrcid();

 	@BeforeEach
-	void setUp() throws Exception {
-	}
+	void setUp() throws Exception {}

 	@Test
 	void testFindDifferences_1() {
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtilsTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtilsTest.java
@ -23,8 +23,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 public class ConversionUtilsTest {

 	@BeforeEach
-	public void setUp() throws Exception {
-	}
+	public void setUp() throws Exception {}

 	@Test
 	public void testAllResultPids() {
--- a/dhp-workflows/dhp-dedup-openaire/pom.xml
+++ b/dhp-workflows/dhp-dedup-openaire/pom.xml
@ -53,10 +53,24 @@
            <artifactId>dhp-pace-core</artifactId>
            <version>${project.version}</version>
        </dependency>
+
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
        </dependency>
+
+        <dependency>
+            <groupId>org.scala-lang.modules</groupId>
+            <artifactId>scala-java8-compat_${scala.binary.version}</artifactId>
+            <version>1.0.2</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.scala-lang.modules</groupId>
+            <artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
+            <version>2.11.0</version>
+        </dependency>
+
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
@ -65,10 +79,16 @@
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.binary.version}</artifactId>
        </dependency>
+
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_${scala.binary.version}</artifactId>
        </dependency>
+
+        <dependency>
+            <groupId>com.arakelian</groupId>
+            <artifactId>java-jq</artifactId>
+        </dependency>
        <dependency>
            <groupId>dom4j</groupId>
            <artifactId>dom4j</artifactId>
@ -81,6 +101,10 @@
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-core</artifactId>
+        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
@ -42,7 +42,6 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import eu.dnetlib.pace.config.DedupConfig;
-import eu.dnetlib.pace.util.SparkCompatUtils;
 import scala.Tuple3;
 import scala.collection.JavaConversions;

@ -149,7 +148,8 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
 			Dataset<Row> pivotHistory = spark
 				.createDataset(
 					Collections.emptyList(),
-					SparkCompatUtils.encoderFor(StructType.fromDDL("id STRING, lastUsage STRING")));
+					RowEncoder
+						.apply(StructType.fromDDL("id STRING, lastUsage STRING")));

 			if (StringUtils.isNotBlank(pivotHistoryDatabase)) {
 				pivotHistory = spark
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java
@ -8,6 +8,7 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.api.java.function.ReduceFunction;
 import org.apache.spark.sql.*;
+import org.apache.spark.sql.catalyst.encoders.RowEncoder;
 import org.apache.spark.sql.types.StructType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -22,7 +23,6 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
-import eu.dnetlib.pace.util.SparkCompatUtils;
 import scala.Tuple2;
 import scala.Tuple3;

@ -145,7 +145,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
 		StructType idsSchema = StructType
 			.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");

-		Dataset<Row> allIds = spark.emptyDataset(SparkCompatUtils.encoderFor(idsSchema));
+		Dataset<Row> allIds = spark.emptyDataset(RowEncoder.apply(idsSchema));

 		for (EntityType entityType : ModelSupport.entityTypes.keySet()) {
 			String entityPath = graphBasePath + '/' + entityType.name();
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
@ -73,12 +73,6 @@
    "name": "Irish Nephrology Society",
    "synonym": []
  },
-  {
-    "id": "100011062",
-    "uri": "http://dx.doi.org/10.13039/100011062",
-    "name": "Asian Spinal Cord Network",
-    "synonym": []
-  },
  {
    "id": "100011096",
    "uri": "http://dx.doi.org/10.13039/100011096",
@ -223,12 +217,6 @@
    "name": "Global Brain Health Institute",
    "synonym": []
  },
-  {
-    "id": "100015776",
-    "uri": "http://dx.doi.org/10.13039/100015776",
-    "name": "Health and Social Care Board",
-    "synonym": []
-  },
  {
    "id": "100015992",
    "uri": "http://dx.doi.org/10.13039/100015992",
@ -403,18 +391,6 @@
    "name": "Irish Hospice Foundation",
    "synonym": []
  },
-  {
-    "id": "501100001596",
-    "uri": "http://dx.doi.org/10.13039/501100001596",
-    "name": "Irish Research Council for Science, Engineering and Technology",
-    "synonym": []
-  },
-  {
-    "id": "501100001597",
-    "uri": "http://dx.doi.org/10.13039/501100001597",
-    "name": "Irish Research Council for the Humanities and Social Sciences",
-    "synonym": []
-  },
  {
    "id": "501100001598",
    "uri": "http://dx.doi.org/10.13039/501100001598",
@ -515,7 +491,7 @@
    "id": "501100002081",
    "uri": "http://dx.doi.org/10.13039/501100002081",
    "name": "Irish Research Council",
-    "synonym": []
+    "synonym": ["501100001596", "501100001597"]
  },
  {
    "id": "501100002736",
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -560,7 +560,15 @@ case object Crossref2Oaf {
                "10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" |
                "10.13039/501100013589" | "10.13039/501100000271" =>
              generateSimpleRelationFromAward(funder, "ukri________", a => a)
-
+            //HFRI
+            case "10.13039/501100013209" =>
+              generateSimpleRelationFromAward(funder, "hfri________", a => a)
+              val targetId = getProjectId("hfri________", "1e5e62235d094afd01cd56e65112fc63")
+              queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
+              queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
+            //ERASMUS+
+            case "10.13039/501100010790" =>
+              generateSimpleRelationFromAward(funder, "erasmusplus_", a => a)
            case _ => logger.debug("no match for " + funder.DOI.get)

          }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
@ -53,6 +53,8 @@ public class Constraints implements Serializable {

 		for (Constraint sc : constraint) {
 			boolean verified = false;
+			if (!param.containsKey(sc.getField()))
+				return false;
 			for (String value : param.get(sc.getField())) {
 				if (sc.verifyCriteria(value.trim())) {
 					verified = true;
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
@ -14,6 +14,7 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
@ -84,11 +85,12 @@ public class SparkCountryPropagationJob {
 		Dataset<R> res = readPath(spark, sourcePath, resultClazz);

 		log.info("Reading prepared info: {}", preparedInfoPath);
-		Dataset<ResultCountrySet> prepared = spark
+		final Dataset<Row> preparedInfoRaw = spark
 			.read()
-			.json(preparedInfoPath)
-			.as(Encoders.bean(ResultCountrySet.class));
+			.json(preparedInfoPath);

+		if (!preparedInfoRaw.isEmpty()) {
+			final Dataset<ResultCountrySet> prepared = preparedInfoRaw.as(Encoders.bean(ResultCountrySet.class));
 			res
 				.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
 				.map(getCountryMergeFn(), Encoders.bean(resultClazz))
@ -96,7 +98,13 @@ public class SparkCountryPropagationJob {
 				.option("compression", "gzip")
 				.mode(SaveMode.Overwrite)
 				.json(outputPath);
-
+		} else {
+			res
+				.write()
+				.option("compression", "gzip")
+				.mode(SaveMode.Overwrite)
+				.json(outputPath);
+		}
 	}

 	private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf_remove.xml
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf_remove.xml
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/dataset/dataset
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/dataset/dataset
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct/otherresearchproduct
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct/otherresearchproduct
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct~HEAD
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct~HEAD
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/publication/publication
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/publication/publication
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/software/software
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/software/software
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
@ -147,6 +147,7 @@ public class CleanGraphSparkJob {
 			.map((MapFunction<T, T>) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz))
 			.map((MapFunction<T, T>) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz))
 			.map((MapFunction<T, T>) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz))
+			.map((MapFunction<T, T>) GraphCleaningFunctions::dedicatedUglyHacks, Encoders.bean(clazz))
 			.filter((FilterFunction<T>) GraphCleaningFunctions::filter);

 		// read the master-duplicate tuples
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
@ -223,11 +223,13 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+                --conf spark.sql.shuffle.partitions=15000
            </spark-opts>
            <arg>--hostedByMapPath</arg><arg>${hostedByMapPath}</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
@ -253,11 +255,13 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+                --conf spark.sql.shuffle.partitions=15000
            </spark-opts>
            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
@ -278,6 +282,7 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql
@ -25,7 +25,8 @@ SELECT
 		o.country || '@@@dnet:countries'                          AS country,
 		array[]::text[]                                           AS alternativenames,
 		'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
-		 array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid
+		 array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid,
+        'Unknown'                                                 AS typology
 FROM dsm_organizations o
 	LEFT OUTER JOIN dsm_services d ON (d.id = o.collectedfrom)
 	LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id)
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/create_scholix_dump_params.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/create_scholix_dump_params.json
@ -1,5 +0,0 @@
-[
-  {"paramName":"mt",  "paramLongName":"master",     "paramDescription": "should be local or yarn",  "paramRequired": false},
-  {"paramName":"s",   "paramLongName":"sourcePath", "paramDescription": "the source Path",           "paramRequired": true},
-  {"paramName":"t",   "paramLongName":"targetPath", "paramDescription": "the path of the scholix dump", "paramRequired": true}
-]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/relation/relations.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/relation/relations.json
@ -1,166 +0,0 @@
-{
-  "cites":{
-    "original":"Cites",
-    "inverse":"IsCitedBy"
-  },
-  "compiles":{
-    "original":"Compiles",
-    "inverse":"IsCompiledBy"
-  },
-  "continues":{
-    "original":"Continues",
-    "inverse":"IsContinuedBy"
-  },
-  "derives":{
-    "original":"IsSourceOf",
-    "inverse":"IsDerivedFrom"
-  },
-  "describes":{
-    "original":"Describes",
-    "inverse":"IsDescribedBy"
-  },
-  "documents":{
-    "original":"Documents",
-    "inverse":"IsDocumentedBy"
-  },
-  "hasmetadata":{
-    "original":"HasMetadata",
-    "inverse":"IsMetadataOf"
-  },
-  "hasassociationwith":{
-    "original":"HasAssociationWith",
-    "inverse":"HasAssociationWith"
-  },
-  "haspart":{
-    "original":"HasPart",
-    "inverse":"IsPartOf"
-  },
-  "hasversion":{
-    "original":"HasVersion",
-    "inverse":"IsVersionOf"
-  },
-  "iscitedby":{
-    "original":"IsCitedBy",
-    "inverse":"Cites"
-  },
-  "iscompiledby":{
-    "original":"IsCompiledBy",
-    "inverse":"Compiles"
-  },
-  "iscontinuedby":{
-    "original":"IsContinuedBy",
-    "inverse":"Continues"
-  },
-  "isderivedfrom":{
-    "original":"IsDerivedFrom",
-    "inverse":"IsSourceOf"
-  },
-  "isdescribedby":{
-    "original":"IsDescribedBy",
-    "inverse":"Describes"
-  },
-  "isdocumentedby":{
-    "original":"IsDocumentedBy",
-    "inverse":"Documents"
-  },
-  "isidenticalto":{
-    "original":"IsIdenticalTo",
-    "inverse":"IsIdenticalTo"
-  },
-  "ismetadatafor":{
-    "original":"IsMetadataFor",
-    "inverse":"IsMetadataOf"
-  },
-  "ismetadataof":{
-    "original":"IsMetadataOf",
-    "inverse":"IsMetadataFor"
-  },
-  "isnewversionof":{
-    "original":"IsNewVersionOf",
-    "inverse":"IsPreviousVersionOf"
-  },
-  "isobsoletedby":{
-    "original":"IsObsoletedBy",
-    "inverse":"Obsoletes"
-  },
-  "isoriginalformof":{
-    "original":"IsOriginalFormOf",
-    "inverse":"IsVariantFormOf"
-  },
-  "ispartof":{
-    "original":"IsPartOf",
-    "inverse":"HasPart"
-  },
-  "ispreviousversionof":{
-    "original":"IsPreviousVersionOf",
-    "inverse":"IsNewVersionOf"
-  },
-  "isreferencedby":{
-    "original":"IsReferencedBy",
-    "inverse":"References"
-  },
-  "isrelatedto":{
-    "original":"IsRelatedTo",
-    "inverse":"IsRelatedTo"
-  },
-  "isrequiredby":{
-    "original":"IsRequiredBy",
-    "inverse":"Requires"
-  },
-  "isreviewedby":{
-    "original":"IsReviewedBy",
-    "inverse":"Reviews"
-  },
-  "issourceof":{
-    "original":"IsSourceOf",
-    "inverse":"IsDerivedFrom"
-  },
-  "issupplementedby":{
-    "original":"IsSupplementedBy",
-    "inverse":"IsSupplementTo"
-  },
-  "issupplementto":{
-    "original":"IsSupplementTo",
-    "inverse":"IsSupplementedBy"
-  },
-  "isvariantformof":{
-    "original":"IsVariantFormOf",
-    "inverse":"IsOriginalFormOf"
-  },
-  "isversionof":{
-    "original":"IsVersionOf",
-    "inverse":"HasVersion"
-  },
-  "obsoletes":{
-    "original":"Obsoletes",
-    "inverse":"IsObsoletedBy"
-  },
-  "references":{
-    "original":"References",
-    "inverse":"IsReferencedBy"
-  },
-  "requires":{
-    "original":"Requires",
-    "inverse":"IsRequiredBy"
-  },
-  "related":{
-    "original":"IsRelatedTo",
-    "inverse":"IsRelatedTo"
-  },
-  "reviews":{
-    "original":"Reviews",
-    "inverse":"IsReviewedBy"
-  },
-  "unknown":{
-    "original":"Unknown",
-    "inverse":"Unknown"
-  },
-  "isamongtopnsimilardocuments": {
-    "original": "IsAmongTopNSimilarDocuments",
-    "inverse": "HasAmongTopNSimilarDocuments"
-  },
-  "hasamongtopnsimilardocuments": {
-    "original": "HasAmongTopNSimilarDocuments",
-    "inverse": "IsAmongTopNSimilarDocuments"
-  }
-}
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala
@ -1,258 +0,0 @@
-package eu.dnetlib.dhp.sx.graph
-
-import com.fasterxml.jackson.databind.ObjectMapper
-import eu.dnetlib.dhp.schema.oaf.{KeyValue, Result, StructuredProperty}
-import eu.dnetlib.dhp.schema.sx.scholix.{
-  Scholix,
-  ScholixCollectedFrom,
-  ScholixEntityId,
-  ScholixIdentifier,
-  ScholixRelationship,
-  ScholixResource
-}
-import org.json4s
-import org.json4s.DefaultFormats
-import org.json4s.jackson.JsonMethods.parse
-
-import scala.collection.JavaConverters._
-import scala.io.Source
-
-case class RelationInfo(
-  source: String,
-  target: String,
-  relclass: String,
-  id: String,
-  collectedfrom: Seq[RelKeyValue]
-) {}
-case class RelKeyValue(key: String, value: String) {}
-
-object ScholexplorerUtils {
-
-  val OPENAIRE_IDENTIFIER_SCHEMA: String = "OpenAIRE Identifier"
-  val mapper = new ObjectMapper()
-
-  case class RelationVocabulary(original: String, inverse: String) {}
-
-  val relations: Map[String, RelationVocabulary] = {
-    val input = Source
-      .fromInputStream(
-        getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/relation/relations.json")
-      )
-      .mkString
-    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
-
-    lazy val json: json4s.JValue = parse(input)
-
-    json.extract[Map[String, RelationVocabulary]]
-  }
-
-  def invRel(rel: String): String = {
-    val semanticRelation = relations.getOrElse(rel.toLowerCase, null)
-    if (semanticRelation != null)
-      semanticRelation.inverse
-    else
-      null
-  }
-
-  def generateDatasourceOpenAIREURLS(id: String): String = {
-    if (id != null && id.length > 12)
-      s"https://explore.openaire.eu/search/dataprovider?datasourceId=${id.substring(3)}"
-    else
-      null
-  }
-
-  def findURLForPID(
-    pidValue: List[StructuredProperty],
-    urls: List[String]
-  ): List[(StructuredProperty, String)] = {
-    pidValue.map { p =>
-      val pv = p.getValue
-
-      val r = urls.find(u => u.toLowerCase.contains(pv.toLowerCase))
-      (p, r.orNull)
-    }
-  }
-
-  def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = {
-    if (r.getInstance() == null || r.getInstance().isEmpty)
-      return List()
-    r.getInstance()
-      .asScala
-      .filter(i => i.getUrl != null && !i.getUrl.isEmpty)
-      .filter(i => i.getPid != null && i.getUrl != null)
-      .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList))
-      .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2))
-      .distinct
-      .toList
-  }
-
-  def generateScholixResourceFromResult(result: Result): ScholixResource = {
-
-    if (result.getInstance() == null || result.getInstance().size() == 0)
-      return null
-
-    if (result.getPid == null || result.getPid.isEmpty)
-      return null
-
-    val r = new ScholixResource
-    r.setDnetIdentifier(result.getId)
-
-    val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(result)
-    if (persistentIdentifiers.isEmpty)
-      return null
-
-    r.setIdentifier(persistentIdentifiers.asJava)
-
-    r.setObjectType(result.getResulttype.getClassid)
-
-    r.setObjectSubType(
-      result
-        .getInstance()
-        .asScala
-        .filter(i => i != null && i.getInstancetype != null)
-        .map(i => i.getInstancetype.getClassname)
-        .distinct
-        .head
-    )
-
-    if (result.getTitle != null && result.getTitle.asScala.nonEmpty) {
-      val titles: List[String] = result.getTitle.asScala.map(t => t.getValue).toList
-      if (titles.nonEmpty)
-        r.setTitle(titles.head)
-      else
-        return null
-    }
-    if (result.getAuthor != null && !result.getAuthor.isEmpty) {
-      val authors: List[ScholixEntityId] =
-        result.getAuthor.asScala
-          .map(a => {
-            val entity = new ScholixEntityId()
-            entity.setName(a.getFullname)
-            if (a.getPid != null && a.getPid.size() > 0)
-              entity.setIdentifiers(
-                a.getPid.asScala
-                  .map(sp => {
-                    val id = new ScholixIdentifier()
-                    id.setIdentifier(sp.getValue)
-                    id.setSchema(sp.getQualifier.getClassid)
-                    id
-                  })
-                  .take(3)
-                  .toList
-                  .asJava
-              )
-            entity
-          })
-          .toList
-      if (authors.nonEmpty)
-        r.setCreator(authors.asJava)
-
-    }
-
-    val dt: List[String] = result
-      .getInstance()
-      .asScala
-      .filter(i => i.getDateofacceptance != null)
-      .map(i => i.getDateofacceptance.getValue)
-      .toList
-    if (dt.nonEmpty)
-      r.setPublicationDate(dt.distinct.head)
-
-    r.setPublisher(
-      result
-        .getInstance()
-        .asScala
-        .map(i => i.getHostedby)
-        .filter(h => !"unknown".equalsIgnoreCase(h.getValue))
-        .map(h => {
-          val eid = new ScholixEntityId()
-          eid.setName(h.getValue)
-          val id = new ScholixIdentifier()
-          id.setIdentifier(h.getKey)
-          id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA)
-          id.setUrl(generateDatasourceOpenAIREURLS(h.getKey))
-          eid.setIdentifiers(List(id).asJava)
-          eid
-        })
-        .distinct
-        .asJava
-    )
-
-    r.setCollectedFrom(
-      result.getCollectedfrom.asScala
-        .map(cf => {
-          val scf = new ScholixCollectedFrom()
-          scf.setProvisionMode("collected")
-          scf.setCompletionStatus("complete")
-          val eid = new ScholixEntityId()
-          eid.setName(cf.getValue)
-          val id = new ScholixIdentifier()
-          id.setIdentifier(cf.getKey)
-          id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA)
-          id.setUrl(generateDatasourceOpenAIREURLS(cf.getKey))
-          eid.setIdentifiers(List(id).asJava)
-          scf.setProvider(eid)
-          scf
-        })
-        .asJava
-    )
-
-    r
-  }
-
-  def generateScholix(relation: RelationInfo, source: ScholixResource): Scholix = {
-    val s: Scholix = new Scholix
-    s.setSource(source)
-    if (relation.collectedfrom != null && relation.collectedfrom.nonEmpty)
-      s.setLinkprovider(
-        relation.collectedfrom
-          .map(cf => {
-            val eid = new ScholixEntityId()
-            eid.setName(cf.value)
-            val id = new ScholixIdentifier()
-            id.setIdentifier(cf.key)
-            id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA)
-            id.setUrl(generateDatasourceOpenAIREURLS(cf.key))
-            eid.setIdentifiers(List(id).asJava)
-            eid
-          })
-          .toList
-          .asJava
-      )
-    else {
-      val eid = new ScholixEntityId()
-      eid.setName("OpenAIRE")
-      val id = new ScholixIdentifier()
-      id.setIdentifier("10|infrastruct_::f66f1bd369679b5b077dcdf006089556")
-      id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA)
-      id.setUrl(generateDatasourceOpenAIREURLS(id.getIdentifier))
-      eid.setIdentifiers(List(id).asJava)
-      s.setLinkprovider(List(eid).asJava)
-    }
-    s.setIdentifier(relation.id)
-    val semanticRelation = relations.getOrElse(relation.relclass.toLowerCase, null)
-    if (semanticRelation == null)
-      return null
-    s.setRelationship(
-      new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)
-    )
-    s.setPublicationDate(source.getPublicationDate)
-    s.setPublisher(source.getPublisher)
-    val mockTarget = new ScholixResource
-    mockTarget.setDnetIdentifier(relation.target)
-    s.setTarget(mockTarget)
-    s
-  }
-
-  def updateTarget(s: Scholix, t: ScholixResource): String = {
-
-    s.setTarget(t)
-    val spublishers: Seq[ScholixEntityId] =
-      if (s.getPublisher != null && !s.getPublisher.isEmpty) s.getPublisher.asScala else List()
-    val tpublishers: Seq[ScholixEntityId] =
-      if (t.getPublisher != null && !t.getPublisher.isEmpty) t.getPublisher.asScala else List()
-    val mergedPublishers = spublishers.union(tpublishers).distinct.take(10).toList
-    s.setPublisher(mergedPublishers.asJava)
-    mapper.writeValueAsString(s)
-  }
-}
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala
@ -1,141 +0,0 @@
-package eu.dnetlib.dhp.sx.graph
-
-import eu.dnetlib.dhp.application.AbstractScalaApplication
-import eu.dnetlib.dhp.schema.oaf.{
-  KeyValue,
-  OtherResearchProduct,
-  Publication,
-  Relation,
-  Result,
-  Software,
-  Dataset => OafDataset
-}
-import eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixResource}
-import org.apache.spark.sql.functions.{col, concat, expr, first, md5}
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql._
-import org.slf4j.{Logger, LoggerFactory}
-
-class SparkCreateScholexplorerDump(propertyPath: String, args: Array[String], log: Logger)
-    extends AbstractScalaApplication(propertyPath, args, log: Logger) {
-
-  /** Here all the spark applications runs this method
-    * where the whole logic of the spark node is defined
-    */
-  override def run(): Unit = {
-    val sourcePath = parser.get("sourcePath")
-    log.info("sourcePath: {}", sourcePath)
-    val targetPath = parser.get("targetPath")
-    log.info("targetPath: {}", targetPath)
-    generateBidirectionalRelations(sourcePath, targetPath, spark)
-    generateScholixResource(sourcePath, targetPath, spark)
-    generateScholix(targetPath, spark)
-  }
-
-  def generateScholixResource(inputPath: String, outputPath: String, spark: SparkSession): Unit = {
-    val entityMap: Map[String, StructType] = Map(
-      "publication"          -> Encoders.bean(classOf[Publication]).schema,
-      "dataset"              -> Encoders.bean(classOf[OafDataset]).schema,
-      "software"             -> Encoders.bean(classOf[Software]).schema,
-      "otherresearchproduct" -> Encoders.bean(classOf[OtherResearchProduct]).schema
-    )
-
-    implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.bean(classOf[ScholixResource])
-    implicit val resultEncoder: Encoder[Result] = Encoders.bean(classOf[Result])
-
-    val resDs = spark.emptyDataset[ScholixResource]
-    val scholixResourceDS = entityMap.foldLeft[Dataset[ScholixResource]](resDs)((res, item) => {
-      println(s"adding ${item._1}")
-      res.union(
-        spark.read
-          .schema(item._2)
-          .json(s"$inputPath/${item._1}")
-          .as[Result]
-          .map(r => ScholexplorerUtils.generateScholixResourceFromResult(r))
-          .filter(s => s != null)
-      )
-    })
-    scholixResourceDS.write.mode(SaveMode.Overwrite).save(s"$outputPath/resource")
-  }
-
-  def generateBidirectionalRelations(inputPath: String, otuputPath: String, spark: SparkSession): Unit = {
-    val relSchema = Encoders.bean(classOf[Relation]).schema
-
-    val relDF = spark.read
-      .schema(relSchema)
-      .json(s"$inputPath/relation")
-      .where(
-        "datainfo.deletedbyinference is false and source like '50%' and target like '50%' " +
-        "and relClass <> 'merges' and relClass <> 'isMergedIn'"
-      )
-      .select("source", "target", "collectedfrom", "relClass")
-
-    def invRel: String => String = { s =>
-      ScholexplorerUtils.invRel(s)
-    }
-
-    import org.apache.spark.sql.functions.udf
-    val inverseRelationUDF = udf(invRel)
-    val inverseRelation = relDF.select(
-      col("target").alias("source"),
-      col("source").alias("target"),
-      col("collectedfrom"),
-      inverseRelationUDF(col("relClass")).alias("relClass")
-    )
-
-    val bidRel = inverseRelation
-      .union(relDF)
-      .withColumn("id", md5(concat(col("source"), col("relClass"), col("target"))))
-      .withColumn("cf", expr("transform(collectedfrom, x -> struct(x.key, x.value))"))
-      .drop("collectedfrom")
-      .withColumnRenamed("cf", "collectedfrom")
-      .groupBy(col("id"))
-      .agg(
-        first("source").alias("source"),
-        first("target").alias("target"),
-        first("relClass").alias("relClass"),
-        first("collectedfrom").alias("collectedfrom")
-      )
-
-    bidRel.write.mode(SaveMode.Overwrite).save(s"$otuputPath/relation")
-
-  }
-
-  def generateScholix(outputPath: String, spark: SparkSession): Unit = {
-    implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.bean(classOf[ScholixResource])
-    implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo(classOf[Scholix])
-
-    import spark.implicits._
-    val relations = spark.read.load(s"$outputPath/relation").as[RelationInfo]
-    val resource = spark.read.load(s"$outputPath/resource").as[ScholixResource]
-
-    val scholix_one_verse = relations
-      .joinWith(resource, relations("source") === resource("dnetIdentifier"), "inner")
-      .map(res => ScholexplorerUtils.generateScholix(res._1, res._2))
-      .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[Scholix])))
-
-    val resourceTarget = relations
-      .joinWith(resource, relations("target") === resource("dnetIdentifier"), "inner")
-      .map(res => (res._1.id, res._2))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[ScholixResource])))
-
-    scholix_one_verse
-      .joinWith(resourceTarget, scholix_one_verse("_1") === resourceTarget("_1"), "inner")
-      .map(k => ScholexplorerUtils.updateTarget(k._1._2, k._2._2))
-      .write
-      .mode(SaveMode.Overwrite)
-      .option("compression", "gzip")
-      .text(s"$outputPath/scholix")
-  }
-}
-
-object SparkCreateScholexplorerDump {
-  val logger: Logger = LoggerFactory.getLogger(SparkCreateScholexplorerDump.getClass)
-
-  def main(args: Array[String]): Unit = {
-    new SparkCreateScholexplorerDump(
-      log = logger,
-      args = args,
-      propertyPath = "/eu/dnetlib/dhp/sx/create_scholix_dump_params.json"
-    ).initialize().run()
-  }
-}
--- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala
@ -1,26 +0,0 @@
-package eu.dnetlib.dhp.sx.graph.scholix
-
-import eu.dnetlib.dhp.schema.sx.scholix.ScholixResource
-import eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump
-import org.apache.spark.SparkConf
-import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
-import org.junit.jupiter.api.Test
-import org.objenesis.strategy.StdInstantiatorStrategy
-
-class ScholixGenerationTest {
-
-  @Test
-  def generateScholix(): Unit = {
-
-    val spark: SparkSession = SparkSession.builder().master("local[*]").getOrCreate()
-    val app = new SparkCreateScholexplorerDump(null, null, null)
-//   app.generateScholixResource("/home/sandro/Downloads/scholix_sample/", "/home/sandro/Downloads/scholix/", spark)
-//    app.generateBidirectionalRelations(
-//      "/home/sandro/Downloads/scholix_sample/",
-//      "/home/sandro/Downloads/scholix/",
-//      spark
-//    )
-    app.generateScholix("/home/sandro/Downloads/scholix/", spark)
-
-  }
-}
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
@ -5,7 +5,6 @@ import java.io.StringReader;
 import java.util.*;
 import java.util.stream.Collectors;

-import eu.dnetlib.dhp.schema.solr.ExternalReference;
 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
@ -376,7 +375,7 @@ public class ProvisionModelSupport {
 		rs.setIsInDiamondJournal(r.getIsInDiamondJournal());
 		rs.setPubliclyFunded(r.getPubliclyFunded());
 		rs.setTransformativeAgreement(r.getTransformativeAgreement());
-		rs.setExternalReference(mapExternalReference(r.getExternalReference()));
+
 		rs.setInstance(mapInstances(r.getInstance()));

 		if (r instanceof Publication) {
@ -562,21 +561,6 @@ public class ProvisionModelSupport {
 			.orElse(null);
 	}

-	private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
-		return Optional.ofNullable(externalReference)
-				.map(ext -> ext.stream()
-						.map(e -> ExternalReference.newInstance(
-								e.getSitename(),
-								e.getLabel(),
-								e.getAlternateLabel(),
-								e.getUrl(),
-								mapCodeLabel(e.getQualifier()),
-								e.getRefidentifier(),
-								e.getQuery()))
-						.collect(Collectors.toList()))
-				.orElse(Lists.newArrayList());
-	}
-
 	private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
 		ContextMapper contextMapper) {

@ -595,7 +579,7 @@ public class ProvisionModelSupport {
 		}

 		return Optional
-			.of(contexts)
+			.ofNullable(contexts)
 			.map(
 				ctx -> ctx
 					.stream()
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java
@ -213,7 +213,7 @@ public class StreamingInputDocumentFactory implements Serializable {
 			}
 			writer.close();
 			indexDocument.addField(INDEX_RESULT, results.toString());
-			indexDocument.addField(INDEX_JSON_RESULT, json);
+			// indexDocument.addField(INDEX_JSON_RESULT, json);
 		} finally {
 			outputFactory.remove();
 			eventFactory.remove();
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml
@ -69,7 +69,7 @@
        </configuration>
    </global>

-    <start to="oaiphm_provision"/>
+    <start to="irish_oaiphm_provision"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java
@ -67,7 +67,7 @@ public class PrepareRelationsJobTest {
 	@Test
 	void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception {

-		final int maxRelations = 20;
+		final int maxRelations = 5;
 		PrepareRelationsJob
 			.main(
 				new String[] {
@ -86,7 +86,7 @@ public class PrepareRelationsJobTest {
 			.as(Encoders.bean(Relation.class))
 			.cache();

-		assertEquals(maxRelations, out.count());
+		assertEquals(44, out.count());

 		Dataset<Row> freq = out
 			.toDF()
@ -101,12 +101,8 @@ public class PrepareRelationsJobTest {
 		long affiliation = getRows(freq, AFFILIATION).get(0).getAs("count");

 		assertEquals(outcome, participation);
-		assertTrue(outcome > affiliation);
-		assertTrue(participation > affiliation);
-
-		assertEquals(7, outcome);
-		assertEquals(7, participation);
-		assertEquals(6, affiliation);
+		assertEquals(outcome, affiliation);
+		assertEquals(4, affiliation);
 	}

 	protected List<Row> getRows(Dataset<Row> freq, String col) {
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java
@ -1,12 +1,13 @@

 package eu.dnetlib.dhp.oa.provision;

+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
 import java.nio.file.Path;

-import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.solr.client.solrj.SolrQuery;
@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
 import org.mockito.Mock;
 import org.mockito.Mockito;
 import org.mockito.junit.jupiter.MockitoExtension;
-
-import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import static org.junit.jupiter.api.Assertions.assertEquals;
+import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
+import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

@ExtendWith(MockitoExtension.class)
 public class SolrConfigExploreTest {
@ -180,7 +180,8 @@ public class SolrConfigExploreTest {

 		new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
 			.run(isLookupClient);
-		Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
+		Assertions
+			.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());

 		String[] queryStrings = {
 			"cancer",
@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
 //            System.out.println(rsp.getExplainMap());

 			for (SolrDocument doc : rsp.getResults()) {
-				log.info(
+				log
+					.info(
 						doc.get("score") + "\t" +
 							doc.get("__indexrecordidentifier") + "\t" +
 							doc.get("resultidentifier") + "\t" +
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java
@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {

 		new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
 			.run(isLookupClient);
-		Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
+		Assertions
+			.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());

 		String[] queryStrings = {
 			"cancer",
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java
@ -91,9 +91,6 @@ class SolrRecordDumpJobTest {
 	public void prepareMocks() throws ISLookUpException, IOException {
 		isLookupClient.setIsLookup(isLookUpService);

-		Mockito
-			.when(isLookupClient.getDsId(Mockito.anyString()))
-			.thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl");
 		Mockito
 			.when(isLookupClient.getLayoutSource(Mockito.anyString()))
 			.thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml")));
--- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml
@ -48,16 +48,25 @@
 			<case to="get-file-names">${wf:conf('resume') eq "format-results"}</case>
 			<case to="map-openaire-to-doi">${wf:conf('resume') eq "map-ids"}</case>
 			<case to="map-scores-to-dois">${wf:conf('resume') eq "map-scores"}</case>
-			<case to="create-openaire-ranking-graph">${wf:conf('resume') eq "start"}</case>
+			<case to="clear-working-dir">${wf:conf('resume') eq "start"}</case>

 			<!-- Aggregation of impact scores on the project level		-->
 			<case to="project-impact-indicators">${wf:conf('resume') eq "projects-impact"}</case>
 			<case to="create-actionset">${wf:conf('resume') eq "create-actionset"}</case>

-			<default to="create-openaire-ranking-graph" />
+			<default to="clear-working-dir" />
 		</switch>
 	</decision>

+	<action name="clear-working-dir">
+		<fs>
+			<delete path="${workingDir}"/>
+			<mkdir path="${workingDir}"/>
+		</fs>
+		<ok to="create-openaire-ranking-graph"/>
+		<error to="clear-working-dir-fail"/>
+	</action>
+
 	<!-- initial step: create citation network -->
 	<action name="create-openaire-ranking-graph">
 		<spark xmlns="uri:oozie:spark-action:0.2">
@ -618,6 +627,10 @@
 		<message>Calculating project impact indicators failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
 	</kill>

+	<kill name="clear-working-dir-fail">
+		<message>Re-create working dir failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+	</kill>
+
 	<!-- Define ending node -->
 	<end name="end" />

--- a/Show More
+++ b/Show More