Compare commits

...

104 Commits
master ... dump

Author SHA1 Message Date
Miriam Baglioni e9384526c6 merge branch with master 2022-06-21 14:27:47 +02:00
Miriam Baglioni d62ca8392b [DUMP] change in the community subworkflow to remove the no longer needed subworkflow in common with the funders dump 2022-06-21 14:26:24 +02:00
Miriam Baglioni 8d372f1be7 refactoring 2022-06-21 14:23:40 +02:00
Miriam Baglioni fdde309f59 refactoring 2022-06-21 14:13:35 +02:00
Miriam Baglioni 2d54a68cde merge branch with master 2022-06-09 18:25:56 +02:00
Miriam Baglioni 2a77ebb431 resolving conflicts 2022-06-06 11:54:33 +02:00
Miriam Baglioni 108e17644e merging with branch beta 2022-05-24 18:37:32 +02:00
Claudio Atzori 5c2949a864 Merge pull request '[stats wf] added open citations & more orgs in monitor, removed collab indicator' (#213) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: #213
2022-05-20 11:38:43 +02:00
Miriam Baglioni 5e0b8f9b5f [CountryPropagation] refactoring 2022-05-20 09:15:53 +02:00
Miriam Baglioni c298c148cb [CountryPropagation] fix NPE issue 2022-05-20 09:11:46 +02:00
Miriam Baglioni eaf9385ae5 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2022-05-17 15:09:37 +02:00
Miriam Baglioni f5207885e3 [EOSCTag] changed code to remove EOSC Jupyter Notebook and modified test to exclude galaxy + software from the tagging for Galaxy 2022-05-17 15:09:22 +02:00
Claudio Atzori d098ad0d93 [hb patch] updated map 2022-05-16 15:54:04 +02:00
Claudio Atzori 6442763f97 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2022-05-16 12:07:45 +02:00
Claudio Atzori 997c50078e [graph grouping] drop relation target path before copying from source 2022-05-16 12:07:40 +02:00
Sandro La Bruzzo c1971d52c4 Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta 2022-05-16 10:30:35 +02:00
Sandro La Bruzzo 4c50f35c8b update publication Date format 2022-05-16 10:29:36 +02:00
Claudio Atzori 0dc33ea391 [openorgs] fixed parent/child query, using the correct semantic labels 2022-05-16 09:20:30 +02:00
Antonis Lempesis 8160763330 fixed conflict 2022-05-13 14:29:31 +03:00
Antonis Lempesis 3fc9efeab6 fixed typo, added open citations and APCs in monitor 2022-05-13 14:28:13 +03:00
Miriam Baglioni e4eac1d20b [EOSC TAG] added code to remove EOSC Jupyter Notebook from subjects and put EOSC as classid in the qualifier 2022-05-13 11:01:33 +02:00
Antonis Lempesis c25134f28d fixed typo 2022-05-12 14:55:47 +03:00
Miriam Baglioni ba642d53ff merging with branch beta 2022-05-12 12:48:00 +02:00
Sandro La Bruzzo 22f65680b9 Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta 2022-05-11 15:30:12 +02:00
Sandro La Bruzzo ca8d26bcb4 added better filter for openCitations 2022-05-11 15:29:57 +02:00
Claudio Atzori 5d3b4a9c25 [graph merge beta] merge datasource originalid, collectedfrom, and pid lists 2022-05-11 14:13:06 +02:00
Antonis Lempesis 23334479bb removed yet another collab, added more orgs in monitor 2022-05-11 13:05:52 +03:00
Claudio Atzori 77bc9863e9 [openorgs] mapping parent/child relations without massaging the semantic labels 2022-05-09 16:06:04 +02:00
Claudio Atzori 378020e30a [eosc_services] unit test adaptation 2022-05-09 16:05:06 +02:00
Miriam Baglioni 89657a0b78 [UsageCount] refactoring 2022-05-09 14:43:27 +02:00
Miriam Baglioni a056f59c6e [UsageCount] make it an action set, as it should be; also changed the tests so they work now 2022-05-09 12:51:35 +02:00
Antonis Lempesis 61b4c19e65 restored indi_result_org_country_collab, removed indi_result_org_collab 2022-05-06 12:52:10 +03:00
Antonis Lempesis cfbbcaf7c4 commented out indi_result_org_country_collab 2022-05-06 12:49:36 +03:00
Claudio Atzori 658450d9a3 Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta 2022-05-05 11:38:08 +02:00
Claudio Atzori 846975c886 [eosc_services] using the correct 'keyword' subject type, as declared in the dnet:subject_classification_typologies vocabulary 2022-05-05 11:37:58 +02:00
Miriam Baglioni 5fe25cc51c Merge pull request '[eosc tag] set the eosc subjects, rough implementation' (#215) from eosc_tag into beta
Reviewed-on: #215
2022-05-04 10:11:14 +02:00
Miriam Baglioni 8a72de4011 [EOSCTag] modified workflow to execute all the steps and not only the last one 2022-05-04 10:10:56 +02:00
Miriam Baglioni bd1108f98b merging with branch beta 2022-05-04 10:06:56 +02:00
Miriam Baglioni 3aeedd931a [EOSCTag] fixed issue in case description is null. Modified test resources and classes 2022-05-04 10:06:38 +02:00
Claudio Atzori da611cfbbd [eosc_services] resolved merge conflicts 2022-05-03 13:37:15 +02:00
Claudio Atzori 9e12cb3c92 EOSC Services - removed field knowledgegraph; depending on the released schema module 2022-05-03 11:55:45 +02:00
Miriam Baglioni a21fe310e5 [EOSCTag] last test and change in the implementation to search in title and description 2022-05-02 17:43:20 +02:00
Claudio Atzori 2ade69dea6 EOSC Services - minor 2022-05-02 17:03:31 +02:00
Claudio Atzori b6a7ff3a99 EOSC Services - removed fields from mapping, testing preparation 2022-05-02 15:52:33 +02:00
Miriam Baglioni e37177e1ce merging with branch beta 2022-05-02 12:31:50 +02:00
Claudio Atzori a8c51f6f16 EOSC Services - fixed query and testing preparation 2022-05-02 11:09:03 +02:00
Claudio Atzori 05c1ea92e9 EOSC Services - added Service-specific fields in the XML record serialization 2022-04-29 15:56:55 +02:00
Claudio Atzori f5f532d134 EOSC Services - ongoing update 2022-04-29 12:25:24 +02:00
Claudio Atzori 5ffc24d1ba EOSC Services - ongoing update 2022-04-26 16:18:41 +02:00
Sandro La Bruzzo 78015a5733 Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta 2022-04-26 09:56:34 +02:00
Sandro La Bruzzo 8c22e5c30a added fix to include date array with only year or year and month 2022-04-26 09:56:27 +02:00
Miriam Baglioni e342ec93f0 [EOSCTag] prepared resources for test 2022-04-22 18:35:37 +02:00
Miriam Baglioni 88562c0930 [EOSC TAG] added test for Galaxy for title and description criteria 2022-04-22 18:35:03 +02:00
Miriam Baglioni dfbd2bcbea [EOSC TAG] added logic in case subject is null 2022-04-22 18:34:03 +02:00
Miriam Baglioni 27c85e901a [EOSCTag] added resources and finalized test for Jupyter Notebook tagging 2022-04-22 17:38:10 +02:00
Miriam Baglioni 87bff36d9e merging with branch beta 2022-04-22 15:52:34 +02:00
Claudio Atzori 81242538e6 Merge pull request 'Oozie workflow for cleancontext' (#216) from cleancontext into beta
Reviewed-on: #216

Looks good. We need to extend the cleaning workflow parameters to enable the extra step only when it is needed.
2022-04-22 15:46:40 +02:00
Miriam Baglioni 911ce0780a Merge branch 'cleancontext' of https://code-repo.d4science.org/D-Net/dnet-hadoop into cleancontext 2022-04-22 15:41:42 +02:00
Miriam Baglioni 19d90658fc [Clean Context] added description to parameters 2022-04-22 15:41:23 +02:00
Claudio Atzori 54162f5c4f Merge branch 'beta' into cleancontext 2022-04-22 11:49:33 +02:00
Miriam Baglioni bbb77052d3 [EOSCTag] first test 2022-04-22 11:32:57 +02:00
Sandro La Bruzzo a82ec3aaaf code formatter 2022-04-22 11:08:13 +02:00
Sandro La Bruzzo aa12429f50 Modified last intersection since we lost many titles. 2022-04-22 11:05:08 +02:00
Miriam Baglioni 7cb7066472 [EoscTag] first "rough" implementation 2022-04-22 10:44:17 +02:00
Miriam Baglioni e0915061c2 [Clean Context] fixed issue in param name 2022-04-21 16:32:40 +02:00
Miriam Baglioni 6dc68c48e0 [EOSCTag] - 2022-04-21 16:19:04 +02:00
Miriam Baglioni 9a961a0092 [Clean Context] fixed issue in param name 2022-04-21 15:12:24 +02:00
Claudio Atzori 29150a5d0c code formatting 2022-04-21 13:31:56 +02:00
Miriam Baglioni 5b7d9e741c [Clean Context] added logic to cleaning workflow to accommodate also context cleaning 2022-04-21 13:02:14 +02:00
Miriam Baglioni ccba1a3db1 [Clean Context] added logic to cleaning workflow to accommodate also context cleaning 2022-04-21 13:00:06 +02:00
Claudio Atzori a289c9eae2 Merge pull request '[Measures] added new measure (UsageCounts)' (#214) from eosc_dimitris into beta
Reviewed-on: #214
2022-04-21 12:19:18 +02:00
Miriam Baglioni 20de75ca64 [Measures] removed typo 2022-04-21 12:14:03 +02:00
Miriam Baglioni bebb2a0560 Merge branch 'eosc_dimitris' of https://code-repo.d4science.org/D-Net/dnet-hadoop into eosc_dimitris 2022-04-21 12:10:19 +02:00
Miriam Baglioni b61efd613b [Measures] addressed comments in the PR 2022-04-21 12:09:37 +02:00
Miriam Baglioni d012d125d7 [EOSCTag] - 2022-04-21 12:02:09 +02:00
Claudio Atzori 88acad76f9 Merge branch 'beta' into eosc_dimitris 2022-04-21 12:00:03 +02:00
Miriam Baglioni c304657d91 [Measures] put the logic in common, no need to change the schema 2022-04-21 11:27:26 +02:00
Miriam Baglioni b33156c2ee [Dump] remove unneeded class 2022-04-21 11:12:35 +02:00
Miriam Baglioni 5295effc96 [Measures] fixed issue 2022-04-20 16:20:40 +02:00
Miriam Baglioni 61c0266a44 Merge pull request 'Remove Context from result' (#208) from cleancontext into beta
Reviewed-on: #208
2022-04-20 15:45:32 +02:00
Miriam Baglioni a38f0f5ea7 merging with branch beta 2022-04-20 15:44:18 +02:00
Miriam Baglioni dbfbe8841a [Clean Context] changed the description in input parameters 2022-04-20 15:41:03 +02:00
Miriam Baglioni 5feae77937 [Measures] last changes to accommodate tests 2022-04-20 15:13:09 +02:00
Miriam Baglioni 869407c6e2 [Measures] added new measure (usagecounts) as action set. Measure added at the level of the result. Ref #7587 2022-04-20 14:02:05 +02:00
Antonis Lempesis b7cd2c6ca1 added open citations 2022-04-20 14:46:55 +03:00
Michele Artini c96a8613f8 update SQL queries 2022-04-20 12:07:49 +02:00
Michele Artini 4314db55c8 migration to services: update sql queries 2022-04-19 15:05:02 +02:00
Miriam Baglioni 38b8d324af merge branch with master 2022-04-13 17:46:48 +02:00
Claudio Atzori 73c172926a [Doiboost] fixed fundingReference extraction from the Crossref records 2022-04-12 10:25:42 +02:00
Claudio Atzori 48b580b45c [graph enrichment] fixed country_propagation oozie workflow definition, parameter saveGraph is not needed anymore by the SparkCountryPropagationJob 2022-04-11 08:52:36 +02:00
Claudio Atzori 4eff7856f5 Merge pull request '[stats-wf] computing stats in each step' (#210) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: #210
2022-04-08 14:21:01 +02:00
Miriam Baglioni 1a0615125f [Graph DUMP] fixed issue in workflow 2022-04-07 15:28:59 +02:00
Claudio Atzori 91e32f12ed Merge branch 'master' into beta 2022-04-07 13:37:58 +02:00
Miriam Baglioni 1e251d34a1 [Graph DUMP] add code to produce the delta of new projects with respect to the previous delta/dump 2022-04-06 17:22:08 +02:00
Claudio Atzori 4190c9f6bc [graph raw] avoid NPEs importing datasource consent fields 2022-04-06 15:34:31 +02:00
Antonis Lempesis c442c91f89 computing stats in each step 2022-04-06 12:40:02 +03:00
Miriam Baglioni b0f0ae180c merge branch with master 2022-04-06 10:55:08 +02:00
Miriam Baglioni c0dab69349 [Graph DUMP] removed unused sub-workflow 2022-04-05 15:04:52 +02:00
Miriam Baglioni 79336d46c5 [Clean Context] first naive implementation of a functionality to clean unwanted contexts from a result. This implementation simply verifies whether the main title of the result starts with a given string 2022-04-04 15:52:31 +02:00
Miriam Baglioni f738acb85a [Dump Funders] new code for the dump of products related to funders 2022-03-31 18:23:25 +02:00
Miriam Baglioni 5331dea71b [Dump Funders] new code for the dump of products related to funders 2022-03-24 10:56:30 +01:00
Miriam Baglioni 9ba598a9b5 [Dump Funders] - 2022-03-23 17:10:19 +01:00
Miriam Baglioni 13d1d73b2e [Dump Funders] - 2022-03-23 16:08:14 +01:00
Miriam Baglioni faf79db4d5 [Dump Funders] - 2022-03-23 10:10:38 +01:00
97 changed files with 4822 additions and 1136 deletions

View File

@ -255,7 +255,8 @@ public class ZenodoAPIClient implements Serializable {
private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
ZenodoModelList zenodoModelList = new Gson()
.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
for (ZenodoModel zm : zenodoModelList) {
if (zm.getConceptrecid().equals(concept_rec_id)) {
@ -263,8 +264,9 @@ public class ZenodoAPIClient implements Serializable {
return;
}
}
if(zenodoModelList.size() == 0)
throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
if (zenodoModelList.size() == 0)
throw new MissingConceptDoiException(
"The concept record id specified was missing in the list of depositions");
setDepositionId(concept_rec_id, page + 1);
}
@ -278,11 +280,11 @@ public class ZenodoAPIClient implements Serializable {
String url = urlBuilder.build().toString();
Request request = new Request.Builder()
.url(url)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get()
.build();
.url(url)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
@ -295,7 +297,6 @@ public class ZenodoAPIClient implements Serializable {
}
private String getBucket(String url) throws IOException {
OkHttpClient httpClient = new OkHttpClient.Builder()
.connectTimeout(600, TimeUnit.SECONDS)

View File

@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.sql.Array;
import java.sql.SQLException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
@ -118,6 +120,17 @@ public class OafMapperUtils {
.collect(Collectors.toList());
}
public static <T> List<T> listValues(Array values) throws SQLException {
if (Objects.isNull(values)) {
return null;
}
return Arrays
.stream((T[]) values.getArray())
.filter(Objects::nonNull)
.distinct()
.collect(Collectors.toList());
}
public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
return values
.stream()
@ -391,4 +404,19 @@ public class OafMapperUtils {
}
return null;
}
public static KeyValue newKeyValueInstance(String key, String value, DataInfo dataInfo) {
KeyValue kv = new KeyValue();
kv.setDataInfo(dataInfo);
kv.setKey(key);
kv.setValue(value);
return kv;
}
public static Measure newMeasureInstance(String id, String value, String key, DataInfo dataInfo) {
Measure m = new Measure();
m.setId(id);
m.setUnit(Arrays.asList(newKeyValueInstance(key, value, dataInfo)));
return m;
}
}
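
For reference, a minimal usage sketch (not part of the diff) of the two helpers added above. It assumes the eu.dnetlib.dhp.schema.oaf model classes and the OafMapperUtils.dataInfo/qualifier signatures used elsewhere in this changeset; the inference provenance string "update" and the count value are illustrative only:

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Measure;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

public class MeasureExample {

    public static void main(String[] args) {
        // provenance record, mirroring the one built in SparkAtomicActionUsageJob below
        // ("update" is an illustrative inferenceprovenance value, not taken from the diff)
        DataInfo dataInfo = OafMapperUtils
            .dataInfo(
                false, "update", true, false,
                OafMapperUtils
                    .qualifier(
                        "measure:usage_counts", "Inferred by OpenAIRE",
                        ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
                "");

        // a "downloads" measure whose single KeyValue unit stores the count under the key "count"
        Measure downloads = OafMapperUtils.newMeasureInstance("downloads", "42", "count", dataInfo);

        System.out.println(downloads.getId() + " -> " + downloads.getUnit().get(0).getValue());
    }
}
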

View File

@ -44,105 +44,104 @@ class OafMapperUtilsTest {
@Test
void testDateValidation() {
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent());
assertNotNull(GraphCleaningFunctions.cleanDate("2016-05-07T12:41:19.202Z "));
assertNotNull(GraphCleaningFunctions.cleanDate("2020-09-10 11:08:52 "));
assertNotNull(GraphCleaningFunctions.cleanDate(" 2016-04-05"));
assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get());
assertEquals("2016-04-05", GraphCleaningFunctions.cleanDate("2016 Apr 05"));
assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get());
assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get());
assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get());
assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get());
assertEquals("2009-05-08", GraphCleaningFunctions.cleanDate("May 8, 2009 5:57:51 PM"));
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, 1970"));
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, '70"));
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 1970"));
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 70"));
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 2006"));
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 MST 2006"));
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 02 15:04:05 -0700 2006"));
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Monday, 02-Jan-06 15:04:05 MST"));
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 MST"));
assertEquals("2017-07-11", GraphCleaningFunctions.cleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)"));
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 -0700"));
assertEquals("2018-01-04", GraphCleaningFunctions.cleanDate("Thu, 4 Jan 2018 17:53:36 +0000"));
assertEquals("2015-08-10", GraphCleaningFunctions.cleanDate("Mon Aug 10 15:44:11 UTC+0100 2015"));
assertEquals(
"2015-07-03",
GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get());
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get());
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get());
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get());
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get());
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get());
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get());
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get());
assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get());
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get());
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get());
assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get());
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get());
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get());
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get());
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get());
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get());
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get());
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get());
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get());
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get());
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get());
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get());
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get());
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get());
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get());
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get());
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get());
assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get());
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get());
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get());
GraphCleaningFunctions.cleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)"));
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 10:09am"));
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 at 10:09am PST-08"));
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012, 10:10:09"));
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7, 1970"));
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7th, 1970"));
assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006, 19:17"));
assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006 19:17"));
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 70"));
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 1970"));
assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("03 February 2013"));
assertEquals("2013-07-01", GraphCleaningFunctions.cleanDate("1 July 2013"));
assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("2013-Feb-03"));
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3/31/2014"));
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03/31/2014"));
assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08/21/71"));
assertEquals("1971-01-08", GraphCleaningFunctions.cleanDate("8/1/71"));
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/2014 22:05"));
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("04/08/2014 22:05"));
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/14 22:05"));
assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("04/2/2014 03:00:51"));
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00:00 AM"));
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00:01 PM"));
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00 PM"));
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 1:00 PM"));
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00 AM"));
assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("4/02/2014 03:00:51"));
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59"));
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59.3186369"));
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/3/31"));
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/03/31"));
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/4/8 22:05"));
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/04/08 22:05"));
assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/04/2 03:00:51"));
assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/4/02 03:00:51"));
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59"));
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59.3186369"));
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014年04月08日"));
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("2006-01-02T15:04:05+0000"));
assertEquals("2009-08-13", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09-07:00"));
assertEquals("2009-08-12", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09"));
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.3186369"));
assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000"));
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.123"));
assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43"));
assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43:22"));
assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 UTC"));
assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 GMT"));
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 05:24:37 PM"));
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800"));
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800 +08"));
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:44 +09:00"));
assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000 +0000 UTC"));
assertEquals("2015-09-30", GraphCleaningFunctions.cleanDate("2015-09-30 18:48:56.35272715 +0000 UTC"));
assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 GMT"));
assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 UTC"));
assertEquals(
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get());
"2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001"));
assertEquals(
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get());
assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get());
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get());
assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get());
assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get());
assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get());
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get());
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get());
assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get());
assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get());
assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get());
assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get());
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get());
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get());
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get());
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get());
"2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001"));
assertEquals("2017-07-19", GraphCleaningFunctions.cleanDate("2017-07-19 03:21:51+00:00"));
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26"));
assertEquals("2014-04-01", GraphCleaningFunctions.cleanDate("2014-04"));
assertEquals("2014-01-01", GraphCleaningFunctions.cleanDate("2014"));
assertEquals("2014-05-11", GraphCleaningFunctions.cleanDate("2014-05-11 08:20:13,787"));
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3.31.2014"));
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03.31.2014"));
assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08.21.71"));
assertEquals("2014-03-01", GraphCleaningFunctions.cleanDate("2014.03"));
assertEquals("2014-03-30", GraphCleaningFunctions.cleanDate("2014.03.30"));
assertEquals("2014-06-01", GraphCleaningFunctions.cleanDate("20140601"));
assertEquals("2014-07-22", GraphCleaningFunctions.cleanDate("20140722105203"));
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("1332151919"));
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));
}

View File

@ -27,6 +27,8 @@ public class Constants {
public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE";
public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip";
public static final String UPDATE_SUBJECT_SDG_CLASS_ID = "subject:sdg";
public static final String UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID = "measure:usage_counts";
public static final String UPDATE_KEY_USAGE_COUNTS = "count";
public static final String FOS_CLASS_ID = "FOS";
public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification";

View File

@ -0,0 +1,156 @@
package eu.dnetlib.dhp.actionmanager.usagestats;
import static eu.dnetlib.dhp.actionmanager.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Measure;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import scala.Tuple2;
/**
* created the Atomic Action for each type of results
*/
public class SparkAtomicActionUsageJob implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionUsageJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static <I extends Result> void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkAtomicActionUsageJob.class
.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String outputPath = parser.get("outputPath");
log.info("outputPath {}: ", outputPath);
SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
final String dbname = parser.get("usagestatsdb");
final String workingPath = parser.get("workingPath");
runWithSparkHiveSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, outputPath);
prepareResults(dbname, spark, workingPath);
writeActionSet(spark, workingPath, outputPath);
});
}
public static void prepareResults(String db, SparkSession spark, String workingPath) {
spark
.sql(
"Select result_id, downloads, views " +
"from " + db + ".usage_stats")
.as(Encoders.bean(UsageStatsModel.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath);
}
public static void writeActionSet(SparkSession spark, String inputPath, String outputPath) {
readPath(spark, inputPath, UsageStatsModel.class)
.groupByKey((MapFunction<UsageStatsModel, String>) us -> us.getResult_id(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, UsageStatsModel, Result>) (k, it) -> {
UsageStatsModel first = it.next();
it.forEachRemaining(us -> {
first.setDownloads(first.getDownloads() + us.getDownloads());
first.setViews(first.getViews() + us.getViews());
});
Result res = new Result();
res.setId("50|" + k);
res.setMeasures(getMeasure(first.getDownloads(), first.getViews()));
return res;
}, Encoders.bean(Result.class))
.toJavaRDD()
.map(p -> new AtomicAction(p.getClass(), p))
.mapToPair(
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
}
private static List<Measure> getMeasure(Long downloads, Long views) {
DataInfo dataInfo = OafMapperUtils
.dataInfo(
false,
UPDATE_DATA_INFO_TYPE,
true,
false,
OafMapperUtils
.qualifier(
UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID,
UPDATE_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
"");
return Arrays
.asList(
OafMapperUtils
.newMeasureInstance("downloads", String.valueOf(downloads), UPDATE_KEY_USAGE_COUNTS, dataInfo),
OafMapperUtils.newMeasureInstance("views", String.valueOf(views), UPDATE_KEY_USAGE_COUNTS, dataInfo));
}
private static void removeOutputDir(SparkSession spark, String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
public static <R> Dataset<R> readPath(
SparkSession spark, String inputPath, Class<R> clazz) {
return spark
.read()
.textFile(inputPath)
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
}
}

View File

@ -0,0 +1,34 @@
package eu.dnetlib.dhp.actionmanager.usagestats;
import java.io.Serializable;
public class UsageStatsModel implements Serializable {
private String result_id;
private Long downloads;
private Long views;
public String getResult_id() {
return result_id;
}
public void setResult_id(String result_id) {
this.result_id = result_id;
}
public Long getDownloads() {
return downloads;
}
public void setDownloads(Long downloads) {
this.downloads = downloads;
}
public Long getViews() {
return views;
}
public void setViews(Long views) {
this.views = views;
}
}

View File

@ -0,0 +1,32 @@
[
{
"paramName": "issm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "when true will stop SparkSession after job execution",
"paramRequired": false
},
{
"paramName": "hmu",
"paramLongName": "hive_metastore_uris",
"paramDescription": "the URI for the hive metastore",
"paramRequired": true
},
{
"paramName": "o",
"paramLongName": "outputPath",
"paramDescription": "the path of the new ActionSet",
"paramRequired": true
},
{
"paramName": "sdb",
"paramLongName": "usagestatsdb",
"paramDescription": "the name of the db to be used",
"paramRequired": true
},
{
"paramName": "wp",
"paramLongName": "workingPath",
"paramDescription": "the workingPath where to save the content of the usage_stats table",
"paramRequired": true
}
]

View File

@ -0,0 +1,99 @@
<workflow-app name="UsageStatsCounts" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>outputPath</name>
<description>the path where to store the actionset</description>
</property>
<property>
<name>usagestatsdb</name>
<description>the name of the db to be used</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="atomicactions"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="atomicactions">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Produces the atomic action with the usage stats count for results</name>
<class>eu.dnetlib.dhp.actionmanager.usagestats.SparkAtomicActionUsageJob</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--hive_metastore_uris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--usagestatsdb</arg><arg>${usagestatsdb}</arg>
<arg>--workingPath</arg><arg>${workingDir}/usageDb</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,256 @@
package eu.dnetlib.dhp.actionmanager.usagestats;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Result;
public class SparkAtomicActionCountJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(SparkAtomicActionCountJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(SparkAtomicActionCountJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(SparkAtomicActionCountJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(SparkAtomicActionCountJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void testMatch() {
String usageScoresPath = getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb")
.getPath();
SparkAtomicActionUsageJob.writeActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet");
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
JavaRDD<Result> tmp = sc
.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
.map(usm -> OBJECT_MAPPER.readValue(usm._2.getBytes(), AtomicAction.class))
.map(aa -> (Result) aa.getPayload());
Assertions.assertEquals(9, tmp.count());
tmp.foreach(r -> Assertions.assertEquals(2, r.getMeasures().size()));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(u -> Assertions.assertFalse(u.getDataInfo().getInvisible()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(
u -> Assertions
.assertEquals(
"measure:usage_counts",
u.getDataInfo().getProvenanceaction().getClassid()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(
u -> Assertions
.assertEquals(
"Inferred by OpenAIRE",
u.getDataInfo().getProvenanceaction().getClassname()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(
u -> Assertions
.assertEquals(
"count",
u.getKey()))));
Assertions
.assertEquals(
1, tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count());
Assertions
.assertEquals(
"0",
tmp
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("downloads"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"5",
tmp
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("views"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"0",
tmp
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("downloads"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"1",
tmp
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("views"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"2",
tmp
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("downloads"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"6",
tmp
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("views"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
}
}

View File

@ -0,0 +1,12 @@
{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4}
{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1}
{"result_id":"doi_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1}
{"result_id":"doi_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3}
{"result_id":"doi_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1}
{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3}
{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3}
{"result_id":"doi_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1}
{"result_id":"doi_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3}
{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8}
{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2}
{"result_id":"doi_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3}

View File

@ -2,7 +2,7 @@ package eu.dnetlib.doiboost.crossref
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
import eu.dnetlib.dhp.utils.DHPUtils
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
@ -280,10 +280,10 @@ case object Crossref2Oaf {
instance.setDateofacceptance(asField(createdDate.getValue))
}
val s: List[String] = List("https://doi.org/" + doi)
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
// if (links.nonEmpty) {
// instance.setUrl(links.asJava)
// }
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
// if (links.nonEmpty) {
// instance.setUrl(links.asJava)
// }
if (s.nonEmpty) {
instance.setUrl(s.asJava)
}
@ -576,14 +576,19 @@ case object Crossref2Oaf {
def extractDate(dt: String, datePart: List[List[Int]]): String = {
if (StringUtils.isNotBlank(dt))
return dt
return GraphCleaningFunctions.cleanDate(dt)
if (datePart != null && datePart.size == 1) {
val res = datePart.head
if (res.size == 3) {
val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
if (dp.length == 10) {
return dp
return GraphCleaningFunctions.cleanDate(dp)
}
} else if (res.size == 2) {
val dp = f"${res.head}-${res(1)}%02d-01"
return GraphCleaningFunctions.cleanDate(dp)
} else if (res.size == 1) {
return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
}
}
null

View File

@ -0,0 +1,330 @@
{
"indexed":{
"date-parts":[
[
2022,
4,
14
]
],
"date-time":"2022-04-14T11:27:30Z",
"timestamp":1649935650109
},
"reference-count":22,
"publisher":"SAGE Publications",
"issue":"2",
"license":[
{
"start":{
"date-parts":[
[
1980,
4,
1
]
],
"date-time":"1980-04-01T00:00:00Z",
"timestamp":323395200000
},
"content-version":"tdm",
"delay-in-days":0,
"URL":"http:\/\/journals.sagepub.com\/page\/policies\/text-and-data-mining-license"
}
],
"content-domain":{
"domain":[
],
"crossmark-restriction":false
},
"short-container-title":[
"Perception"
],
"published-print":{
"date-parts":[
[
1980,
4
]
]
},
"abstract":"<jats:p> To answer the question \u2018What is suppressed during binocular rivalry?\u2019 a series of three experiments was performed. In the first experiment observers viewed binocular rivalry between orthogonally oriented patterns. When the dominant and suppressed patterns were interchanged between the eyes observers continued seeing with the dominant eye, indicating that an eye, not a pattern, is suppressed during rivalry. In a second experiment it was found that a suppressed eye was able to contribute to stereopsis. A third experiment demonstrated that the predominance of an eye could be influenced by prior adaptation of the other eye, indicating that binocular mechanisms participate in the rivalry process. <\/jats:p>",
"DOI":"10.1068\/p090223",
"type":"journal-article",
"created":{
"date-parts":[
[
2007,
1,
23
]
],
"date-time":"2007-01-23T15:21:36Z",
"timestamp":1169565696000
},
"page":"223-231",
"source":"Crossref",
"is-referenced-by-count":123,
"title":[
"What is Suppressed during Binocular Rivalry?"
],
"prefix":"10.1177",
"volume":"9",
"author":[
{
"given":"Randolph",
"family":"Blake",
"sequence":"first",
"affiliation":[
{
"name":"Cresap Neuroscience Laboratory, Northwestern University, Evanston, Illinois 60201, USA"
}
]
},
{
"given":"David H",
"family":"Westendorf",
"sequence":"additional",
"affiliation":[
{
"name":"Department of Psychology, University of Arkansas, Fayetteville, Arkansas 72701, USA"
}
]
},
{
"given":"Randall",
"family":"Overton",
"sequence":"additional",
"affiliation":[
{
"name":"Department of Psychology, Illinois State University, Normal, Illinois 61761, USA"
}
]
}
],
"member":"179",
"published-online":{
"date-parts":[
[
2016,
6,
25
]
]
},
"reference":[
{
"key":"bibr1-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1136\/bjo.37.1.37"
},
{
"key":"bibr2-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1037\/0096-1523.5.2.315"
},
{
"key":"bibr3-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1016\/0042-6989(74)90065-0"
},
{
"key":"bibr4-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1068\/p080143"
},
{
"key":"bibr5-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1016\/0042-6989(70)90036-2"
},
{
"key":"bibr6-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1113\/jphysiol.1969.sp008862"
},
{
"key":"bibr7-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1113\/jphysiol.1972.sp010006"
},
{
"key":"bibr8-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1146\/annurev.ps.23.020172.002213"
},
{
"key":"bibr9-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1126\/science.166.3902.245"
},
{
"key":"bibr10-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1037\/h0075805"
},
{
"key":"bibr11-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1113\/jphysiol.1968.sp008552"
},
{
"key":"bibr12-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1113\/jphysiol.1965.sp007784"
},
{
"key":"bibr13-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1037\/h0032455"
},
{
"key":"bibr14-p090223",
"volume-title":"Treatise on Physiological Optics",
"volume":"3",
"author":"von Helmholtz H",
"year":"1866",
"edition":"3"
},
{
"key":"bibr15-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1068\/p040125"
},
{
"key":"bibr16-p090223",
"volume-title":"On Binocular Rivalry",
"author":"Levelt W J M",
"year":"1965"
},
{
"key":"bibr17-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1001\/archopht.1935.00840020011001"
},
{
"key":"bibr18-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.3758\/BF03205796"
},
{
"key":"bibr19-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.3758\/BF03210180"
},
{
"key":"bibr20-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1037\/0033-2909.85.2.376"
},
{
"key":"bibr21-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1016\/0042-6989(79)90169-X"
},
{
"key":"bibr22-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.3758\/BF03210465"
}
],
"container-title":[
"Perception"
],
"original-title":[
],
"language":"en",
"link":[
{
"URL":"http:\/\/journals.sagepub.com\/doi\/pdf\/10.1068\/p090223",
"content-type":"application\/pdf",
"content-version":"vor",
"intended-application":"text-mining"
},
{
"URL":"http:\/\/journals.sagepub.com\/doi\/pdf\/10.1068\/p090223",
"content-type":"unspecified",
"content-version":"vor",
"intended-application":"similarity-checking"
}
],
"deposited":{
"date-parts":[
[
2021,
12,
3
]
],
"date-time":"2021-12-03T11:49:48Z",
"timestamp":1638532188000
},
"score":1,
"resource":{
"primary":{
"URL":"http:\/\/journals.sagepub.com\/doi\/10.1068\/p090223"
}
},
"subtitle":[
],
"short-title":[
],
"issued":{
"date-parts":[
[
1980,
4
]
]
},
"references-count":22,
"journal-issue":{
"issue":"2",
"published-print":{
"date-parts":[
[
1980,
4
]
]
}
},
"alternative-id":[
"10.1068\/p090223"
],
"URL":"http:\/\/dx.doi.org\/10.1068\/p090223",
"relation":{
},
"ISSN":[
"0301-0066",
"1468-4233"
],
"issn-type":[
{
"value":"0301-0066",
"type":"print"
},
{
"value":"1468-4233",
"type":"electronic"
}
],
"subject":[
"Artificial Intelligence",
"Sensory Systems",
"Experimental and Cognitive Psychology",
"Ophthalmology"
],
"published":{
"date-parts":[
[
1980,
4
]
]
}
}

View File

@ -73,6 +73,20 @@ class CrossrefMappingTest {
}
@Test
def crossrefIssueDateTest(): Unit = {
val json =
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
assertNotNull(json)
assertFalse(json.isEmpty)
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
val items = resultList.filter(p => p.isInstanceOf[Result])
println(mapper.writeValueAsString(items.head))
}
@Test
def testOrcidID(): Unit = {
val json = Source
@ -82,7 +96,7 @@ class CrossrefMappingTest {
.mkString
assertNotNull(json)
assertFalse(json.isEmpty);
assertFalse(json.isEmpty)
val resultList: List[Oaf] = Crossref2Oaf.convert(json)

View File

@ -103,7 +103,7 @@ public class SparkBulkTagJob {
ResultTagger resultTagger = new ResultTagger();
readPath(spark, inputPath, resultClazz)
.map(patchResult(), Encoders.bean(resultClazz))
.filter(Objects::nonNull)
.filter(Objects::nonNull)
.map(
(MapFunction<R, R>) value -> resultTagger
.enrichContextCriteria(

View File

@ -0,0 +1,251 @@
package eu.dnetlib.dhp.bulktag;
import static eu.dnetlib.dhp.PropagationConstant.readPath;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class SparkEoscTag {
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
.qualifier(
"EOSC",
"European Open Science Cloud",
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES);
public static final DataInfo EOSC_DATAINFO = OafMapperUtils
.dataInfo(
false, "propagation", true, false,
OafMapperUtils
.qualifier(
"propagation:subject", "Inferred by OpenAIRE",
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
"0.9");
public final static StructuredProperty EOSC_NOTEBOOK = OafMapperUtils
.structuredProperty(
"EOSC::Jupyter Notebook", EOSC_QUALIFIER, EOSC_DATAINFO);
public final static StructuredProperty EOSC_GALAXY = OafMapperUtils
.structuredProperty(
"EOSC::Galaxy Workflow", EOSC_QUALIFIER, EOSC_DATAINFO);
public final static StructuredProperty EOSC_TWITTER = OafMapperUtils
.structuredProperty(
"EOSC::Twitter Data", EOSC_QUALIFIER, EOSC_DATAINFO);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkEoscTag.class
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String workingPath = parser.get("workingPath");
log.info("workingPath: {}", workingPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
execEoscTag(spark, inputPath, workingPath);
});
}
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
readPath(spark, inputPath + "/software", Software.class)
.map((MapFunction<Software, Software>) s -> {
List<StructuredProperty> sbject;
if (!Optional.ofNullable(s.getSubject()).isPresent())
s.setSubject(new ArrayList<>());
sbject = s.getSubject();
if (containsCriteriaNotebook(s)) {
sbject.add(EOSC_NOTEBOOK);
if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))) {
sbject = sbject.stream().map(sb -> {
if (sb.getValue().equals("EOSC Jupyter Notebook")) {
return null;
}
return sb;
}).filter(Objects::nonNull).collect(Collectors.toList());
s.setSubject(sbject);
}
}
if (containsCriteriaGalaxy(s)) {
sbject.add(EOSC_GALAXY);
}
return s;
}, Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/software");
readPath(spark, workingPath + "/software", Software.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/software");
readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
.map((MapFunction<OtherResearchProduct, OtherResearchProduct>) orp -> {
List<StructuredProperty> sbject;
if (!Optional.ofNullable(orp.getSubject()).isPresent())
orp.setSubject(new ArrayList<>());
sbject = orp.getSubject();
if (containsCriteriaGalaxy(orp)) {
sbject.add(EOSC_GALAXY);
}
if (containscriteriaTwitter(orp)) {
sbject.add(EOSC_TWITTER);
}
return orp;
}, Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/otherresearchproduct");
readPath(spark, workingPath + "/otherresearchproduct", OtherResearchProduct.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/otherresearchproduct");
readPath(spark, inputPath + "/dataset", Dataset.class)
.map((MapFunction<Dataset, Dataset>) d -> {
List<StructuredProperty> sbject;
if (!Optional.ofNullable(d.getSubject()).isPresent())
d.setSubject(new ArrayList<>());
sbject = d.getSubject();
if (containscriteriaTwitter(d)) {
sbject.add(EOSC_TWITTER);
}
return d;
}, Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/dataset");
readPath(spark, workingPath + "/dataset", Dataset.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/dataset");
}
private static boolean containscriteriaTwitter(Result r) {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));
if (words.contains("twitter") &&
(words.contains("data") || words.contains("dataset")))
return true;
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
return true;
return false;
}
private static boolean containsCriteriaGalaxy(Result r) {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));
if (words.contains("galaxy") &&
words.contains("workflow"))
return true;
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
return true;
return false;
}
private static boolean containsCriteriaNotebook(Software s) {
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("jupyter")))
return true;
if (s
.getSubject()
.stream()
.anyMatch(
sbj -> sbj.getValue().toLowerCase().contains("python") &&
sbj.getValue().toLowerCase().contains("notebook")))
return true;
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("python")) &&
s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("notebook")))
return true;
return false;
}
private static Set<String> getSubjects(List<StructuredProperty> s) {
Set<String> subjects = new HashSet<>();
s.stream().forEach(sbj -> subjects.addAll(Arrays.asList(sbj.getValue().toLowerCase().split(" "))));
s.stream().forEach(sbj -> subjects.add(sbj.getValue().toLowerCase()));
return subjects;
}
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
Set<String> words = new HashSet<>();
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
return words;
}
private static Set<String> getWordsF(List<Field<String>> elem) {
Set<String> words = new HashSet<>();
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
// elem
// .forEach(
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
return words;
}
}
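
Note on the tagging logic above: the EOSC subject is added when simple keyword checks succeed on the words extracted from title, description and subjects (lowercased, stripped of non-letter characters). The snippet below is a minimal standalone sketch of the Galaxy criterion, not part of the commit; class and method names are invented for illustration, and the real job additionally inspects subject values and applies analogous checks for Jupyter and Twitter.

import java.util.*;

public class EoscCriteriaSketch {

    // Mirrors getWordsSP/getWordsF: lowercase, drop non-letters, split on spaces.
    static Set<String> words(String... texts) {
        Set<String> out = new HashSet<>();
        for (String t : texts) {
            out.addAll(Arrays.asList(t.toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")));
        }
        return out;
    }

    // Mirrors containsCriteriaGalaxy: both "galaxy" and "workflow" must occur.
    static boolean looksLikeGalaxyWorkflow(String title, String description) {
        Set<String> w = words(title, description);
        return w.contains("galaxy") && w.contains("workflow");
    }

    public static void main(String[] args) {
        System.out.println(looksLikeGalaxyWorkflow("A Galaxy workflow for variant calling", "CWL-based pipeline")); // true
        System.out.println(looksLikeGalaxyWorkflow("Galaxy morphology catalogue", "imaging data"));                 // false
    }
}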


@ -102,21 +102,28 @@ public class SparkCountryPropagationJob {
private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
return t -> {
Optional.ofNullable(t._2()).ifPresent(r -> {
t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet()));
if (Optional.ofNullable(t._1().getCountry()).isPresent())
t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet()));
else
t._1().setCountry(merge(null, t._2().getCountrySet()));
});
return t._1();
};
}
private static List<Country> merge(List<Country> c1, List<CountrySbs> c2) {
HashSet<String> countries = c1
.stream()
.map(Qualifier::getClassid)
.collect(Collectors.toCollection(HashSet::new));
HashSet<String> countries = new HashSet<>();
if (Optional.ofNullable(c1).isPresent()) {
countries = c1
.stream()
.map(Qualifier::getClassid)
.collect(Collectors.toCollection(HashSet::new));
}
HashSet<String> finalCountries = countries;
return c2
.stream()
.filter(c -> !countries.contains(c.getClassid()))
.filter(c -> !finalCountries.contains(c.getClassid()))
.map(c -> getCountry(c.getClassid(), c.getClassname()))
.collect(Collectors.toList());
}
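
The change above adds a null guard so that results whose country list is not set no longer break the merge. A minimal sketch of the same null-safe merge idea on plain strings (hypothetical names, not the project's Country/CountrySbs types):

import java.util.*;
import java.util.stream.Collectors;

public class NullSafeMergeSketch {

    // Existing values may be null; only candidates not already present are returned.
    static List<String> merge(List<String> existing, List<String> candidates) {
        Set<String> seen = existing == null ? new HashSet<>() : new HashSet<>(existing);
        return candidates.stream()
            .filter(c -> !seen.contains(c))
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        System.out.println(merge(null, Arrays.asList("IT", "NL")));                 // [IT, NL]
        System.out.println(merge(Arrays.asList("IT"), Arrays.asList("IT", "NL")));  // [NL]
    }
}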


@ -0,0 +1,21 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "wp",
"paramLongName": "workingPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
}
]


@ -204,7 +204,31 @@
<error to="Kill"/>
</action>
<join name="wait" to="End"/>
<join name="wait" to="eosc_tag"/>
<action name="eosc_tag">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>EOSC_tagging</name>
<class>eu.dnetlib.dhp.bulktag.SparkEoscTag</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}</arg>
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>


@ -0,0 +1,538 @@
package eu.dnetlib.dhp.bulktag;
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.*;
public class EOSCTagJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(EOSCTagJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(EOSCTagJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(EOSCTagJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(EOSCTagJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void jupyterUpdatesTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/software").getPath())
.map(
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/software");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/dataset").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct").getPath())
.map(
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
.readValue(value, OtherResearchProduct.class),
Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/otherresearchproduct");
SparkEoscTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input",
"-workingPath", workingDir.toString() + "/working"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(10, tmp.count());
Assertions
.assertEquals(
4,
tmp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
9, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
9, tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
List<StructuredProperty> subjects = tmp
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
.collect()
.get(0)
.getSubject();
Assertions.assertEquals(8, subjects.size());
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("jupyter")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("Modeling and Simulation")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("structure granulaire")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("algorithme")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("simulation numérique")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de gaz")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de liquide")));
Assertions
.assertEquals(
10, sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.filter(
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
10, sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter(
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
// spark.stop();
}
@Test
void galaxyUpdatesTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/software").getPath())
.map(
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/software");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/dataset").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct").getPath())
.map(
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
.readValue(value, OtherResearchProduct.class),
Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/otherresearchproduct");
SparkEoscTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input",
"-workingPath", workingDir.toString() + "/working"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(11, tmp.count());
Assertions
.assertEquals(
1,
tmp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
2, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertEquals(
8, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
JavaRDD<OtherResearchProduct> orp = sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, orp.count());
Assertions
.assertEquals(
1,
orp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
3, orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
}
@Test
void twitterUpdatesTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/software").getPath())
.map(
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/software");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/dataset").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct").getPath())
.map(
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
.readValue(value, OtherResearchProduct.class),
Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/otherresearchproduct");
SparkEoscTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input",
"-workingPath", workingDir.toString() + "/working"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(10, tmp.count());
Assertions
.assertEquals(
0,
tmp
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
JavaRDD<OtherResearchProduct> orp = sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, orp.count());
Assertions
.assertEquals(
3,
orp
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
JavaRDD<Dataset> dats = sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(11, dats.count());
Assertions
.assertEquals(
3,
dats
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
}
}

Six file diffs suppressed because one or more lines are too long


@ -123,7 +123,6 @@
<artifactId>json4s-jackson_2.11</artifactId>
</dependency>
</dependencies>


@ -0,0 +1,130 @@
package eu.dnetlib.dhp.oa.graph.clean;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class CleanContextSparkJob implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CleanContextSparkJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
CleanContextSparkJob.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String inputPath = parser.get("inputPath");
log.info("inputPath: {}", inputPath);
String workingPath = parser.get("workingPath");
log.info("workingPath: {}", workingPath);
String contextId = parser.get("contextId");
log.info("contextId: {}", contextId);
String verifyParam = parser.get("verifyParam");
log.info("verifyParam: {}", verifyParam);
String graphTableClassName = parser.get("graphTableClassName");
log.info("graphTableClassName: {}", graphTableClassName);
Class<? extends Result> entityClazz = (Class<? extends Result>) Class.forName(graphTableClassName);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
cleanContext(spark, contextId, verifyParam, inputPath, entityClazz, workingPath);
});
}
private static <T extends Result> void cleanContext(SparkSession spark, String contextId, String verifyParam,
String inputPath, Class<T> entityClazz, String workingPath) {
Dataset<T> res = spark
.read()
.textFile(inputPath)
.map(
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, entityClazz),
Encoders.bean(entityClazz));
res.map((MapFunction<T, T>) r -> {
if (!r
.getTitle()
.stream()
.filter(
t -> t
.getQualifier()
.getClassid()
.equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid()))
.anyMatch(t -> t.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()))) {
return r;
}
r
.setContext(
r
.getContext()
.stream()
.filter(
c -> !c.getId().split("::")[0]
.equalsIgnoreCase(contextId))
.collect(Collectors.toList()));
return r;
}, Encoders.bean(entityClazz))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath);
spark
.read()
.textFile(workingPath)
.map(
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, entityClazz),
Encoders.bean(entityClazz))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath);
}
}
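
In short, the job removes the contexts whose id prefix equals contextId, but only from results whose main title starts with verifyParam; all other results pass through unchanged. A plain-Java sketch of that predicate (illustrative only, invented names; the real job operates on Result beans through Spark datasets):

import java.util.*;
import java.util.stream.Collectors;

public class CleanContextSketch {

    // Drop context ids whose prefix (before "::") equals contextId, but only
    // when the main title starts with verifyParam (case-insensitive).
    static List<String> clean(String mainTitle, List<String> contextIds,
            String contextId, String verifyParam) {
        if (!mainTitle.toLowerCase().startsWith(verifyParam.toLowerCase())) {
            return contextIds;
        }
        return contextIds.stream()
            .filter(c -> !c.split("::")[0].equalsIgnoreCase(contextId))
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<String> ctx = Arrays.asList("sobigdata::project", "dh-ch::subcommunity");
        System.out.println(clean("gcube release notes", ctx, "sobigdata", "gcube "));  // [dh-ch::subcommunity]
        System.out.println(clean("An unrelated title", ctx, "sobigdata", "gcube "));   // both contexts kept
    }
}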


@ -10,6 +10,7 @@ import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
@ -81,8 +82,9 @@ public class SparkPrepareResultProject implements Serializable {
Dataset<Relation> relation = Utils
.readPath(spark, inputPath + "/relation", Relation.class)
.filter(
"dataInfo.deletedbyinference = false and lower(relClass) = '"
+ ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'");
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
r.getRelClass().equalsIgnoreCase(ModelConstants.IS_PRODUCED_BY));
Dataset<eu.dnetlib.dhp.schema.oaf.Project> projects = Utils
.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);


@ -7,17 +7,22 @@ import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
/**
@ -49,9 +54,6 @@ public class SparkDumpFunderResults implements Serializable {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String graphPath = parser.get("graphPath");
log.info("relationPath: {}", graphPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
@ -59,61 +61,74 @@ public class SparkDumpFunderResults implements Serializable {
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
writeResultProjectList(spark, inputPath, outputPath, graphPath);
writeResultProjectList(spark, inputPath, outputPath);
});
}
private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath,
String graphPath) {
Dataset<eu.dnetlib.dhp.schema.oaf.Project> project = Utils
.readPath(spark, graphPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);
private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath) {
Dataset<CommunityResult> result = Utils
.readPath(spark, inputPath + "/publication", CommunityResult.class)
.union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
.union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class))
.union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class))
.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
List<String> funderList = project
.select("id")
.map((MapFunction<Row, String>) value -> value.getString(0).substring(0, 15), Encoders.STRING())
.distinct()
.collectAsList();
log.info("Number of result {}", result.count());
Dataset<String> tmp = result
.flatMap((FlatMapFunction<CommunityResult, String>) cr -> cr.getProjects().stream().map(p -> {
return getFunderName(p);
}).collect(Collectors.toList()).iterator(), Encoders.STRING())
.distinct();
List<String> funderList = tmp.collectAsList();
funderList.forEach(funder -> {
String fundernsp = funder.substring(3);
String funderdump;
if (fundernsp.startsWith("corda")) {
funderdump = "EC_";
if (fundernsp.endsWith("h2020")) {
funderdump += "H2020";
} else {
funderdump += "FP7";
}
} else {
funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase();
}
writeFunderResult(funder, result, outputPath, funderdump);
dumpResults(funder, result, outputPath);
});
}
private static void dumpResults(String nsp, Dataset<CommunityResult> results, String outputPath,
String funderName) {
@NotNull
private static String getFunderName(Project p) {
Optional<Funder> ofunder = Optional.ofNullable(p.getFunder());
if (ofunder.isPresent()) {
String fName = ofunder.get().getShortName();
if (fName.equalsIgnoreCase("ec")) {
fName += "_" + ofunder.get().getFundingStream();
}
return fName;
} else {
String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase();
if (fName.equalsIgnoreCase("ec")) {
if (p.getId().contains("h2020")) {
fName += "_H2020";
} else {
fName += "_FP7";
}
} else if (fName.equalsIgnoreCase("conicytf")) {
fName = "CONICYT";
} else if (fName.equalsIgnoreCase("dfgf")) {
fName = "DFG";
} else if (fName.equalsIgnoreCase("tubitakf")) {
fName = "TUBITAK";
} else if (fName.equalsIgnoreCase("euenvagency")) {
fName = "EEA";
}
return fName;
}
}
private static void dumpResults(String funder, Dataset<CommunityResult> results, String outputPath) {
results.map((MapFunction<CommunityResult, CommunityResult>) r -> {
if (!Optional.ofNullable(r.getProjects()).isPresent()) {
return null;
}
for (Project p : r.getProjects()) {
if (p.getId().startsWith(nsp)) {
if (nsp.startsWith("40|irb")) {
if (p.getFunder().getShortName().equals(funderName))
return r;
else
return null;
}
String fName = getFunderName(p);
if (fName.equalsIgnoreCase(funder)) {
return r;
}
}
@ -123,18 +138,7 @@ public class SparkDumpFunderResults implements Serializable {
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/" + funderName);
}
private static void writeFunderResult(String funder, Dataset<CommunityResult> results, String outputPath,
String funderDump) {
if (funder.startsWith("40|irb")) {
dumpResults(funder, results, outputPath, "HRZZ");
dumpResults(funder, results, outputPath, "MZOS");
} else
dumpResults(funder, results, outputPath, funderDump);
.json(outputPath + "/" + funder);
}
}
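
The refactoring above derives the funder label from each project attached to the dumped results instead of reading the graph's project table, and writes one folder per label. A simplified sketch of that label derivation (hypothetical helper; only the EC special case is shown, while the real getFunderName also normalizes a few short names such as conicytf or dfgf):

public class FunderLabelSketch {

    // EC projects are split by funding stream; other funders keep their short name.
    static String funderLabel(String shortName, String fundingStream) {
        if ("ec".equalsIgnoreCase(shortName)) {
            return shortName + "_" + fundingStream;  // e.g. EC_H2020, EC_FP7
        }
        return shortName;
    }

    public static void main(String[] args) {
        System.out.println(funderLabel("EC", "H2020"));  // EC_H2020
        System.out.println(funderLabel("NSF", null));    // NSF
    }
}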


@ -5,9 +5,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
@ -18,11 +21,18 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
* Selects the results linked to projects. Only for these results the dump will be performed.
@ -58,8 +68,10 @@ public class SparkResultLinkedToProject implements Serializable {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String graphPath = parser.get("graphPath");
log.info("graphPath: {}", graphPath);
final String resultProjectsPath = parser.get("graphPath");
log.info("graphPath: {}", resultProjectsPath);
String communityMapPath = parser.get("communityMapPath");
@SuppressWarnings("unchecked")
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
@ -70,43 +82,33 @@ public class SparkResultLinkedToProject implements Serializable {
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
writeResultsLinkedToProjects(spark, inputClazz, inputPath, outputPath, graphPath);
writeResultsLinkedToProjects(
communityMapPath, spark, inputClazz, inputPath, outputPath, resultProjectsPath);
});
}
private static <R extends Result> void writeResultsLinkedToProjects(SparkSession spark, Class<R> inputClazz,
String inputPath, String outputPath, String graphPath) {
private static <R extends Result> void writeResultsLinkedToProjects(String communityMapPath, SparkSession spark,
Class<R> inputClazz,
String inputPath, String outputPath, String resultProjectsPath) {
Dataset<R> results = Utils
.readPath(spark, inputPath, inputClazz)
.filter("dataInfo.deletedbyinference = false and datainfo.invisible = false");
Dataset<Relation> relations = Utils
.readPath(spark, graphPath + "/relation", Relation.class)
.filter(
"dataInfo.deletedbyinference = false and lower(relClass) = '"
+ ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'");
Dataset<Project> project = Utils.readPath(spark, graphPath + "/project", Project.class);
results.createOrReplaceTempView("result");
relations.createOrReplaceTempView("relation");
project.createOrReplaceTempView("project");
Dataset<R> tmp = spark
.sql(
"Select res.* " +
"from relation rel " +
"join result res " +
"on rel.source = res.id " +
"join project p " +
"on rel.target = p.id " +
"")
.as(Encoders.bean(inputClazz));
tmp
.groupByKey(
(MapFunction<R, String>) value -> value
.getId(),
Encoders.STRING())
.mapGroups((MapGroupsFunction<String, R, R>) (k, it) -> it.next(), Encoders.bean(inputClazz))
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
!r.getDataInfo().getInvisible());
Dataset<ResultProject> resultProjectDataset = Utils
.readPath(spark, resultProjectsPath, ResultProject.class);
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
results
.joinWith(resultProjectDataset, results.col("id").equalTo(resultProjectDataset.col("resultId")))
.map((MapFunction<Tuple2<R, ResultProject>, CommunityResult>) t2 -> {
CommunityResult cr = (CommunityResult) ResultMapper
.map(
t2._1(),
communityMap, Constants.DUMPTYPE.FUNDER.getType());
cr.setProjects(t2._2().getProjectsList());
return cr;
}, Encoders.bean(CommunityResult.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")


@ -0,0 +1,100 @@
package eu.dnetlib.dhp.oa.graph.dump.projectssubset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
import scala.Tuple2;
public class ProjectsSubsetSparkJob implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ProjectsSubsetSparkJob.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
ProjectsSubsetSparkJob.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String projectListPath = parser.get("projectListPath");
log.info("projectListPath: {}", projectListPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
getNewProjectList(spark, inputPath, outputPath, projectListPath);
});
}
private static void getNewProjectList(SparkSession spark, String inputPath, String outputPath,
String projectListPath) {
Dataset<String> projectList = spark.read().textFile(projectListPath);
Dataset<Project> projects;
projects = Utils.readPath(spark, inputPath, Project.class);
projects
.joinWith(projectList, projects.col("id").equalTo(projectList.col("value")), "left")
.map((MapFunction<Tuple2<Project, String>, Project>) t2 -> {
if (Optional.ofNullable(t2._2()).isPresent())
return null;
return t2._1();
}, Encoders.bean(Project.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
Utils
.readPath(spark, outputPath, Project.class)
.map((MapFunction<Project, String>) p -> p.getId(), Encoders.STRING())
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.text(projectListPath);
}
}
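
The left join with the project list followed by the null check keeps only the projects whose id is not yet in the list, and the Append write then extends the list with the newly selected ids. The set-difference semantics, sketched in plain Java (illustrative, not the Spark code above; the ids are made up):

import java.util.*;
import java.util.stream.Collectors;

public class ProjectSubsetSketch {

    // Return the project ids not already listed and record them as listed,
    // mirroring the left join + null filter and the Append write above.
    static List<String> newProjects(List<String> allProjectIds, Set<String> alreadyListed) {
        List<String> fresh = allProjectIds.stream()
            .filter(id -> !alreadyListed.contains(id))
            .collect(Collectors.toList());
        alreadyListed.addAll(fresh);
        return fresh;
    }

    public static void main(String[] args) {
        Set<String> listed = new HashSet<>(Collections.singletonList("40|aka_________::0001"));
        List<String> graph = Arrays.asList("40|aka_________::0001", "40|anr_________::0002");
        System.out.println(newProjects(graph, listed));  // [40|anr_________::0002]
    }
}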


@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
@ -136,7 +137,7 @@ public class MergeGraphTableSparkJob {
/**
* Datasources involved in the merge operation don't obey the infra precedence policy, but rely on a custom
* behaviour that, given two datasources from beta and prod, returns the one from prod with the highest
* compatibility among the two.
* compatibility among the two. Furthermore, the procedure merges the collectedfrom, originalId, and pid lists.
*
* @param p datasource from PROD
* @param b datasource from BETA
@ -160,9 +161,37 @@ public class MergeGraphTableSparkJob {
List<Qualifier> list = Arrays.asList(dp.getOpenairecompatibility(), db.getOpenairecompatibility());
dp.setOpenairecompatibility(Collections.min(list, new DatasourceCompatibilityComparator()));
dp
.setCollectedfrom(
Stream
.concat(
Optional
.ofNullable(dp.getCollectedfrom())
.map(Collection::stream)
.orElse(Stream.empty()),
Optional
.ofNullable(db.getCollectedfrom())
.map(Collection::stream)
.orElse(Stream.empty()))
.distinct() // relies on KeyValue.equals
.collect(Collectors.toList()));
dp.setOriginalId(mergeLists(dp.getOriginalId(), db.getOriginalId()));
dp.setPid(mergeLists(dp.getPid(), db.getPid()));
return (P) dp;
}
private static final <T> List<T> mergeLists(final List<T>... lists) {
return Arrays
.stream(lists)
.filter(Objects::nonNull)
.flatMap(List::stream)
.filter(Objects::nonNull)
.distinct()
.collect(Collectors.toList());
}
private static <P extends Oaf, B extends Oaf> P mergeWithPriorityToPROD(Optional<P> p, Optional<B> b) {
if (b.isPresent() & !p.isPresent()) {
return (P) b.get();
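
The new mergeLists helper used above is a null-safe, duplicate-free union of an arbitrary number of lists (duplicates collapse through equals, as the comment on collectedfrom notes for KeyValue). A standalone sketch of the same behaviour, generic and not tied to the Oaf model:

import java.util.*;
import java.util.stream.Collectors;

public class MergeListsSketch {

    // Null lists and null elements are skipped; duplicates collapse via equals().
    @SafeVarargs
    static <T> List<T> mergeLists(List<T>... lists) {
        return Arrays.stream(lists)
            .filter(Objects::nonNull)
            .flatMap(List::stream)
            .filter(Objects::nonNull)
            .distinct()
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        System.out.println(mergeLists(Arrays.asList("a", "b"), null, Arrays.asList("b", "c")));  // [a, b, c]
    }
}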


@ -27,15 +27,7 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import java.io.Closeable;
import java.io.IOException;
@ -143,8 +135,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
smdbe.execute("queryClaims.sql", smdbe::processClaims);
break;
case openaire:
log.info("Processing datasources...");
smdbe.execute("queryDatasources.sql", smdbe::processDatasource, verifyNamespacePrefix);
log.info("Processing services...");
smdbe.execute("queryServices.sql", smdbe::processService, verifyNamespacePrefix);
log.info("Processing projects...");
if (dbSchema.equalsIgnoreCase("beta")) {
@ -156,10 +148,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
log.info("Processing Organizations...");
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
log.info("Processing relationsNoRemoval ds <-> orgs ...");
log.info("Processing relations services <-> orgs ...");
smdbe
.execute(
"queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization,
"queryServiceOrganization.sql", smdbe::processServiceOrganization,
verifyNamespacePrefix);
log.info("Processing projects <-> orgs ...");
@ -235,32 +227,30 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
dbClient.processResults(sql, consumer);
}
public List<Oaf> processDatasource(final ResultSet rs) {
public List<Oaf> processService(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Datasource ds = new Datasource();
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
ds.setId(createOpenaireId(10, rs.getString("id"), true));
ds
.setOriginalId(
Arrays
.asList((String[]) rs.getArray("identities").getArray())
.asList((String[]) rs.getArray("originalid").getArray())
.stream()
.filter(StringUtils::isNotBlank)
.collect(Collectors.toList()));
ds
.setCollectedfrom(
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
ds.setPid(new ArrayList<>());
ds.setCollectedfrom(prepareCollectedfrom(rs.getArray("collectedfrom")));
ds.setPid(prepareListOfStructProps(rs.getArray("pid"), info));
ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
ds.setDateoftransformation(null); // Value not returned by the SQL query
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
ds.setOaiprovenance(null); // Values not present in the DB
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui")));
ds.setEosctype(prepareQualifierSplitting(rs.getString("eosctype")));
ds.setEoscdatasourcetype(prepareQualifierSplitting(rs.getString("eoscdatasourcetype")));
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
ds.setOfficialname(field(rs.getString("officialname"), info));
ds.setEnglishname(field(rs.getString("englishname"), info));
@ -277,20 +267,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
ds.setLanguages(listValues(rs.getArray("languages")));
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
ds.setVersioning(field(rs.getBoolean("versioning"), info));
ds.setVersioncontrol(rs.getBoolean("versioncontrol"));
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
ds.setPidsystems(field(rs.getString("pidsystems"), info));
ds.setCertificates(field(rs.getString("certificates"), info));
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
@ -299,13 +288,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
journal(
rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"),
rs.getString("issnLinking"), info)); // Journal
ds.setDataInfo(info);
ds.setLastupdatetimestamp(lastUpdateTimestamp);
ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
ds.setThematic(rs.getBoolean("thematic"));
ds.setKnowledgegraph(rs.getBoolean("knowledgegraph"));
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl"));
ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl"));
ds.setResearchproductaccesspolicies(listValues(rs.getArray("researchproductaccesspolicies")));
ds
.setResearchproductmetadataaccesspolicies(
listValues(rs.getArray("researchproductmetadataaccesspolicies")));
ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse"));
ds.setFulltextdownload(rs.getBoolean("fulltextdownload"));
ds
@ -313,8 +307,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
Optional
.ofNullable(
rs.getDate("consenttermsofusedate"))
.map(c -> c.toString())
.map(java.sql.Date::toString)
.orElse(null));
ds
.setLastconsenttermsofusedate(
Optional
.ofNullable(
rs.getDate("lastconsenttermsofusedate"))
.map(java.sql.Date::toString)
.orElse(null));
ds.setDataInfo(info);
ds.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(ds);
} catch (final Exception e) {
@ -425,11 +429,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
}
}
public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
public List<Oaf> processServiceOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
final String dsId = createOpenaireId(10, rs.getString("service"), true);
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
@ -603,6 +607,32 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
String.format("%.3f", trust));
}
private List<KeyValue> prepareCollectedfrom(Array values) throws SQLException {
if (Objects.isNull(values)) {
return null;
}
return Arrays
.stream((String[]) values.getArray())
.filter(Objects::nonNull)
.distinct()
.map(s -> keyValueSplitting(s, "@@@"))
.collect(Collectors.toList());
}
public static KeyValue keyValueSplitting(final String s, String separator) {
if (StringUtils.isBlank(s)) {
return null;
}
final String[] arr = s.split(separator);
if (arr.length != 2) {
return null;
}
KeyValue kv = new KeyValue();
kv.setKey(createOpenaireId(10, arr[0], true));
kv.setValue(arr[1]);
return kv;
}
private Qualifier prepareQualifierSplitting(final String s) {
if (StringUtils.isBlank(s)) {
return null;


@ -13,6 +13,23 @@
<name>isLookupUrl</name>
<description>the address of the lookUp service</description>
</property>
<property>
<name>shouldCleanContext</name>
<description>true if the context has to be cleaned</description>
</property>
<property>
<name>contextId</name>
<value>sobigdata</value>
<description>It is the context id that should be removed from the result if the condition is matched.
For now it is just sobigdata. In a future implementation I plan to have the contextId as a value in a JSON file
where the constraints that should be verified before removing the context from the result can also be specified.</description>
</property>
<property>
<name>verifyParam</name>
<value>gcube </value>
<description>It is the constraint to be verified. For now it is hardcoded as gcube and it is searched for in
the title: if the title starts with gcube, then the sobigdata context will be removed from the result if present.</description>
</property>
<property>
<name>sparkDriverMemory</name>
@ -275,7 +292,131 @@
<error to="Kill"/>
</action>
<join name="wait_clean" to="End"/>
<join name="wait_clean" to="clean_context"/>
<decision name="clean_context">
<switch>
<case to="fork_clean_context">${wf:conf('shouldCleanContext') eq true}</case>
<default to="End"/>
</switch>
</decision>
<fork name="fork_clean_context">
<path start="clean_publication_context"/>
<path start="clean_dataset_context"/>
<path start="clean_otherresearchproduct_context"/>
<path start="clean_software_context"/>
</fork>
<action name="clean_publication_context">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Clean publications context</name>
<class>eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
</spark-opts>
<arg>--inputPath</arg><arg>${graphOutputPath}/publication</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--workingPath</arg><arg>${workingDir}/working/publication</arg>
<arg>--contextId</arg><arg>${contextId}</arg>
<arg>--verifyParam</arg><arg>${verifyParam}</arg>
</spark>
<ok to="wait_clean_context"/>
<error to="Kill"/>
</action>
<action name="clean_dataset_context">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Clean datasets Context</name>
<class>eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
</spark-opts>
<arg>--inputPath</arg><arg>${graphOutputPath}/dataset</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--workingPath</arg><arg>${workingDir}/working/dataset</arg>
<arg>--contextId</arg><arg>${contextId}</arg>
<arg>--verifyParam</arg><arg>${verifyParam}</arg>
</spark>
<ok to="wait_clean_context"/>
<error to="Kill"/>
</action>
<action name="clean_otherresearchproduct_context">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Clean otherresearchproducts context</name>
<class>eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
</spark-opts>
<arg>--inputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--workingPath</arg><arg>${workingDir}/working/otherresearchproduct</arg>
<arg>--contextId</arg><arg>${contextId}</arg>
<arg>--verifyParam</arg><arg>${verifyParam}</arg>
</spark>
<ok to="wait_clean_context"/>
<error to="Kill"/>
</action>
<action name="clean_software_context">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Clean software context</name>
<class>eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
</spark-opts>
<arg>--inputPath</arg><arg>${graphOutputPath}/software</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--workingPath</arg><arg>${workingDir}/working/software</arg>
<arg>--contextId</arg><arg>${contextId}</arg>
<arg>--verifyParam</arg><arg>${verifyParam}</arg>
</spark>
<ok to="wait_clean_context"/>
<error to="Kill"/>
</action>
<join name="wait_clean_context" to="End"/>
<end name="End"/>
</workflow-app>
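
The cleaning actions above all delegate to CleanContextSparkJob with a contextId and a verifyParam. As a rough guide to the intent, here is a minimal Scala sketch, assuming simplified stand-in case classes for the OAF model (the real job operates on eu.dnetlib.dhp.schema.oaf.Publication and friends, and the sample values below are illustrative only): drop the context whose id matches contextId from any result whose titles do not start with verifyParam.

import org.apache.spark.sql.{Dataset, SparkSession}

case class Ctx(id: String)                              // stand-in for the OAF Context
case class Res(title: Seq[String], context: Seq[Ctx])   // stand-in for Publication/Dataset/...

object CleanContextSketch {

  /** Drop context `contextId` from results whose titles do not start with `verifyParam`. */
  def clean(results: Dataset[Res], contextId: String, verifyParam: String): Dataset[Res] = {
    val spark = results.sparkSession
    import spark.implicits._
    results.map { r =>
      val titleMatches = r.title.exists(_.toLowerCase.startsWith(verifyParam.toLowerCase))
      if (titleMatches) r
      else r.copy(context = r.context.filterNot(_.id.contains(contextId)))
    }
  }

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("clean-context-sketch").master("local[*]").getOrCreate()
    import spark.implicits._
    val in = Seq(
      Res(Seq("gcube based service"), Seq(Ctx("sobigdata::product"))),
      Res(Seq("unrelated paper"), Seq(Ctx("sobigdata::product"), Ctx("dh-ch")))
    ).toDS()
    clean(in, contextId = "sobigdata", verifyParam = "gcube").show(false)
    spark.stop()
  }
}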

View File

@ -21,6 +21,6 @@
"paramName": "gp",
"paramLongName": "graphPath",
"paramDescription": "the relationPath",
"paramRequired": true
"paramRequired": false
}
]

View File

@ -0,0 +1,23 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
}
]

View File

@ -28,6 +28,12 @@
"paramLongName":"graphPath",
"paramDescription": "the path to the relations",
"paramRequired": true
},
{
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the relations",
"paramRequired": true
}
]

View File

@ -0,0 +1,29 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "pl",
"paramLongName": "projectListPath",
"paramDescription": "the path of the association result projectlist",
"paramRequired": true
}
]
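
The JSON files above are argument specifications consumed through ArgumentApplicationParser, following the same pattern used by the Spark jobs further down in this compare. A short sketch of how a job would read the projectListPath spec (the resource name below is illustrative only):

import eu.dnetlib.dhp.application.ArgumentApplicationParser
import scala.io.Source

object ProjectsSubsetArgsSketch {
  def main(args: Array[String]): Unit = {
    // load the JSON spec shown above from the classpath (resource name is illustrative)
    val spec = Source
      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/projectsubset_parameters.json"))
      .mkString
    val parser = new ArgumentApplicationParser(spec)
    parser.parseArgument(args)

    // each paramLongName above becomes a named argument
    val sourcePath      = parser.get("sourcePath")       // -s   / --sourcePath
    val outputPath      = parser.get("outputPath")       // -out / --outputPath
    val projectListPath = parser.get("projectListPath")  // -pl  / --projectListPath
    println(s"sourcePath=$sourcePath outputPath=$outputPath projectListPath=$projectListPath")
  }
}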

View File

@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,185 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>projectListPath</name>
<description>the path to the project list</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>accessToken</name>
<description>the access token used for the deposition in Zenodo</description>
</property>
<property>
<name>connectionUrl</name>
<description>the connection url for Zenodo</description>
</property>
<property>
<name>metadata</name>
<description>the metadata associated with the deposition</description>
</property>
<property>
<name>depositionType</name>
<description>the type of deposition to perform: "new" for a brand new deposition, "version" for a new version of an already published deposition (in this case the concept record id must be provided), "upload" to add content to an open deposition whose deposition id is already known (in this case the deposition id must be provided)</description>
</property>
<property>
<name>conceptRecordId</name>
<description>for a new version, the concept record id of the previously published deposition</description>
</property>
<property>
<name>depositionId</name>
<description>the id of the open deposition to which content has to be added</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="dump_project"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="dump_project">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--outputPath</arg><arg>${workingDir}/project</arg>
<arg>--communityMapPath</arg><arg>noneed</arg>
</spark>
<ok to="get_new_projects"/>
<error to="Kill"/>
</action>
<action name="get_new_projects">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Select the subset of new projects</name>
<class>eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectsSubsetSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/project</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/project</arg>
<arg>--projectListPath</arg><arg>${projectListPath}</arg>
</spark>
<ok to="make_archive"/>
<error to="Kill"/>
</action>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
</java>
<ok to="send_zenodo"/>
<error to="Kill"/>
</action>
<action name="send_zenodo">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
<arg>--metadata</arg><arg>${metadata}</arg>
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
<arg>--depositionType</arg><arg>${depositionType}</arg>
<arg>--depositionId</arg><arg>${depositionId}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>
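
The get_new_projects action plugs ProjectsSubsetSparkJob between the project dump and the tar/Zenodo steps. One plausible reading of that step, stated here purely as an assumption based on the action name and its arguments (not on the actual ProjectsSubsetSparkJob source): keep the dumped projects whose id is not yet listed in projectListPath and write them out. A Scala sketch under that assumption:

import org.apache.spark.sql.{SaveMode, SparkSession}

object ProjectsSubsetSketch {
  def main(args: Array[String]): Unit = {
    val Array(sourcePath, outputPath, projectListPath) = args
    val spark = SparkSession.builder().appName("projects-subset-sketch").getOrCreate()
    import spark.implicits._

    val alreadyDumped = spark.read.textFile(projectListPath).toDF("id") // one project id per line (assumed format)
    val projects      = spark.read.textFile(sourcePath)                 // dumped projects as JSON lines
    val withId        = projects.map(json => (idOf(json), json)).toDF("id", "json")

    // keep only projects not already present in the list
    val newOnes = withId.join(alreadyDumped, Seq("id"), "left_anti").cache()
    newOnes.select("json").as[String].write.mode(SaveMode.Overwrite).text(outputPath)
    // the real job presumably also refreshes the project list; here the new ids are just written aside
    newOnes.select("id").as[String].write.mode(SaveMode.Overwrite).text(outputPath + "_newIds")
    spark.stop()
  }

  // naive id extraction, enough for the sketch; the real job uses the dump model classes
  private def idOf(json: String): String =
    "\"id\"\\s*:\\s*\"([^\"]+)\"".r.findFirstMatchIn(json).map(_.group(1)).getOrElse("")
}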

View File

@ -1,347 +0,0 @@
<workflow-app name="sub_dump_community_funder_results" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>communityMapPath</name>
<description>the path to the community map</description>
</property>
<property>
<name>selectedResults</name>
<description>the path to the possible subset of results to be dumped</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
</property>
<property>
<name>hiveJdbcUrl</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="fork_dump"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<fork name="fork_dump">
<path start="dump_publication"/>
<path start="dump_dataset"/>
<path start="dump_orp"/>
<path start="dump_software"/>
</fork>
<action name="dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${selectedResults}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--dumpType</arg><arg>${dumpType}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${selectedResults}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${selectedResults}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${selectedResults}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="prepareResultProject"/>
<action name="prepareResultProject">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_extendWithProject"/>
<error to="Kill"/>
</action>
<fork name="fork_extendWithProject">
<path start="extend_publication"/>
<path start="extend_dataset"/>
<path start="extend_orp"/>
<path start="extend_software"/>
</fork>
<action name="extend_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped publications with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/ext/publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped dataset with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/ext/dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped ORP with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/ext/orp</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped software with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/ext/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<join name="join_extend" to="End"/>
<end name="End"/>
</workflow-app>

View File

@ -1,2 +0,0 @@
## This is a classpath-based import file (this header is required)
dump_common classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app

View File

@ -77,42 +77,259 @@
</configuration>
</global>
<start to="common_action_community_funder"/>
<start to="fork_dump"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="common_action_community_funder">
<sub-workflow>
<app-path>${wf:appPath()}/dump_common
</app-path>
<propagate-configuration/>
<configuration>
<property>
<name>sourcePath</name>
<value>${sourcePath}</value>
</property>
<property>
<name>selectedResults</name>
<value>${sourcePath}</value>
</property>
<property>
<name>communityMapPath</name>
<value>${workingDir}/communityMap</value>
</property>
<property>
<name>outputPath</name>
<value>${workingDir}</value>
</property>
</configuration>
</sub-workflow>
<ok to="splitForCommunities" />
<error to="Kill" />
<fork name="fork_dump">
<path start="dump_publication"/>
<path start="dump_dataset"/>
<path start="dump_orp"/>
<path start="dump_software"/>
</fork>
<action name="dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--dumpType</arg><arg>${dumpType}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="prepareResultProject"/>
<action name="prepareResultProject">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_extendWithProject"/>
<error to="Kill"/>
</action>
<fork name="fork_extendWithProject">
<path start="extend_publication"/>
<path start="extend_dataset"/>
<path start="extend_orp"/>
<path start="extend_software"/>
</fork>
<action name="extend_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped publications with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/ext/publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped dataset with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/ext/dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped ORP with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/ext/orp</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped software with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
<arg>--outputPath</arg><arg>${workingDir}/ext/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<join name="join_extend" to="splitForCommunities"/>
<action name="splitForCommunities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>

View File

@ -298,6 +298,7 @@
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/validrelation</arg>

View File

@ -1,2 +0,0 @@
## This is a classpath-based import file (this header is required)
dump_common classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app

View File

@ -77,12 +77,36 @@
</configuration>
</global>
<start to="fork_result_linked_to_projects"/>
<start to="prepareResultProject"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="prepareResultProject">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_result_linked_to_projects"/>
<error to="Kill"/>
</action>
<fork name="fork_result_linked_to_projects">
<path start="select_publication_linked_to_projects"/>
@ -111,7 +135,8 @@
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
<arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_link"/>
<error to="Kill"/>
@ -137,7 +162,8 @@
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
<arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_link"/>
<error to="Kill"/>
@ -163,7 +189,8 @@
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
<arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_link"/>
<error to="Kill"/>
@ -189,41 +216,14 @@
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
<arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_link"/>
<error to="Kill"/>
</action>
<join name="join_link" to="common_action_community_funder"/>
<action name="common_action_community_funder">
<sub-workflow>
<app-path>${wf:appPath()}/dump_common
</app-path>
<propagate-configuration/>
<configuration>
<property>
<name>sourcePath</name>
<value>${sourcePath}</value>
</property>
<property>
<name>selectedResults</name>
<value>${workingDir}/result</value>
</property>
<property>
<name>communityMapPath</name>
<value>${workingDir}/communityMap</value>
</property>
<property>
<name>outputPath</name>
<value>${workingDir}</value>
</property>
</configuration>
</sub-workflow>
<ok to="dump_funder_results" />
<error to="Kill" />
</action>
<join name="join_link" to="dump_funder_results"/>
<action name="dump_funder_results">
<spark xmlns="uri:oozie:spark-action:0.2">
@ -242,9 +242,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/ext</arg>
<arg>--sourcePath</arg><arg>${workingDir}/result</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>

View File

@ -0,0 +1,37 @@
[
{
"paramName": "issm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "when true will stop SparkSession after job execution",
"paramRequired": false
},
{
"paramName": "in",
"paramLongName": "inputPath",
"paramDescription": "the path to the graph data dump to read",
"paramRequired": true
},
{
"paramName": "wp",
"paramLongName": "workingPath",
"paramDescription": "the path to store the output graph",
"paramRequired": true
},
{
"paramName": "ci",
"paramLongName": "contextId",
"paramDescription": "the id of the context to be removed",
"paramRequired": true
},
{
"paramName": "class",
"paramLongName": "graphTableClassName",
"paramDescription": "class name moelling the graph table",
"paramRequired": true
},
{
"paramName": "vf",
"paramLongName": "verifyParam",
"paramDescription": "the parameter to be verified to remove the context",
"paramRequired": true
}
]

View File

@ -30,6 +30,11 @@
<value></value>
<description>a blacklist of nsprefixes (comma separated)</description>
</property>
<property>
<name>reuseContent</name>
<value>false</value>
<description>whether to reuse content already imported from the aggregator database</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
@ -85,12 +90,20 @@
</configuration>
</global>
<start to="ImportDB"/>
<start to="reuse_db"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<decision name="reuse_db">
<switch>
<case to="ImportDB">${wf:conf('reuseContent') eq false}</case>
<case to="GenerateEntities">${wf:conf('reuseContent') eq true}</case>
<default to="ImportDB"/>
</switch>
</decision>
<action name="ImportDB">
<java>
<prepare>
@ -102,6 +115,7 @@
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--action</arg><arg>openaire</arg>
<arg>--dbschema</arg><arg>${dbSchema}</arg>
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
</java>
@ -124,6 +138,55 @@
<arg>--action</arg><arg>claims</arg>
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
</java>
<ok to="GenerateEntities"/>
<error to="Kill"/>
</action>
<action name="GenerateEntities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateEntities</name>
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/db_claims</arg>
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--shouldHashId</arg><arg>true</arg>
</spark>
<ok to="GenerateGraph"/>
<error to="Kill"/>
</action>
<action name="GenerateGraph">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateGraph</name>
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
<arg>--graphRawPath</arg><arg>${workingDir}/graph_aggregator</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>

View File

@ -27,7 +27,7 @@ SELECT
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid
FROM dsm_organizations o
LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom)
LEFT OUTER JOIN dsm_services d ON (d.id = o.collectedfrom)
LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id)
LEFT OUTER JOIN dsm_identities i ON (i.pid = p.pid)
GROUP BY

View File

@ -16,4 +16,4 @@ SELECT
FROM project_organization po
LEFT OUTER JOIN projects p ON (p.id = po.project)
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom);
LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom);

View File

@ -42,7 +42,7 @@ SELECT
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom)
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)

View File

@ -40,7 +40,7 @@ SELECT
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom)
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)

View File

@ -1,5 +1,5 @@
SELECT
dor.datasource AS datasource,
dor.service AS service,
dor.organization AS organization,
NULL AS startdate,
NULL AS enddate,
@ -11,6 +11,6 @@ SELECT
dc.officialname AS collectedfromname,
'providedBy@@@dnet:datasources_organizations_typologies' AS semantics,
d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction
FROM dsm_datasource_organization dor
LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id)
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom);
FROM dsm_service_organization dor
LEFT OUTER JOIN dsm_services d ON (dor.service = d.id)
LEFT OUTER JOIN dsm_services dc ON (dc.id = d.collectedfrom);

View File

@ -1,6 +1,7 @@
SELECT
d.id AS datasourceid,
d.id || array_agg(distinct di.pid) AS identities,
d.id AS id,
array_remove(d.id || array_agg(distinct CASE WHEN dp.pid like 'piwik%' THEN di.pid ELSE NULL END) || array_agg(distinct dds.duplicate), NULL) AS originalid,
array_remove(array_agg(distinct CASE WHEN di.pid NOT LIKE 'piwik%' THEN di.pid||'###'||di.issuertype||'@@@'||'dnet:pid_types' ELSE NULL END), NULL) as pid,
d.officialname AS officialname,
d.englishname AS englishname,
d.contactemail AS contactemail,
@ -9,8 +10,8 @@ SELECT
THEN
'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0'])
THEN
'openaire4.0@@@dnet:datasourceCompatibilityLevel'
THEN
'openaire4.0@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
THEN
'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel'
@ -40,25 +41,29 @@ SELECT
END AS openairecompatibility,
d.websiteurl AS websiteurl,
d.logourl AS logourl,
array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END) AS accessinfopackage,
array_remove(array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END), NULL) AS accessinfopackage,
d.latitude AS latitude,
d.longitude AS longitude,
d.namespaceprefix AS namespaceprefix,
NULL AS odnumberofitems,
NULL AS odnumberofitemsdate,
(SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies')
(SELECT array_agg(s|| '###keyword@@@dnet:subject_classification_typologies')
FROM UNNEST(
ARRAY(
SELECT trim(s)
FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects,
FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects,
d.description AS description,
NULL AS odpolicies,
ARRAY(SELECT trim(s)
FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages,
ARRAY(SELECT trim(s)
FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
array_remove(ARRAY(SELECT trim(s)
FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS odlanguages,
array_remove(ARRAY(SELECT trim(s)
FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS languages,
-- Term provided only by OpenDOAR:
-- probably updating the TR it could be replaced by research_entity_types[]
-- But a study on the vocabulary terms is needed
-- REMOVED: ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
false AS inferred,
false AS deletedbyinference,
0.9 AS trust,
@ -69,39 +74,59 @@ SELECT
d.releasestartdate AS releasestartdate,
d.releaseenddate AS releaseenddate,
d.missionstatementurl AS missionstatementurl,
d.dataprovider AS dataprovider,
d.serviceprovider AS serviceprovider,
-- the following 2 fields (provided by re3data) have been replaced by research_entity_types[]
-- VALUE 'Research Data' : d.dataprovider AS dataprovider,
-- VALUE 'Services' : d.serviceprovider AS serviceprovider,
d.databaseaccesstype AS databaseaccesstype,
d.datauploadtype AS datauploadtype,
d.databaseaccessrestriction AS databaseaccessrestriction,
d.datauploadrestriction AS datauploadrestriction,
d.versioning AS versioning,
-- REPLACED BY version_control : d.versioning AS versioning,
d.version_control AS versioning,
d.version_control AS versioncontrol,
d.citationguidelineurl AS citationguidelineurl,
d.qualitymanagementkind AS qualitymanagementkind,
d.pidsystems AS pidsystems,
array_to_string(array_agg(distinct dps.scheme), ' ') AS pidsystems,
d.certificates AS certificates,
ARRAY[]::text[] AS policies,
dc.id AS collectedfromid,
dc.officialname AS collectedfromname,
d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
array_remove(
array(
select distinct cf
from unnest(
dc.id||'@@@'||dc.officialname || array_agg(distinct dds_cf.id||'@@@'||dds_cf.officialname)
) as cf),
NULL) AS collectedfrom,
d._typology_to_remove_||'@@@dnet:datasource_typologies' AS datasourcetype,
d._typology_to_remove_||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
d.eosc_type||'@@@dnet:eosc_types' AS eosctype,
d.eosc_datasource_type||'@@@dnet:eosc_datasource_types' AS eoscdatasourcetype,
d.issn AS issnPrinted,
d.eissn AS issnOnline,
d.lissn AS issnLinking,
d.research_entity_types AS researchentitytypes,
d.consenttermsofuse AS consenttermsofuse,
d.fulltextdownload AS fulltextdownload,
d.consenttermsofusedate AS consenttermsofusedate,
de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
de.thematic AS thematic,
de.knowledge_graph AS knowledgegraph,
array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies
d.lastconsenttermsofusedate AS lastconsenttermsofusedate,
d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
d.thematic AS thematic,
array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies,
nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl,
nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl,
array_remove(d.research_product_access_policies, '') AS researchproductaccesspolicies,
array_remove(d.research_product_metadata_access_policies, '') AS researchproductmetadataaccesspolicies
FROM dsm_datasources d
LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id)
LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id)
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource)
LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource)
FROM dsm_services d
LEFT OUTER JOIN dsm_services dc on (d.collectedfrom = dc.id)
LEFT OUTER JOIN dsm_api a ON (d.id = a.service)
LEFT OUTER JOIN dsm_servicepids dp ON (d.id = dp.service)
LEFT OUTER JOIN dsm_identities di ON (dp.pid = di.pid)
LEFT OUTER JOIN dsm_dedup_services dds ON (d.id = dds.id)
LEFT OUTER JOIN dsm_services dds_dup ON (dds.duplicate = dds_dup.id)
LEFT OUTER JOIN dsm_services dds_cf ON (dds_dup.collectedfrom = dds_cf.id)
LEFT OUTER JOIN dsm_pid_systems dps ON (d.id = dps.service)
WHERE
d.dedup_main_service = true
GROUP BY
d.id,
@ -119,23 +144,27 @@ GROUP BY
d.releasestartdate,
d.releaseenddate,
d.missionstatementurl,
d.dataprovider,
d.serviceprovider,
-- TODO REMOVED ???: d.dataprovider,
-- TODO REMOVED ???: d.serviceprovider,
d.databaseaccesstype,
d.datauploadtype,
d.databaseaccessrestriction,
d.datauploadrestriction,
d.versioning,
-- REPLACED BY version_control : d.versioning,
d.version_control,
d.citationguidelineurl,
d.qualitymanagementkind,
d.pidsystems,
-- REMOVED: d.qualitymanagementkind,
d.certificates,
dc.id,
dc.officialname,
d.issn,
d.eissn,
d.lissn,
de.jurisdiction,
de.thematic,
de.knowledge_graph,
de.content_policies
d.jurisdiction,
d.thematic,
-- REMOVED ???: de.knowledge_graph,
d.content_policies,
d.submission_policy_url,
d.preservation_policy_url,
d.research_product_access_policies,
d.research_product_metadata_access_policies

View File

@ -7,6 +7,7 @@ import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
object SparkConvertRDDtoDataset {
@ -94,8 +95,8 @@ object SparkConvertRDDtoDataset {
log.info("Converting Relation")
val relationSemanticFilter = List(
"cites",
"iscitedby",
// "cites",
// "iscitedby",
"merges",
"ismergedin",
"HasAmongTopNSimilarDocuments",
@ -107,6 +108,12 @@ object SparkConvertRDDtoDataset {
.map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
//filter OpenCitations relations
.filter(r =>
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
"opencitations".equalsIgnoreCase(k.getValue)
)
)
.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
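
The filter added above drops relations collected from OpenCitations. The same predicate, restated as a self-contained sketch with simplified stand-ins for the Relation and KeyValue classes, makes the intent easy to test in isolation:

case class KV(value: String)                               // stand-in for eu.dnetlib.dhp.schema.oaf.KeyValue
case class Rel(collectedfrom: List[KV], relClass: String)  // stand-in for Relation

object OpenCitationsFilterSketch {

  // keep a relation only if it has a non-empty collectedfrom that does not mention OpenCitations
  def keep(r: Rel): Boolean =
    r.collectedfrom != null &&
      r.collectedfrom.nonEmpty &&
      !r.collectedfrom.exists(k => "opencitations".equalsIgnoreCase(k.value))

  def main(args: Array[String]): Unit = {
    val rels = List(
      Rel(List(KV("OpenCitations")), "cites"),
      Rel(List(KV("Crossref")), "cites")
    )
    println(rels.filter(keep)) // only the Crossref relation survives
  }
}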

View File

@ -1,136 +0,0 @@
package eu.dnetlib.dhp.sx.graph.pangaea
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.{Encoder, Encoders}
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import java.util.regex.Pattern
import scala.language.postfixOps
import scala.xml.{Elem, Node, XML}
case class PangaeaDataModel(
identifier: String,
title: List[String],
objectType: List[String],
creator: List[String],
publisher: List[String],
dataCenter: List[String],
subject: List[String],
language: String,
rights: String,
parent: String,
relation: List[String],
linkage: List[(String, String)]
) {}
object PangaeaUtils {
def toDataset(input: String): PangaeaDataModel = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
val xml = (json \ "xml").extract[String]
parseXml(xml)
}
def findDOIInRelation(input: List[String]): List[String] = {
val pattern = Pattern.compile("\\b(10[.][0-9]{4,}(?:[.][0-9]+)*\\/(?:(?![\"&\\'<>])\\S)+)\\b")
input
.map(i => {
val matcher = pattern.matcher(i)
if (matcher.find())
matcher.group(0)
else
null
})
.filter(i => i != null)
}
def attributeOpt(attribute: String, node: Node): Option[String] =
node.attribute(attribute) flatMap (_.headOption) map (_.text)
def extractLinkage(node: Elem): List[(String, String)] = {
(node \ "linkage")
.map(n => (attributeOpt("type", n), n.text))
.filter(t => t._1.isDefined)
.map(t => (t._1.get, t._2))(collection.breakOut)
}
def parseXml(input: String): PangaeaDataModel = {
val xml = XML.loadString(input)
val identifier = (xml \ "identifier").text
val title: List[String] = (xml \ "title").map(n => n.text)(collection.breakOut)
val pType: List[String] = (xml \ "type").map(n => n.text)(collection.breakOut)
val creators: List[String] = (xml \ "creator").map(n => n.text)(collection.breakOut)
val publisher: List[String] = (xml \ "publisher").map(n => n.text)(collection.breakOut)
val dataCenter: List[String] = (xml \ "dataCenter").map(n => n.text)(collection.breakOut)
val subject: List[String] = (xml \ "subject").map(n => n.text)(collection.breakOut)
val language = (xml \ "language").text
val rights = (xml \ "rights").text
val parentIdentifier = (xml \ "parentIdentifier").text
val relation: List[String] = (xml \ "relation").map(n => n.text)(collection.breakOut)
val relationFiltered = findDOIInRelation(relation)
val linkage: List[(String, String)] = extractLinkage(xml)
PangaeaDataModel(
identifier,
title,
pType,
creators,
publisher,
dataCenter,
subject,
language,
rights,
parentIdentifier,
relationFiltered,
linkage
)
}
def getDatasetAggregator(): Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] =
new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] {
override def zero: PangaeaDataModel = null
override def reduce(b: PangaeaDataModel, a: (String, PangaeaDataModel)): PangaeaDataModel = {
if (b == null)
a._2
else {
if (a == null)
b
else {
if (b.title != null && b.title.nonEmpty)
b
else
a._2
}
}
}
override def merge(b1: PangaeaDataModel, b2: PangaeaDataModel): PangaeaDataModel = {
if (b1 == null)
b2
else {
if (b2 == null)
b1
else {
if (b1.title != null && b1.title.nonEmpty)
b1
else
b2
}
}
}
override def finish(reduction: PangaeaDataModel): PangaeaDataModel = reduction
override def bufferEncoder: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel]
override def outputEncoder: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel]
}
}

View File

@ -1,58 +0,0 @@
package eu.dnetlib.dhp.sx.graph.pangaea
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
import scala.io.Source
object SparkGeneratePanagaeaDataset {
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(getClass)
val conf: SparkConf = new SparkConf()
val parser = new ArgumentApplicationParser(
Source
.fromInputStream(
getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/pangaea/pangaea_to_dataset.json")
)
.mkString
)
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
.builder()
.config(conf)
.appName(SparkGeneratePanagaeaDataset.getClass.getSimpleName)
.master(parser.get("master"))
.getOrCreate()
parser.getObjectMap.asScala.foreach(s => logger.info(s"${s._1} -> ${s._2}"))
logger.info("Converting sequential file into Dataset")
val sc: SparkContext = spark.sparkContext
val workingPath: String = parser.get("workingPath")
implicit val pangaeaEncoders: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel]
val inputRDD: RDD[PangaeaDataModel] =
sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s))
spark
.createDataset(inputRDD)
.as[PangaeaDataModel]
.map(s => (s.identifier, s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders))
.groupByKey(_._1)(Encoders.STRING)
.agg(PangaeaUtils.getDatasetAggregator().toColumn)
.map(s => s._2)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingPath/dataset")
}
}

View File

@ -0,0 +1,300 @@
package eu.dnetlib.dhp.oa.graph.clean;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Locale;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.Instance;
import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute;
import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class CleanContextTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(CleanContextTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(DumpJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(DumpJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void testResultClean() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json")
.getPath();
final String prefix = "gcube ";
spark
.read()
.textFile(sourcePath)
.map(
(MapFunction<String, Publication>) r -> OBJECT_MAPPER.readValue(r, Publication.class),
Encoders.bean(Publication.class))
.write()
.json(workingDir.toString() + "/publication");
CleanContextSparkJob.main(new String[] {
"--isSparkSessionManaged", Boolean.FALSE.toString(),
"--inputPath", workingDir.toString() + "/publication",
"-graphTableClassName", Publication.class.getCanonicalName(),
"-workingPath", workingDir.toString() + "/working",
"-contextId", "sobigdata",
"-verifyParam", "gCube "
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Publication> tmp = sc
.textFile(workingDir.toString() + "/publication")
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
Assertions.assertEquals(7, tmp.count());
// original result with sobigdata context and gcube as starting string in the main title for the publication
Assertions
.assertEquals(
0,
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"))
.collect()
.get(0)
.getContext()
.size());
// original result with sobigdata context without gcube as starting string in the main title for the publication
Assertions
.assertEquals(
1,
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"sobigdata::projects::2",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
// original result with sobigdata context with gcube as starting string in the subtitle
Assertions
.assertEquals(
1,
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"sobigdata::projects::2",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
List<StructuredProperty> titles = tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(1, titles.size());
Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix));
Assertions.assertEquals("subtitle", titles.get(0).getQualifier().getClassid());
// original result with sobigdata context with gcube not as starting string in the main title
Assertions
.assertEquals(
1,
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"sobigdata::projects::1",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
titles = tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(1, titles.size());
Assertions.assertFalse(titles.get(0).getValue().toLowerCase().startsWith(prefix));
Assertions.assertTrue(titles.get(0).getValue().toLowerCase().contains(prefix.trim()));
Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid());
// original result with sobigdata in context and also other contexts with gcube as starting string for the main
// title
Assertions
.assertEquals(
1,
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"dh-ch",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
titles = tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(1, titles.size());
Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix));
Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid());
// original result with multiple main titles, one of which with gcube as starting string, and with 2 contexts
Assertions
.assertEquals(
1,
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"dh-ch",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
titles = tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(2, titles.size());
Assertions
.assertTrue(
titles
.stream()
.anyMatch(
t -> t.getQualifier().getClassid().equals("main title")
&& t.getValue().toLowerCase().startsWith(prefix)));
// original result without sobigdata in context with gcube as starting string for the main title
Assertions
.assertEquals(
1,
tmp
.filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"dh-ch",
tmp
.filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
titles = tmp
.filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(2, titles.size());
Assertions
.assertTrue(
titles
.stream()
.anyMatch(
t -> t.getQualifier().getClassid().equals("main title")
&& t.getValue().toLowerCase().startsWith(prefix)));
}
}
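
The assertions above all exercise a single cleaning rule: CleanContextSparkJob drops the context entries whose id starts with the configured contextId ("sobigdata") only when one of the publication's main titles starts, case-insensitively, with the verification prefix ("gCube "); subtitles and non-leading occurrences leave the context untouched. The following is a minimal sketch of that predicate with plain collections, hypothetical record and helper names, and none of the job's actual Spark plumbing.

import java.util.List;
import java.util.stream.Collectors;

public class CleanContextSketch {

    // Simplified stand-ins for the schema classes used by the test.
    record Title(String value, String classid) {}
    record Context(String id) {}

    // Drop context entries whose id starts with contextId when any main title
    // starts (case-insensitively) with verifyParam.
    static List<Context> clean(List<Title> titles, List<Context> contexts,
                               String contextId, String verifyParam) {
        boolean titleMatches = titles.stream()
            .anyMatch(t -> "main title".equals(t.classid())
                && t.value().toLowerCase().startsWith(verifyParam.toLowerCase()));
        if (!titleMatches) {
            return contexts;
        }
        return contexts.stream()
            .filter(c -> !c.id().startsWith(contextId))
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<Title> titles = List.of(new Title("gCube data processing", "main title"));
        List<Context> contexts = List.of(new Context("sobigdata::projects::2"), new Context("dh-ch"));
        // Only the dh-ch entry survives, matching the behaviour asserted above.
        System.out.println(clean(titles, contexts, "sobigdata", "gCube "));
    }
}

Run against ids like the fixtures above, this is exactly the split the test expects: sobigdata entries disappear while unrelated contexts such as dh-ch survive.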

View File

@ -321,4 +321,27 @@ public class PrepareResultProjectJobTest {
3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());
}
@Test
void testMatchx() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match")
.getPath();
SparkPrepareResultProject.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/preparedInfo",
"-sourcePath", sourcePath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<ResultProject> tmp = sc
.textFile(workingDir.toString() + "/preparedInfo")
.map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));
tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
}
}

View File

@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
@ -76,7 +77,11 @@ public class ResultLinkedToProjectTest {
.getPath();
final String graphPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch")
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
.getPath();
SparkResultLinkedToProject.main(new String[] {
@ -84,20 +89,18 @@ public class ResultLinkedToProjectTest {
"-outputPath", workingDir.toString() + "/preparedInfo",
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-graphPath", graphPath
"-graphPath", graphPath,
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Result> tmp = sc
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/preparedInfo")
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<Result> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Result.class));
Assertions.assertEquals(0, verificationDataset.count());
Assertions.assertEquals(0, tmp.count());
}
@ -108,8 +111,12 @@ public class ResultLinkedToProjectTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json")
.getPath();
final String relationPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match")
final String graphPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
.getPath();
SparkResultLinkedToProject.main(new String[] {
@ -117,20 +124,18 @@ public class ResultLinkedToProjectTest {
"-outputPath", workingDir.toString() + "/preparedInfo",
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-graphPath", relationPath
"-graphPath", graphPath,
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Publication> tmp = sc
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/preparedInfo")
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<Publication> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, tmp.count());
}
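
The two tests above pin down the behaviour of SparkResultLinkedToProject after the change: a publication is dumped as a CommunityResult only when its identifier appears among the prepared result/project links under graphPath, hence 0 records for the nomatch fixture and 1 for the match fixture. Below is a minimal sketch of that semi-join with plain collections; the record type and the second identifier are illustrative, not taken from the fixtures.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class ResultLinkedToProjectSketch {

    record Publication(String id) {}

    // Keep only publications whose id appears among the prepared result/project links.
    static List<Publication> linkedToProject(List<Publication> pubs, Set<String> resultIdsWithProjects) {
        return pubs.stream()
            .filter(p -> resultIdsWithProjects.contains(p.id()))
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        Set<String> linked = Set.of("50|a89337edbe55::43e8b61e5e8d682545cb867be8118585");
        List<Publication> pubs = List.of(
            new Publication("50|a89337edbe55::43e8b61e5e8d682545cb867be8118585"),
            new Publication("50|some_other___::0000000000000000000000000000000")); // illustrative id, not from the fixtures
        System.out.println(linkedToProject(pubs, linked).size()); // 1
    }
}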

View File

@ -5,10 +5,14 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
// import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults2;
// import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkGetFunderList;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
@ -68,20 +72,19 @@ public class SplitPerFunderTest {
void test1() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump")
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext")
.getPath();
SparkDumpFunderResults.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/split",
"-sourcePath", sourcePath,
"-graphPath", sourcePath
"-sourcePath", sourcePath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
// FP7 3
// FP7 3 and H2020 3
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/split/EC_FP7")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
@ -143,11 +146,6 @@ public class SplitPerFunderTest {
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
Assertions.assertEquals(1, tmp.count());
// CONICYT 0
tmp = sc
.textFile(workingDir.toString() + "/split/CONICYTF")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
Assertions.assertEquals(0, tmp.count());
}
}
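
The test above checks that SparkDumpFunderResults partitions the dumped CommunityResult records into one output folder per funder, with EC results split further into streams such as EC_FP7 and EC_H2020. Below is a minimal sketch of that per-funder routing under those assumptions; the record type, field names, and the EC split rule here are illustrative, not the job's actual code.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class SplitPerFunderSketch {

    record CommunityResult(String id, String funderShortName, String fundingLevel0) {}

    // Derive the output folder per result: EC is split further by its first funding level.
    static String folderFor(CommunityResult r) {
        if ("EC".equals(r.funderShortName())) {
            return "EC_" + r.fundingLevel0(); // e.g. EC_FP7, EC_H2020
        }
        return r.funderShortName();
    }

    public static void main(String[] args) {
        List<CommunityResult> results = List.of(
            new CommunityResult("r1", "EC", "FP7"),
            new CommunityResult("r2", "NSF", null),
            new CommunityResult("r3", "EC", "H2020"));
        Map<String, List<CommunityResult>> split = results.stream()
            .collect(Collectors.groupingBy(SplitPerFunderSketch::folderFor));
        split.forEach((folder, rs) -> System.out.println(folder + " -> " + rs.size()));
    }
}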

View File

@ -0,0 +1,158 @@
package eu.dnetlib.dhp.oa.graph.dump.projectssubset;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
public class ProjectSubsetTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectSubsetTest.class);
private static final HashMap<String, String> map = new HashMap<>();
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(
eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectSubsetTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectSubsetTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectSubsetTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void testAllNew() throws Exception {
final String projectListPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
.getPath();
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/allnew/projects")
.getPath();
spark
.read()
.textFile(projectListPath)
.write()
.mode(SaveMode.Overwrite)
.text(workingDir.toString() + "/projectIds");
ProjectsSubsetSparkJob.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/projects",
"-sourcePath", sourcePath,
"-projectListPath", workingDir.toString() + "/projectIds"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Project> tmp = sc
.textFile(workingDir.toString() + "/projects")
.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
Assertions.assertEquals(12, tmp.count());
Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("aka_________")).count());
Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("anr_________")).count());
Assertions.assertEquals(4, tmp.filter(p -> p.getId().substring(3, 15).equals("arc_________")).count());
Assertions.assertEquals(3, tmp.filter(p -> p.getId().substring(3, 15).equals("conicytf____")).count());
Assertions.assertEquals(1, tmp.filter(p -> p.getId().substring(3, 15).equals("corda_______")).count());
Assertions.assertEquals(40, sc.textFile(workingDir.toString() + "/projectIds").count());
}
@Test
void testMatchOne() throws Exception {
final String projectListPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
.getPath();
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/matchOne/projects")
.getPath();
spark
.read()
.textFile(projectListPath)
.write()
.mode(SaveMode.Overwrite)
.text(workingDir.toString() + "/projectIds");
ProjectsSubsetSparkJob.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/projects",
"-sourcePath", sourcePath,
"-projectListPath", workingDir.toString() + "/projectIds"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Project> tmp = sc
.textFile(workingDir.toString() + "/projects")
.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
Assertions.assertEquals(11, tmp.count());
Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("aka_________")).count());
Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("anr_________")).count());
Assertions.assertEquals(4, tmp.filter(p -> p.getId().substring(3, 15).equals("arc_________")).count());
Assertions.assertEquals(3, tmp.filter(p -> p.getId().substring(3, 15).equals("conicytf____")).count());
Assertions.assertEquals(0, tmp.filter(p -> p.getId().substring(3, 15).equals("corda__h2020")).count());
Assertions.assertEquals(39, sc.textFile(workingDir.toString() + "/projectIds").count());
}
}
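
Both tests encode the same contract for ProjectsSubsetSparkJob: projects whose identifiers already appear in projectListPath are skipped, the remaining projects are written to the output, and their identifiers are appended to the list (12 new projects take the list from 28 to 40 ids, 11 take it to 39). Below is a minimal sketch of that set-difference step with plain collections instead of Spark datasets; the helper and record names are illustrative.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class ProjectsSubsetSketch {

    record Project(String id) {}

    // Returns the projects not yet listed and appends their ids to knownIds,
    // mirroring the counts asserted in testAllNew/testMatchOne.
    static List<Project> subset(List<Project> source, Set<String> knownIds) {
        List<Project> fresh = source.stream()
            .filter(p -> !knownIds.contains(p.id()))
            .collect(Collectors.toList());
        fresh.forEach(p -> knownIds.add(p.id()));
        return fresh;
    }

    public static void main(String[] args) {
        Set<String> knownIds = new HashSet<>(List.of("40|corda__h2020::bf5d35ec8d24ae4abfb4a1c6a0af3856"));
        List<Project> source = List.of(
            new Project("40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4"),
            new Project("40|corda__h2020::bf5d35ec8d24ae4abfb4a1c6a0af3856"));
        System.out.println(subset(source, knownIds).size()); // 1: the corda project is already known
        System.out.println(knownIds.size());                 // 2
    }
}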

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.graph.merge;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import java.io.IOException;
import java.util.Optional;
@ -25,7 +26,23 @@ class MergeGraphTableSparkJobTest {
}
@Test
void testMergeDatasources() throws IOException {
void testMerge() throws IOException {
Datasource d = MergeGraphTableSparkJob
.mergeDatasource(
d("datasource_cris.json"),
d("datasource_openaire2.0.json"));
assertEquals("10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", d.getId());
assertNotNull(d.getOriginalId());
assertEquals(2, d.getOriginalId().size());
assertNotNull(d.getCollectedfrom());
assertEquals(2, d.getCollectedfrom().size());
assertNotNull(d.getPid());
assertEquals(1, d.getPid().size());
}
@Test
void testMergeCompatibility() throws IOException {
assertEquals(
"openaire-cris_1.1",
MergeGraphTableSparkJob

View File

@ -12,8 +12,11 @@ import java.sql.Array;
import java.sql.Date;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
@ -28,12 +31,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@ExtendWith(MockitoExtension.class)
@ -63,22 +61,32 @@ public class MigrateDbEntitiesApplicationTest {
}
@Test
public void testProcessDatasource() throws Exception {
final List<TypedField> fields = prepareMocks("datasources_resultset_entry.json");
public void testProcessService() throws Exception {
final List<TypedField> fields = prepareMocks("services_resultset_entry.json");
final List<Oaf> list = app.processDatasource(rs);
final List<Oaf> list = app.processService(rs);
assertEquals(1, list.size());
verifyMocks(fields);
final Datasource ds = (Datasource) list.get(0);
assertValidId(ds.getId());
assertValidId(ds.getCollectedfrom().get(0).getKey());
ds
.getCollectedfrom()
.stream()
.map(KeyValue::getKey)
.forEach(dsId -> assertValidId(dsId));
assertEquals(1, ds.getPid().size());
assertEquals("r3d100010218", ds.getPid().get(0).getValue());
assertEquals("re3data", ds.getPid().get(0).getQualifier().getClassid());
assertEquals("dnet:pid_types", ds.getPid().get(0).getQualifier().getSchemeid());
assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue());
assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue());
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue());
assertEquals(getValueAsString("logourl", fields), ds.getLogourl());
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue());
assertEquals(getValueAsString("collectedfromname", fields), ds.getCollectedfrom().get(0).getValue());
assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName());
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
@ -90,19 +98,98 @@ public class MigrateDbEntitiesApplicationTest {
assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid());
assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid());
assertEquals("Data Source", ds.getEosctype().getClassid());
assertEquals("Data Source", ds.getEosctype().getClassname());
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemeid());
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemename());
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassid());
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassname());
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemeid());
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemename());
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassid());
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassname());
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemeid());
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemename());
assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude().getValue());
assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude().getValue());
assertEquals(getValueAsString("dateofvalidation", fields), ds.getDateofvalidation());
assertEquals(getValueAsString("description", fields), ds.getDescription().getValue());
// TODO assertEquals(getValueAsString("subjects", fields), ds.getSubjects());
assertEquals("0.0", ds.getOdnumberofitems().getValue());
assertEquals(getValueAsString("odnumberofitemsdate", fields), ds.getOdnumberofitemsdate());
assertEquals(getValueAsString("odpolicies", fields), ds.getOdpolicies());
assertEquals(
getValueAsList("odlanguages", fields),
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList()));
assertEquals(getValueAsList("languages", fields), ds.getLanguages());
assertEquals(
getValueAsList("accessinfopackage", fields),
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList()));
assertEquals(getValueAsString("releasestartdate", fields), ds.getReleasestartdate());
assertEquals(getValueAsString("releaseenddate", fields), ds.getReleasestartdate());
assertEquals(getValueAsString("missionstatementurl", fields), ds.getMissionstatementurl());
assertEquals(null, ds.getDataprovider());
assertEquals(null, ds.getServiceprovider());
assertEquals(getValueAsString("databaseaccesstype", fields), ds.getDatabaseaccesstype());
assertEquals(getValueAsString("datauploadtype", fields), ds.getDatauploadtype());
assertEquals(getValueAsString("databaseaccessrestriction", fields), ds.getDatabaseaccessrestriction());
assertEquals(getValueAsString("datauploadrestriction", fields), ds.getDatauploadrestriction());
assertEquals(false, ds.getVersioning().getValue());
assertEquals(false, ds.getVersioncontrol());
assertEquals(getValueAsString("citationguidelineurl", fields), ds.getCitationguidelineurl());
assertEquals(getValueAsString("pidsystems", fields), ds.getPidsystems());
assertEquals(getValueAsString("certificates", fields), ds.getCertificates());
assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes());
assertEquals("National", ds.getJurisdiction().getClassid());
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
assertTrue(ds.getThematic());
assertTrue(ds.getKnowledgegraph());
assertEquals(1, ds.getContentpolicies().size());
assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid());
assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid());
HashSet<String> cpSchemeId = ds
.getContentpolicies()
.stream()
.map(Qualifier::getSchemeid)
.collect(Collectors.toCollection(HashSet::new));
assertTrue(cpSchemeId.size() == 1);
assertTrue(cpSchemeId.contains("eosc:contentpolicies"));
HashSet<String> cpSchemeName = ds
.getContentpolicies()
.stream()
.map(Qualifier::getSchemename)
.collect(Collectors.toCollection(HashSet::new));
assertTrue(cpSchemeName.size() == 1);
assertTrue(cpSchemeName.contains("eosc:contentpolicies"));
assertEquals(2, ds.getContentpolicies().size());
assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid());
assertEquals("Resource collection", ds.getContentpolicies().get(1).getClassid());
assertEquals(getValueAsString("submissionpolicyurl", fields), ds.getSubmissionpolicyurl());
assertEquals(getValueAsString("preservationpolicyurl", fields), ds.getPreservationpolicyurl());
assertEquals(
getValueAsList("researchproductaccesspolicies", fields),
ds.getResearchproductaccesspolicies());
assertEquals(
getValueAsList("researchproductmetadataaccesspolicies", fields),
ds.getResearchproductmetadataaccesspolicies());
assertEquals(true, ds.getConsenttermsofuse());
assertEquals(true, ds.getFulltextdownload());
assertEquals("2022-03-11", ds.getConsenttermsofusedate());
assertEquals("2022-03-11", ds.getLastconsenttermsofusedate());
}
@Test
@ -154,7 +241,7 @@ public class MigrateDbEntitiesApplicationTest {
public void testProcessDatasourceOrganization() throws Exception {
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
final List<Oaf> list = app.processDatasourceOrganization(rs);
final List<Oaf> list = app.processServiceOrganization(rs);
assertEquals(2, list.size());
verifyMocks(fields);
@ -356,18 +443,31 @@ public class MigrateDbEntitiesApplicationTest {
}
private Float getValueAsFloat(final String name, final List<TypedField> fields) {
return new Float(getValueAs(name, fields).toString());
final Object value = getValueAs(name, fields);
return value != null ? new Float(value.toString()) : null;
}
private Double getValueAsDouble(final String name, final List<TypedField> fields) {
final Object value = getValueAs(name, fields);
return value != null ? new Double(value.toString()) : null;
}
private Integer getValueAsInt(final String name, final List<TypedField> fields) {
final Object value = getValueAs(name, fields);
return value != null ? new Integer(value.toString()) : null;
}
private <T> T getValueAs(final String name, final List<TypedField> fields) {
return fields
final Optional<T> field = fields
.stream()
.filter(f -> f.getField().equals(name))
.map(TypedField::getValue)
.filter(Objects::nonNull)
.map(o -> (T) o)
.findFirst()
.get();
.map(TypedField::getValue)
.map(o -> (T) o);
if (!field.isPresent()) {
return null;
}
return field.get();
}
private List<String> getValueAsList(final String name, final List<TypedField> fields) {

View File

@ -1,29 +0,0 @@
package eu.dnetlib.dhp.sx.pangaea
import eu.dnetlib.dhp.sx.graph.pangaea.PangaeaUtils
import org.junit.jupiter.api.Test
import java.util.TimeZone
import java.text.SimpleDateFormat
import java.util.Date
import scala.io.Source
class PangaeaTransformTest {
@Test
def test_dateStamp() :Unit ={
val d = new Date()
val s:String = s"${new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS")format d}Z"
println(s)
val xml = Source.fromInputStream(getClass.getResourceAsStream("input.xml")).mkString
println(PangaeaUtils.parseXml(xml))
}
}

View File

@ -0,0 +1 @@
{"ee":"SDSN - Greece","epos":"EPOS","enrmaps":"Energy Research","fet-h2020":"FET H2020","instruct":"Instruct-Eric","egi":"EGI Federation","euromarine":"Euromarine","covid-19":"COVID-19","dariah":"DARIAH EU","rda":"Research Data Alliance","clarin":"CLARIN","aginfra":"Agricultural and Food Sciences","risis":"RISI","fam":"Fisheries and Aquaculture Management","beopen":"Transport Research","elixir-gr":"ELIXIR GR","fet-fp7":"FET FP7","ifremer":"Ifremer","science-innovation-policy":"Science and Innovation Policy Studies","mes":"European Marine Scinece","oa-pg":"EC Post-Grant Open Access Pilot","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,8 @@
NSF
CIHR
NWO
NHMRC
NIH
MZOS
SNSF
EC

View File

@ -0,0 +1 @@
{"resultId":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","projectsList":[{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"validated":null},{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"validated":null}]}

View File

@ -0,0 +1,12 @@
{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","websiteurl":null,"code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","startdate":null,"enddate":null,"callidentifier":"Fotoniikka ja modernit kuvantamismenetelmät LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","websiteurl":null,"code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","startdate":null,"enddate":null,"callidentifier":"Academy Project Funding TT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","websiteurl":null,"code":"ANR-17-CE05-0033","acronym":"MOISE","title":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::547e78ffdcb7d72a1ef31058dede3a33","websiteurl":null,"code":"ANR-09-SEGI-0005","acronym":"GALAXY","title":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE PAR LES MODELES","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::838e781a8d479e27a11101421fd8b296","websiteurl":"http://purl.org/au-research/grants/arc/LE0347462","code":"LE0347462","acronym":null,"title":"Femtosecond laser micromachining facility","startdate":"2003-01-01","enddate":"2003-12-31","callidentifier":null,"keywords":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::a461f180f7b6700c0499d4d3d53e58c7","websiteurl":"http://purl.org/au-research/grants/arc/LP140100567","code":"LP140100567","acronym":null,"title":"Linkage Projects - Grant ID: LP140100567","startdate":"2014-01-01","enddate":"2017-12-31","callidentifier":null,"keywords":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Projects","description":"Linkage Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::b46b9e07d4cea67ccf497520a75ad0c8","websiteurl":"http://purl.org/au-research/grants/arc/DP180101235","code":"DP180101235","acronym":null,"title":"Discovery Projects - Grant ID: DP180101235","startdate":"2018-01-01","enddate":"2023-12-31","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Discovery Projects","description":"Discovery Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::c5f86314ce288f91a7f31c219b128fab","websiteurl":"http://purl.org/au-research/grants/arc/LE0989831","code":"LE0989831","acronym":null,"title":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape","startdate":"2009-01-01","enddate":"2009-12-31","callidentifier":null,"keywords":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::05539f3427ad605d7c1de0168f3e337f","websiteurl":"http://repositorio.conicyt.cl/handle/10533/183109","code":"3120023","acronym":null,"title":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS","startdate":"2011-01-01","enddate":"2014-01-28","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::POSTDOCTORADO","description":"Fondecyt fundings - Fondecyt stream, POSTDOCTORADO"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","websiteurl":"http://repositorio.conicyt.cl/handle/10533/163340","code":"1040240","acronym":null,"title":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS.","startdate":"2004-01-15","enddate":"2007-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","websiteurl":"http://repositorio.conicyt.cl/handle/10533/162452","code":"1020683","acronym":null,"title":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION.","startdate":"2002-01-15","enddate":"2006-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|corda_______::132bac68f17bb81c451d9071be6e4d6d","websiteurl":null,"code":"628405","acronym":"ANIM","title":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers","startdate":"2014-05-01","enddate":"2016-04-30","callidentifier":"FP7-PEOPLE-2013-IIF","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"EC","name":"European Commission","jurisdiction":"EU","funding_stream":{"id":"EC::FP7::SP3::PEOPLE","description":"SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions"}}],"summary":null,"granted":null,"h2020programme":[]}

View File

@ -0,0 +1,12 @@
{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","websiteurl":null,"code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","startdate":null,"enddate":null,"callidentifier":"Fotoniikka ja modernit kuvantamismenetelmät LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","websiteurl":null,"code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","startdate":null,"enddate":null,"callidentifier":"Academy Project Funding TT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","websiteurl":null,"code":"ANR-17-CE05-0033","acronym":"MOISE","title":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::547e78ffdcb7d72a1ef31058dede3a33","websiteurl":null,"code":"ANR-09-SEGI-0005","acronym":"GALAXY","title":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE PAR LES MODELES","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::838e781a8d479e27a11101421fd8b296","websiteurl":"http://purl.org/au-research/grants/arc/LE0347462","code":"LE0347462","acronym":null,"title":"Femtosecond laser micromachining facility","startdate":"2003-01-01","enddate":"2003-12-31","callidentifier":null,"keywords":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::a461f180f7b6700c0499d4d3d53e58c7","websiteurl":"http://purl.org/au-research/grants/arc/LP140100567","code":"LP140100567","acronym":null,"title":"Linkage Projects - Grant ID: LP140100567","startdate":"2014-01-01","enddate":"2017-12-31","callidentifier":null,"keywords":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Projects","description":"Linkage Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::b46b9e07d4cea67ccf497520a75ad0c8","websiteurl":"http://purl.org/au-research/grants/arc/DP180101235","code":"DP180101235","acronym":null,"title":"Discovery Projects - Grant ID: DP180101235","startdate":"2018-01-01","enddate":"2023-12-31","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Discovery Projects","description":"Discovery Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::c5f86314ce288f91a7f31c219b128fab","websiteurl":"http://purl.org/au-research/grants/arc/LE0989831","code":"LE0989831","acronym":null,"title":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape","startdate":"2009-01-01","enddate":"2009-12-31","callidentifier":null,"keywords":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::05539f3427ad605d7c1de0168f3e337f","websiteurl":"http://repositorio.conicyt.cl/handle/10533/183109","code":"3120023","acronym":null,"title":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS","startdate":"2011-01-01","enddate":"2014-01-28","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::POSTDOCTORADO","description":"Fondecyt fundings - Fondecyt stream, POSTDOCTORADO"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","websiteurl":"http://repositorio.conicyt.cl/handle/10533/163340","code":"1040240","acronym":null,"title":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS.","startdate":"2004-01-15","enddate":"2007-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","websiteurl":"http://repositorio.conicyt.cl/handle/10533/162452","code":"1020683","acronym":null,"title":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION.","startdate":"2002-01-15","enddate":"2006-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|corda__h2020::bf5d35ec8d24ae4abfb4a1c6a0af3856","websiteurl":null,"code":"628405","acronym":"ANIM","title":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers","startdate":"2014-05-01","enddate":"2016-04-30","callidentifier":"FP7-PEOPLE-2013-IIF","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"EC","name":"European Commission","jurisdiction":"EU","funding_stream":{"id":"EC::FP7::SP3::PEOPLE","description":"SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions"}}],"summary":null,"granted":null,"h2020programme":[]}

View File

@ -0,0 +1,28 @@
40|nih_________::4c32cdbc4c9949853f02219fc4780a30
40|nih_________::b485512ef116af73bee79d50c8f9ca01
40|nih_________::b44d9bc8e99d9a0477ac06897e3e9c19
40|nih_________::7d2d2b7d1644a722a6bbcb031d82fec6
40|nsf_________::6b2674b0341e07b818a56c6f0daa2633
40|nih_________::96bb39aecc8f7b9f3b02ed36ef09538b
40|nsf_________::88d92bdf20ec2fac3ed9740f962b4fad
40|nih_________::4bb8c14729a0082378bb04db8321ce14
40|nih_________::08a8eed6c17c6d8e427afcfd29f87c7b
40|nsf_________::c314f3d35af1990121bf5b803937e112
40|nih_________::3ad6a2e6ebd561206f0da69468337f50
40|nih_________::d02c60c65a59629e69a30abcf2ceaed1
40|nih_________::d5a241cc94253feb72181cde15f51e96
40|nih_________::b5df718bbca69af50d4b7213e26af3f0
40|nih_________::bc90893c1be80503578e48f6ef6b7061
40|rcuk________::2c39b38c26c260b14a9816b88c91c132
40|nih_________::ab103ad117cd0579df66f7592a7d4adf
40|nih_________::147aa6ad8bd201e2a02c7b6cc3f68348
40|corda__h2020::bf5d35ec8d24ae4abfb4a1c6a0af3856
40|nih_________::b8083208156f2764d07c736ba9b49dd2
40|nih_________::f4d1e0aece0e6a9eff8d054c28e082db
40|nsf_________::56297da8b472a4be8ac3f09af813c9f6
40|nsf_________::6b6dc3398eeebb3de1ab66e6eb8c5cb3
40|nih_________::93289a36ebffb0bee3d6b01c6fc0a3d6
40|nih_________::6c3b00dd4ae9d43d6630ff18f189ebae
40|nih_________::1d983a87768f13bc8377b1b7d17290a2
40|nih_________::c3b56e91859b114644c1403e892eb80f
40|rcuk________::c1e15330fc7956063652f9c06e584548

View File

@ -1 +1,5 @@
{ "id": "10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", "openairecompatibility": { "classid": "openaire-cris_1.1" }}
{ "id": "10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", "openairecompatibility": { "classid": "openaire-cris_1.1" },
"originalId": ["eurocrisdris::1234"],
"collectedfrom": [{"key": "eurocrisdris::2b29d08e383ff4cd8a2b6b226ce37e38", "value": "Directory of Research Information System (DRIS)"}],
"pid": [{"value": "10.1010.xyx", "qualifier": {"classid": "doi"}}]
}

View File

@ -1 +1,4 @@
{ "id": "10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", "openairecompatibility": { "classid": "openaire2.0" }}
{ "id": "10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", "openairecompatibility": { "classid": "openaire2.0" },
"originalId": ["opendoar____::1234"],
"collectedfrom": [{"key": "openaire____::47ce9e9f4fad46e732cff06419ecaabb", "value": "OpenDOAR"}]
}

View File

@ -1,6 +1,6 @@
[
{
"field": "datasource",
"field": "service",
"type": "string",
"value": "openaire____::revistasunicauca"
},

View File

@ -1,18 +1,51 @@
[
{
"field": "datasourceid",
"field": "id",
"type": "string",
"value": "274269ac6f3b::2579-5449"
},
{
"field": "identities",
"field": "originalid",
"type": "array",
"value": [
"274269ac6f3b::2579-5449",
"fairsharing_::1562",
"piwik:13",
null
null,
"re3data_____::r3d100010213"
]
},
{
"field": "pid",
"type": "array",
"value": [
"r3d100010218###re3data@@@dnet:pid_types"
]
},
{
"field": "datasourcetype",
"type": "string",
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
},
{
"field": "datasourcetypeui",
"type": "string",
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui"
},
{
"field": "eosctype",
"type": "string",
"value": "Data Source@@@dnet:eosc_types"
},
{
"field": "eoscdatasourcetype",
"type": "string",
"value": "Journal archive@@@dnet:eosc_datasource_types"
},
{
"field": "openairecompatibility",
"type": "string",
"value": "openaire4.0@@@dnet:datasourceCompatibilityLevel"
},
{
"field": "officialname",
"type": "string",
@ -23,16 +56,6 @@
"type": "string",
"value": "Jurnal Ilmiah Pendidikan Scholastic"
},
{
"field": "contactemail",
"type": "string",
"value": "test@test.it"
},
{
"field": "openairecompatibility",
"type": "string",
"value": "hostedBy@@@dnet:datasourceCompatibilityLevel"
},
{
"field": "websiteurl",
"type": "string",
@ -44,11 +67,14 @@
"value": null
},
{
"field": "accessinfopackage",
"type": "array",
"value": [
null
]
"field": "contactemail",
"type": "string",
"value": "test@test.it"
},
{
"field": "namespaceprefix",
"type": "string",
"value": "ojs_25795449"
},
{
"field": "latitude",
@ -61,9 +87,19 @@
"value": 0
},
{
"field": "namespaceprefix",
"field": "dateofvalidation",
"type": "date",
"value": null
},
{
"field": "description",
"type": "string",
"value": "ojs_25795449"
"value": "veterinary medicine"
},
{
"field": "subjects",
"type": "array",
"value": []
},
{
"field": "odnumberofitems",
@ -75,16 +111,6 @@
"type": "date",
"value": null
},
{
"field": "subjects",
"type": "array",
"value": null
},
{
"field": "description",
"type": "string",
"value": null
},
{
"field": "odpolicies",
"type": "string",
@ -93,44 +119,33 @@
{
"field": "odlanguages",
"type": "array",
"value": []
},
{
"field": "odcontenttypes",
"type": "array",
"value": [
"Journal articles"
"English",
"German",
"French",
"Danish",
"Norwegian",
"Swedish"
]
},
{
"field": "inferred",
"type": "boolean",
"value": false
"field": "languages",
"type": "array",
"value": [
"English",
"German",
"French",
"Danish",
"Norwegian",
"Swedish"
]
},
{
"field": "deletedbyinference",
"type": "boolean",
"value": false
},
{
"field": "trust",
"type": "double",
"value": 0.9
},
{
"field": "inferenceprovenance",
"type": "string",
"value": null
},
{
"field": "dateofcollection",
"type": "date",
"value": "2020-01-21"
},
{
"field": "dateofvalidation",
"type": "date",
"value": null
"field": "accessinfopackage",
"type": "array",
"value": [
"http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai"
]
},
{
"field": "releasestartdate",
@ -147,16 +162,6 @@
"type": "string",
"value": null
},
{
"field": "dataprovider",
"type": "boolean",
"value": null
},
{
"field": "serviceprovider",
"type": "boolean",
"value": null
},
{
"field": "databaseaccesstype",
"type": "string",
@ -183,12 +188,12 @@
"value": null
},
{
"field": "citationguidelineurl",
"type": "string",
"field": "versioncontrol",
"type": "boolean",
"value": null
},
{
"field": "qualitymanagementkind",
"field": "citationguidelineurl",
"type": "string",
"value": null
},
@ -208,29 +213,38 @@
"value": []
},
{
"field": "collectedfromid",
"type": "string",
"value": "openaire____::SnVybmFsIEZha3VsdGFzIFNhc3RyYSBVbml2ZXJzaXRhcyBFa2FzYWt0aQ=="
"field": "inferred",
"type": "boolean",
"value": false
},
{
"field": "collectedfromname",
"type": "string",
"value": "Jurnal Fakultas Sastra Universitas Ekasakti"
"field": "deletedbyinference",
"type": "boolean",
"value": false
},
{
"field": "datasourcetype",
"type": "string",
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
"field": "trust",
"type": "double",
"value": 0.9
},
{
"field": "datasourcetypeui",
"field": "inferenceprovenance",
"type": "string",
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui"
"value": null
},
{
"field": "provenanceaction",
"type": "not_used",
"value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
"field": "dateofcollection",
"type": "date",
"value": "2020-01-21"
},
{
"field": "collectedfrom",
"type": "array",
"value": [
"openaire____::fairsharing@@@FAIRsharing.org",
"openaire____::opendoar@@@OpenDOAR",
"openaire____::re3data@@@Registry of Research Data Repository"
]
},
{
"field": "issnPrinted",
@ -247,6 +261,13 @@
"type": "string",
"value": "2579-5447"
},
{
"field": "researchentitytypes",
"type": "array",
"value": [
"Research Data"
]
},
{
"field": "jurisdiction",
"type": "string",
@ -257,16 +278,36 @@
"type": "boolean",
"value": true
},
{
"field": "knowledgegraph",
"type": "boolean",
"value": true
},
{
"field": "contentpolicies",
"type": "array",
"value": [
"Journal article@@@eosc:contentpolicies"
"Taxonomic classification@@@eosc:contentpolicies",
"Resource collection@@@eosc:contentpolicies"
]
},
{
"field": "submissionpolicyurl",
"type": "string",
"value": null
},
{
"field": "preservationpolicyurl",
"type": "string",
"value": "Permanent Archiving https://datadryad.org/stash/faq"
},
{
"field": "researchproductaccesspolicies",
"type": "array",
"value": [
"https://100percentit.com/legal/"
]
},
{
"field": "researchproductmetadataaccesspolicies",
"type": "array",
"value": [
"https://wenmr.science.uu.nl/conditions"
]
},
{
@ -283,5 +324,10 @@
"field": "consenttermsofusedate",
"type": "date",
"value": "2022-03-11"
},
{
"field": "lastconsenttermsofusedate",
"type": "date",
"value": "2022-03-11"
}
]
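
The fixture above lists the expected field/type/value triples for the service record exercised by the XML mapping test. Purely as an illustration (not part of these commits), a fixture of this shape could be loaded with Jackson into plain descriptors; the FieldSpec and FixtureLoader classes below are hypothetical helpers, not code from the repository:

import java.util.List;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

// hypothetical holder for one fixture entry of the form { "field": ..., "type": ..., "value": ... }
class FieldSpec {
	public String field;
	public String type;
	public Object value; // strings, booleans, arrays and nulls all occur in the fixture
}

class FixtureLoader {
	static List<FieldSpec> load(String json) throws java.io.IOException {
		// Jackson maps each object of the JSON array onto a FieldSpec instance
		return new ObjectMapper().readValue(json, new TypeReference<List<FieldSpec>>() {});
	}
}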

View File

@ -535,6 +535,12 @@ public class XmlRecordFactory implements Serializable {
if (ds.getDatasourcetypeui() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui()));
}
if (ds.getEosctype() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("eosctype", ds.getEosctype()));
}
if (ds.getEoscdatasourcetype() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("eoscdatasourcetype", ds.getEoscdatasourcetype()));
}
if (ds.getOpenairecompatibility() != null) {
metadata
.add(
@ -583,6 +589,16 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
}
if (ds.getSubjects() != null) {
metadata
.addAll(
ds
.getSubjects()
.stream()
.filter(Objects::nonNull)
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp))
.collect(Collectors.toList()));
}
if (ds.getOdnumberofitems() != null) {
metadata
.add(
@ -609,6 +625,16 @@ public class XmlRecordFactory implements Serializable {
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue()))
.collect(Collectors.toList()));
}
if (ds.getLanguages() != null) {
metadata
.addAll(
ds
.getLanguages()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("languages", c))
.collect(Collectors.toList()));
}
if (ds.getOdcontenttypes() != null) {
metadata
.addAll(
@ -689,18 +715,18 @@ public class XmlRecordFactory implements Serializable {
XmlSerializationUtils
.asXmlElement("versioning", ds.getVersioning().getValue().toString()));
}
if (ds.getVersioncontrol() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("versioncontrol", ds.getVersioncontrol().toString()));
}
if (ds.getCitationguidelineurl() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
}
if (ds.getQualitymanagementkind() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
}
if (ds.getPidsystems() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
@ -722,17 +748,24 @@ public class XmlRecordFactory implements Serializable {
if (ds.getJournal() != null) {
metadata.add(XmlSerializationUtils.mapJournal(ds.getJournal()));
}
if (ds.getSubjects() != null) {
if (ds.getResearchentitytypes() != null) {
metadata
.addAll(
ds
.getSubjects()
.getResearchentitytypes()
.stream()
.filter(Objects::nonNull)
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp))
.map(c -> XmlSerializationUtils.asXmlElement("researchentitytypes", c))
.collect(Collectors.toList()));
}
if (ds.getProvidedproducttypes() != null) {
metadata
.addAll(
ds
.getProvidedproducttypes()
.stream()
.map(c -> XmlSerializationUtils.asXmlElement("providedproducttypes", c))
.collect(Collectors.toList()));
}
if (ds.getJurisdiction() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction()));
}
@ -741,11 +774,6 @@ public class XmlRecordFactory implements Serializable {
metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString()));
}
if (ds.getKnowledgegraph() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString()));
}
if (ds.getContentpolicies() != null) {
metadata
.addAll(
@ -756,7 +784,34 @@ public class XmlRecordFactory implements Serializable {
.map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q))
.collect(Collectors.toList()));
}
if (ds.getSubmissionpolicyurl() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("submissionpolicyurl", ds.getSubmissionpolicyurl()));
}
if (ds.getPreservationpolicyurl() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("preservationpolicyurl", ds.getPreservationpolicyurl()));
}
if (ds.getResearchproductaccesspolicies() != null) {
metadata
.addAll(
ds
.getResearchproductaccesspolicies()
.stream()
.map(c -> XmlSerializationUtils.asXmlElement("researchproductaccesspolicies", c))
.collect(Collectors.toList()));
}
if (ds.getResearchproductmetadataaccesspolicies() != null) {
metadata
.addAll(
ds
.getResearchproductmetadataaccesspolicies()
.stream()
.map(
c -> XmlSerializationUtils.asXmlElement("researchproductmetadataaccesspolicies", c))
.collect(Collectors.toList()));
}
break;
case organization:
final Organization o = (Organization) entity;

View File

@ -41,7 +41,8 @@
}
},
"publicationDate": {
"type": "keyword"
"type": "date",
"format": "yyyy-MM-dd"
},
"relationship": {
"properties": {

View File

@ -11,6 +11,7 @@ import java.util.List;
import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.Test;
@ -142,7 +143,7 @@ public class XmlRecordFactoryTest {
}
@Test
public void testDatasource() throws IOException, DocumentException {
public void testService() throws IOException, DocumentException {
final ContextMapper contextMapper = new ContextMapper();
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
@ -167,6 +168,14 @@ public class XmlRecordFactoryTest {
assertEquals("true", doc.valueOf("//thematic"));
assertEquals("Journal article", doc.valueOf("//contentpolicy/@classname"));
assertEquals("Journal archive", doc.valueOf("//datasourcetypeui/@classname"));
assertEquals("Data Source", doc.valueOf("//eosctype/@classname"));
final List pids = doc.selectNodes("//pid");
assertEquals(1, pids.size());
assertEquals("re3data", ((Element) pids.get(0)).attribute("classid").getValue());
assertEquals(
"Registry of research data repositories", ((Element) pids.get(0)).attribute("classname").getValue());
assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemeid").getValue());
assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemename").getValue());
}
}
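
The new assertions inspect the attributes of the single pid element of the service record. For orientation, the element they expect looks roughly like the one built in this hypothetical dom4j sketch (a reconstruction, not taken from the test resources):

import org.dom4j.DocumentHelper;
import org.dom4j.Element;

class PidElementSketch {
	static Element expectedShape() {
		// mirrors the attribute set asserted in testService() above
		final Element pid = DocumentHelper.createElement("pid");
		pid.addAttribute("classid", "re3data");
		pid.addAttribute("classname", "Registry of research data repositories");
		pid.addAttribute("schemeid", "dnet:pid_types");
		pid.addAttribute("schemename", "dnet:pid_types");
		return pid;
	}
}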

View File

@ -80,4 +80,34 @@ where reltype='resultResult'
and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
and r1.resulttype.classname != 'other'
and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;
create table ${stats_db_name}.result_citations_oc stored as parquet as
select substr(target, 4) as id, count(distinct substr(source, 4)) as citations
from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id
join ${openaire_db_name}.result r2 on r2.id=rel.target
where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations'
and reltype='resultResult'
and r1.resulttype.classname!=r2.resulttype.classname
and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE
and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
and r1.resulttype.classname != 'other'
and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
group by substr(target, 4);
create table ${stats_db_name}.result_references_oc stored as parquet as
select substr(source, 4) as id, count(distinct substr(target, 4)) as references
from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id
join ${openaire_db_name}.result r2 on r2.id=rel.target
where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations'
and reltype='resultResult'
and r1.resulttype.classname!=r2.resulttype.classname
and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE
and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
and r1.resulttype.classname != 'other'
and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
group by substr(source, 4);
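
The statements above are written against the ${stats_db_name} and ${openaire_db_name} placeholders, which are filled in before the script runs. The real pipeline delegates that substitution to its workflow machinery; the method below is only a naive, illustrative sketch of the same idea:

import java.util.Map;

class ScriptTemplating {
	// replaces ${key} tokens with the configured database names before submission
	static String fill(String script, Map<String, String> params) {
		String out = script;
		for (Map.Entry<String, String> e : params.entrySet()) {
			out = out.replace("${" + e.getKey() + "}", e.getValue());
		}
		return out;
	}
}

Calling fill(sql, params) with stats_db_name and openaire_db_name entries yields the concrete CREATE TABLE statements for the target databases.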

View File

@ -12,6 +12,8 @@ and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
on p.id= tmp.id;
compute stats indi_pub_green_oa;
create table indi_pub_grey_lit stored as parquet as
select distinct p.id, coalesce(grey_lit, 0) as grey_lit
from publication p
@ -22,6 +24,8 @@ join result_classifications rt on rt.id = p.id
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and
not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id;
compute stats indi_pub_grey_lit;
create table indi_pub_doi_from_crossref stored as parquet as
select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
from publication p
@ -31,6 +35,7 @@ join datasource d on d.id = ri.collectedfrom
where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp
on tmp.id=p.id;
compute stats indi_pub_doi_from_crossref;
---- Sprint 2 ----
create table indi_result_has_cc_licence stored as parquet as
select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
@ -40,6 +45,8 @@ join result_licenses as license on license.id = r.id
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
on r.id= tmp.id;
compute stats indi_result_has_cc_licence;
create table indi_result_has_cc_licence_url stored as parquet as
select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
from result r
@ -49,16 +56,21 @@ join result_licenses as license on license.id = r.id
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp
on r.id= tmp.id;
compute stats indi_result_has_cc_licence_url;
create table indi_pub_has_abstract stored as parquet as
select distinct publication.id, coalesce(abstract, 1) has_abstract
from publication;
compute stats indi_pub_has_abstract;
create table indi_result_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from result r
left outer join (select id, 1 as has_orcid from result_orcid) tmp
on r.id= tmp.id;
compute stats indi_result_with_orcid;
---- Sprint 3 ----
create table indi_funded_result_with_fundref stored as parquet as
@ -68,27 +80,33 @@ left outer join (select distinct id, 1 as fundref from project_results
where provenance='Harvested') tmp
on r.id= tmp.id;
create table indi_result_org_country_collab stored as parquet as
with tmp as
(select o.id as id, o.country , ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id where o.country <> 'UNKNOWN')
select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id and o1.country<>o2.country
group by o1.id, o1.type,o2.country;
compute stats indi_funded_result_with_fundref;
create table indi_result_org_collab stored as parquet as
with tmp as
(select o.id, ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id)
select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id
group by o1.id, o2.id, o1.type;
-- create table indi_result_org_country_collab stored as parquet as
-- with tmp as
-- (select o.id as id, o.country , ro.id as result,r.type from organization o
-- join result_organization ro on o.id=ro.organization
-- join result r on r.id=ro.id where o.country <> 'UNKNOWN')
-- select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations
-- from tmp as o1
-- join tmp as o2 on o1.result=o2.result
-- where o1.id<>o2.id and o1.country<>o2.country
-- group by o1.id, o1.type,o2.country;
--
-- compute stats indi_result_org_country_collab;
-- create table indi_result_org_collab stored as parquet as
-- with tmp as
-- (select o.id, ro.id as result,r.type from organization o
-- join result_organization ro on o.id=ro.organization
-- join result r on r.id=ro.id)
-- select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations
-- from tmp as o1
-- join tmp as o2 on o1.result=o2.result
-- where o1.id<>o2.id
-- group by o1.id, o2.id, o1.type;
--
-- compute stats indi_result_org_collab;
create table indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from organization_projects op
@ -101,6 +119,8 @@ join tmp as f2 on f1.project=f2.project
where f1.country<>f2.country
group by f1.funder, f2.country, f1.country;
compute stats indi_funder_country_collab;
create table indi_result_country_collab stored as parquet as
with tmp as
(select country, ro.id as result,r.type from organization o
@ -112,6 +132,8 @@ join tmp as o2 on o1.result=o2.result
where o1.country<>o2.country
group by o1.country, o2.country, o1.type;
compute stats indi_result_country_collab;
---- Sprint 4 ----
create table indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
@ -123,6 +145,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
on pd.id=tmp.id;
compute stats indi_pub_diamond;
create table indi_pub_hybrid stored as parquet as
select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
from publication_datasources pd
@ -133,6 +157,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
on pd.id=tmp.id;
compute stats indi_pub_hybrid;
create table indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from publication pd
@ -143,6 +169,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and ps.is_transformative_journal=true) tmp
on pd.id=tmp.id;
compute stats indi_pub_in_transformative;
create table indi_pub_closed_other_open stored as parquet as
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri
left outer join
@ -153,11 +181,12 @@ where d.type like '%Journal%' and ri.accessright='Closed Access' and
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp
on tmp.id=ri.id;
compute stats indi_pub_closed_other_open;
---- Sprint 5 ----
create table indi_result_no_of_copies stored as parquet as
select id, count(id) as number_of_copies from result_instance group by id;
compute stats indi_result_no_of_copies;
---- Sprint 6 ----
create table indi_pub_gold_oa stored as parquet as
WITH gold_oa AS (
@ -183,6 +212,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
compute stats indi_pub_gold_oa;
create table indi_datasets_gold_oa stored as parquet as
WITH gold_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
@ -210,6 +241,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
compute stats indi_datasets_gold_oa;
create table indi_software_gold_oa stored as parquet as
WITH gold_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
@ -237,6 +270,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
compute stats indi_software_gold_oa;
create table indi_org_findable stored as parquet as
with result_with_pid as (
select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
@ -263,6 +298,8 @@ join result_with_pid_share on result_with_pid_share.organization=allresults.orga
left outer join (
select organization, abstract_share from result_with_abstract_share) tmp on tmp.organization=allresults.organization;
compute stats indi_org_findable;
create table indi_org_openess stored as parquet as
WITH datasets_oa as (
SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa dg
@ -313,6 +350,8 @@ left outer join (
left outer join (
select organization,s from allsoftwaresshare) tmp1 on tmp1.organization=allpubsshare.organization;
compute stats indi_org_openess;
create table indi_pub_hybrid_oa_with_cc stored as parquet as
WITH hybrid_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
@ -343,6 +382,8 @@ LEFT OUTER JOIN (
JOIN indi_result_has_cc_licence cc on pd.id=cc.id
where cc.has_cc_license=1) tmp on pd.id=tmp.id;
compute stats indi_pub_hybrid_oa_with_cc;
create table indi_pub_downloads stored as parquet as
SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id
@ -350,6 +391,8 @@ where downloads>0
GROUP BY result_id
order by no_dowloads desc;
compute stats indi_pub_downloads;
create table indi_pub_downloads_datasource stored as parquet as
SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id
@ -357,15 +400,21 @@ where downloads>0
GROUP BY result_id, repository_id
order by result_id;
compute stats indi_pub_downloads_datasource;
create table indi_pub_downloads_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id where downloads>0
GROUP BY result_id, `year`
order by `year` asc;
compute stats indi_pub_downloads_year;
create table indi_pub_downloads_datasource_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id, `year`
order by `year` asc, result_id;
compute stats indi_pub_downloads_datasource_year;
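
Once materialised, an indicator table such as indi_result_with_orcid (columns id and has_orcid, visible in the hunk above) can be read back over JDBC. A rough sketch, with the connection URL and database name left as assumptions of the example rather than values taken from the workflow:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

class IndicatorLookup {
	// returns true if the given result id is flagged as having an ORCID-linked author
	static boolean hasOrcid(String jdbcUrl, String statsDb, String resultId) throws SQLException {
		final String query = "select has_orcid from " + statsDb + ".indi_result_with_orcid where id = ?";
		try (Connection con = DriverManager.getConnection(jdbcUrl);
			PreparedStatement ps = con.prepareStatement(query)) {
			ps.setString(1, resultId);
			try (ResultSet rs = ps.executeQuery()) {
				return rs.next() && rs.getInt("has_orcid") == 1;
			}
		}
	}
}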

View File

@ -18,28 +18,45 @@ create table TARGET.result stored as parquet as
select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
union all
select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
'openorgs____::759d59f05d77188faee99b7493b46805',
'openorgs____::b84450f9864182c67b8611b5593f4250',
'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975',
'openorgs____::eadc8da90a546e98c03f896661a2e4d4',
'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2',
'openorgs____::d169c7407dd417152596908d48c11460',
'openorgs____::1ec924b1759bb16d0a02f2dad8689b21',
'openorgs____::2fb1e47b4612688d9de9169d579939a7',
'openorgs____::759d59f05d77188faee99b7493b46805',
'openorgs____::cad284878801b9465fa51a95b1d779db',
'openorgs____::eadc8da90a546e98c03f896661a2e4d4',
'openorgs____::c0286313e36479eff8676dba9b724b40'
-- ,'openorgs____::c80a8243a5e5c620d7931c88d93bf17a' -- Paris Diderot
) )) foo;
'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
'openorgs____::2fb1e47b4612688d9de9169d579939a7', --University of Helsinki
'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
-- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
'openorgs____::6445d7758d3a40c4d997953b6632a368' --National Institute of Informatics (NII)
) )) foo;
compute stats TARGET.result;
create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_citations;
create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_references_oc;
create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_citations_oc;
create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_classifications;
create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_apc;
create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_concepts;
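
The whitelist above determines which results end up in the monitor database: TARGET.result keeps only results affiliated, via result_organization, with one of the listed openorgs identifiers. Conceptually the filter is a plain membership test, as in this hypothetical sketch (the whitelist is abridged to two ids repeated from the script above):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

class MonitorOrgFilter {
	// abridged copy of the openorgs whitelist enumerated in the script above
	private static final Set<String> MONITOR_ORGS = new HashSet<>(Arrays.asList(
		"openorgs____::d41cf6bd4ab1b1362a44397e0b95c975", // National Research Council
		"openorgs____::1ec924b1759bb16d0a02f2dad8689b21" // University of Belgrade
	));

	// a result is retained when at least one of its organizations is monitored
	static boolean keep(Set<String> resultOrganizations) {
		return resultOrganizations.stream().anyMatch(MONITOR_ORGS::contains);
	}
}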

View File

@ -801,7 +801,7 @@
<mockito-core.version>3.3.3</mockito-core.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version>
<vtd.version>[2.12,3.0)</vtd.version>
<dhp-schemas.version>[2.10.32]</dhp-schemas.version>
<dhp-schemas.version>[2.12.0]</dhp-schemas.version>
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>