
Merge branch 'beta' into 7096-fileGZip-collector-plugin

Claudio Atzori 2022-06-16 09:22:16 +02:00
commit 06b5533d4c
64 changed files with 2685 additions and 667 deletions

View File

@@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+import java.sql.Array;
+import java.sql.SQLException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
@@ -118,6 +120,17 @@ public class OafMapperUtils {
            .collect(Collectors.toList());
    }
+
+    public static <T> List<T> listValues(Array values) throws SQLException {
+        if (Objects.isNull(values)) {
+            return null;
+        }
+        return Arrays
+            .stream((T[]) values.getArray())
+            .filter(Objects::nonNull)
+            .distinct()
+            .collect(Collectors.toList());
+    }
+
    public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
        return values
            .stream()

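For orientation, a minimal usage sketch of the new listValues helper added above; the surrounding ResultSet and the column name "pid" are illustrative, not part of this commit:

// Hypothetical caller: collapse a SQL array column into a distinct,
// null-free List, mirroring OafMapperUtils.listValues as added above.
import java.sql.Array;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;

import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

public class ListValuesSketch {

    public static List<String> readPids(final ResultSet rs) throws SQLException {
        // rs.getArray returns null for a NULL column; listValues propagates that null
        final Array pids = rs.getArray("pid");
        return OafMapperUtils.listValues(pids);
    }
}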
View File

@@ -44,105 +44,104 @@ class OafMapperUtilsTest {
    @Test
    void testDateValidation() {
-        assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
+        assertNotNull(GraphCleaningFunctions.cleanDate("2016-05-07T12:41:19.202Z "));
-        assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
+        assertNotNull(GraphCleaningFunctions.cleanDate("2020-09-10 11:08:52 "));
-        assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent());
+        assertNotNull(GraphCleaningFunctions.cleanDate(" 2016-04-05"));
-        assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get());
+        assertEquals("2016-04-05", GraphCleaningFunctions.cleanDate("2016 Apr 05"));
-        assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get());
+        assertEquals("2009-05-08", GraphCleaningFunctions.cleanDate("May 8, 2009 5:57:51 PM"));
-        assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get());
+        assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, 1970"));
-        assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get());
+        assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, '70"));
-        assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get());
+        assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 1970"));
-        assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get());
+        assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 70"));
-        assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get());
+        assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 2006"));
-        assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get());
+        assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 MST 2006"));
-        assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get());
+        assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 02 15:04:05 -0700 2006"));
-        assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get());
+        assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Monday, 02-Jan-06 15:04:05 MST"));
-        assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get());
+        assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 MST"));
-        assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get());
+        assertEquals("2017-07-11", GraphCleaningFunctions.cleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)"));
-        assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get());
+        assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 -0700"));
-        assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get());
+        assertEquals("2018-01-04", GraphCleaningFunctions.cleanDate("Thu, 4 Jan 2018 17:53:36 +0000"));
-        assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get());
+        assertEquals("2015-08-10", GraphCleaningFunctions.cleanDate("Mon Aug 10 15:44:11 UTC+0100 2015"));
        assertEquals(
            "2015-07-03",
-            GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get());
+            GraphCleaningFunctions.cleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)"));
-        assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get());
+        assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 10:09am"));
-        assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get());
+        assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 at 10:09am PST-08"));
-        assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get());
+        assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012, 10:10:09"));
-        assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get());
+        assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7, 1970"));
-        assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get());
+        assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7th, 1970"));
-        assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get());
+        assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006, 19:17"));
-        assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get());
+        assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006 19:17"));
-        assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get());
+        assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 70"));
-        assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get());
+        assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 1970"));
-        assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get());
+        assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("03 February 2013"));
-        assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get());
+        assertEquals("2013-07-01", GraphCleaningFunctions.cleanDate("1 July 2013"));
-        assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get());
+        assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("2013-Feb-03"));
-        assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get());
+        assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3/31/2014"));
-        assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get());
+        assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03/31/2014"));
-        assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get());
+        assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08/21/71"));
-        assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get());
+        assertEquals("1971-01-08", GraphCleaningFunctions.cleanDate("8/1/71"));
-        assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get());
+        assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/2014 22:05"));
-        assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get());
+        assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("04/08/2014 22:05"));
-        assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get());
+        assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/14 22:05"));
-        assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get());
+        assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("04/2/2014 03:00:51"));
-        assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get());
+        assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00:00 AM"));
-        assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get());
+        assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00:01 PM"));
-        assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get());
+        assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00 PM"));
-        assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get());
+        assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 1:00 PM"));
-        assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get());
+        assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00 AM"));
-        assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get());
+        assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("4/02/2014 03:00:51"));
-        assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get());
+        assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59"));
-        assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get());
+        assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59.3186369"));
-        assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get());
+        assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/3/31"));
-        assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get());
+        assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/03/31"));
-        assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get());
+        assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/4/8 22:05"));
-        assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get());
+        assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/04/08 22:05"));
-        assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get());
+        assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/04/2 03:00:51"));
-        assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get());
+        assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/4/02 03:00:51"));
-        assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get());
+        assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59"));
-        assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get());
+        assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59.3186369"));
-        assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get());
+        assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014年04月08日"));
-        assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
+        assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("2006-01-02T15:04:05+0000"));
-        assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
+        assertEquals("2009-08-13", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09-07:00"));
-        assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
+        assertEquals("2009-08-12", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09"));
-        assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
-        assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
+        assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.3186369"));
-        assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
+        assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000"));
-        assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
+        assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.123"));
-        assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get());
+        assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43"));
-        assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get());
+        assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43:22"));
-        assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get());
+        assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 UTC"));
-        assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get());
+        assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 GMT"));
-        assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get());
+        assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 05:24:37 PM"));
-        assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get());
+        assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800"));
-        assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get());
+        assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800 +08"));
-        assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get());
+        assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:44 +09:00"));
-        assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get());
+        assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000 +0000 UTC"));
-        assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get());
+        assertEquals("2015-09-30", GraphCleaningFunctions.cleanDate("2015-09-30 18:48:56.35272715 +0000 UTC"));
-        assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get());
+        assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 GMT"));
-        assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get());
+        assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 UTC"));
        assertEquals(
-            "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get());
+            "2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001"));
        assertEquals(
-            "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get());
+            "2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001"));
-        assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get());
+        assertEquals("2017-07-19", GraphCleaningFunctions.cleanDate("2017-07-19 03:21:51+00:00"));
-        assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get());
+        assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26"));
-        assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get());
+        assertEquals("2014-04-01", GraphCleaningFunctions.cleanDate("2014-04"));
-        assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get());
+        assertEquals("2014-01-01", GraphCleaningFunctions.cleanDate("2014"));
-        assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get());
+        assertEquals("2014-05-11", GraphCleaningFunctions.cleanDate("2014-05-11 08:20:13,787"));
-        assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get());
+        assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3.31.2014"));
-        assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get());
+        assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03.31.2014"));
-        assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get());
+        assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08.21.71"));
-        assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get());
+        assertEquals("2014-03-01", GraphCleaningFunctions.cleanDate("2014.03"));
-        assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get());
+        assertEquals("2014-03-30", GraphCleaningFunctions.cleanDate("2014.03.30"));
-        assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get());
+        assertEquals("2014-06-01", GraphCleaningFunctions.cleanDate("20140601"));
-        assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get());
+        assertEquals("2014-07-22", GraphCleaningFunctions.cleanDate("20140722105203"));
-        assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get());
+        assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("1332151919"));
-        assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get());
+        assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
-        assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get());
+        assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
-        assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get());
+        assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));
    }

View File

@@ -3,7 +3,6 @@ package eu.dnetlib.dhp.actionmanager.ror;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
-import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
@@ -39,7 +38,6 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
-import eu.dnetlib.dhp.actionmanager.ror.model.Relationship;
import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
@@ -51,7 +49,6 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
-import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
@@ -168,38 +165,10 @@ public class GenerateRorActionSetJob {
        final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
        res.add(new AtomicAction<>(Organization.class, o));
-        for (final Relationship rorRel : r.getRelationships()) {
-            if (rorRel.getType().equalsIgnoreCase("parent")) {
-                final String orgId1 = calculateOpenaireId(r.getId());
-                final String orgId2 = calculateOpenaireId(rorRel.getId());
-                res
-                    .add(
-                        new AtomicAction<>(Relation.class,
-                            calculateHierarchyRel(orgId1, orgId2, ModelConstants.IS_PARENT_OF)));
-                res
-                    .add(
-                        new AtomicAction<>(Relation.class,
-                            calculateHierarchyRel(orgId2, orgId1, ModelConstants.IS_CHILD_OF)));
-            }
-        }
        return res;
    }
-    private static Relation calculateHierarchyRel(final String source, final String target, final String relClass) {
-        final Relation rel = new Relation();
-        rel.setSource(source);
-        rel.setTarget(target);
-        rel.setRelType(ORG_ORG_RELTYPE);
-        rel.setSubRelType(ModelConstants.RELATIONSHIP);
-        rel.setRelClass(relClass);
-        rel.setCollectedfrom(ROR_COLLECTED_FROM);
-        rel.setDataInfo(ROR_DATA_INFO);
-        rel.setLastupdatetimestamp(System.currentTimeMillis());
-        return rel;
-    }
    private static String calculateOpenaireId(final String rorId) {
        return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId));
    }

View File

@@ -10,6 +10,8 @@ import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
@@ -24,11 +26,13 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Measure;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import scala.Tuple2;

/**
 * created the Atomic Action for each type of results
@@ -73,7 +77,7 @@ public class SparkAtomicActionUsageJob implements Serializable {
            spark -> {
                removeOutputDir(spark, outputPath);
                prepareResults(dbname, spark, workingPath);
-                prepareActionSet(spark, workingPath, outputPath);
+                writeActionSet(spark, workingPath, outputPath);
            });
    }
@@ -89,7 +93,7 @@ public class SparkAtomicActionUsageJob implements Serializable {
            .json(workingPath);
    }

-    public static void prepareActionSet(SparkSession spark, String inputPath, String outputPath) {
+    public static void writeActionSet(SparkSession spark, String inputPath, String outputPath) {
        readPath(spark, inputPath, UsageStatsModel.class)
            .groupByKey((MapFunction<UsageStatsModel, String>) us -> us.getResult_id(), Encoders.STRING())
            .mapGroups((MapGroupsFunction<String, UsageStatsModel, Result>) (k, it) -> {
@@ -105,10 +109,13 @@ public class SparkAtomicActionUsageJob implements Serializable {
                res.setMeasures(getMeasure(first.getDownloads(), first.getViews()));
                return res;
            }, Encoders.bean(Result.class))
-            .write()
-            .mode(SaveMode.Overwrite)
-            .option("compression", "gzip")
-            .json(outputPath);
+            .toJavaRDD()
+            .map(p -> new AtomicAction(p.getClass(), p))
+            .mapToPair(
+                aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
+                    new Text(OBJECT_MAPPER.writeValueAsString(aa))))
+            .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
    }

    private static List<Measure> getMeasure(Long downloads, Long views) {

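To make the new output layout concrete: a minimal sketch of the record pair writeActionSet now emits, a SequenceFile<Text, Text> whose key is the payload class name and whose value is the JSON-serialized AtomicAction; the helper class below is illustrative, not part of this commit (the same pairing is read back in the test further down):

// Illustrative only: the key/value shape produced per Result by the
// rewritten writeActionSet above.
import org.apache.hadoop.io.Text;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Result;

public class ActionSetRecordSketch {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    public static Text[] toRecord(final Result r) throws Exception {
        final AtomicAction<Result> aa = new AtomicAction<>(Result.class, r);
        // key: the payload class, e.g. "eu.dnetlib.dhp.schema.oaf.Result"
        final Text key = new Text(aa.getClazz().getCanonicalName());
        // value: the whole atomic action serialized as JSON
        final Text value = new Text(OBJECT_MAPPER.writeValueAsString(aa));
        return new Text[] { key, value };
    }
}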
View File

@@ -15,7 +15,7 @@
        "official_name": "Aperta TÜBİTAK Open Archive"
    },
    "BL.CAM": {
-        "openaire_id": "re3data_____::r3d100010620",
+        "openaire_id": "opendoar____::109",
        "datacite_name": "Apollo",
        "official_name": "Apollo"
    },
@@ -196,7 +196,7 @@
    },
    "CSIC.DIGITAL": {
        "openaire_id": "re3data_____::r3d100011076",
-        "datacite_name": "DIGITAL.CSIC",
+        "datacite_name": "Digital CSIC",
        "official_name": "DIGITAL.CSIC"
    },
    "BL.DRI": {
@@ -644,6 +644,11 @@
        "datacite_name": "PANGAEA",
        "official_name": "PANGAEA"
    },
+    "TIB.PANGAEA": {
+        "openaire_id": "re3data_____::r3d100010134",
+        "datacite_name": "PANGAEA",
+        "official_name": "PANGAEA"
+    },
    "NASAPDS.NASAPDS": {
        "openaire_id": "re3data_____::r3d100010121",
        "datacite_name": "PDS",
@@ -896,7 +901,7 @@
    },
    "FIGSHARE.UCT": {
        "openaire_id": "re3data_____::r3d100012633",
-        "datacite_name": "ZivaHub",
+        "datacite_name": "University of Cape Town (UCT)",
        "official_name": "ZivaHub"
    },
    "BL.UCLAN": {
@@ -1030,9 +1035,9 @@
        "official_name": "ZBW Journal Data Archive"
    },
    "CERN.ZENODO": {
-        "openaire_id": "re3data_____::r3d100010468",
+        "openaire_id": "opendoar____::2659",
        "datacite_name": "Zenodo",
-        "official_name": "Zenodo"
+        "official_name": "ZENODO"
    },
    "ZBW.ZEW": {
        "openaire_id": "re3data_____::r3d100010399",

View File

@@ -47,13 +47,18 @@ object DataciteToOAFTransformation {
    }

    /** This method should skip record if json contains invalid text
-     * defined in gile datacite_filter
+     * defined in file datacite_filter
     *
-     * @param json
+     * @param record : unparsed datacite record
+     * @param json   : parsed record
     * @return True if the record should be skipped
     */
-    def skip_record(json: String): Boolean = {
-        datacite_filter.exists(f => json.contains(f))
+    def skip_record(record: String, json: org.json4s.JValue): Boolean = {
+        implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+        datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher")
+            .extractOrElse[String]("")
+            .equalsIgnoreCase("FAIRsharing")
    }

    @deprecated("this method will be removed", "dhp")
@@ -304,12 +309,13 @@ object DataciteToOAFTransformation {
        vocabularies: VocabularyGroup,
        exportLinks: Boolean
    ): List[Oaf] = {
-        if (skip_record(input))
-            return List()
        implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
        lazy val json = parse(input)

+        if (skip_record(input, json))
+            return List()
+
        val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
        val resourceTypeGeneral =
            (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)

View File

@@ -13,9 +13,6 @@ import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
@@ -26,7 +23,6 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Result;
@@ -75,13 +71,14 @@ public class SparkAtomicActionCountJobTest {
            .getResource("/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb")
            .getPath();

-        SparkAtomicActionUsageJob.prepareActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet");
+        SparkAtomicActionUsageJob.writeActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet");

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

        JavaRDD<Result> tmp = sc
-            .textFile(workingDir.toString() + "/actionSet")
-            .map(usm -> OBJECT_MAPPER.readValue(usm, Result.class));
+            .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
+            .map(usm -> OBJECT_MAPPER.readValue(usm._2.getBytes(), AtomicAction.class))
+            .map(aa -> (Result) aa.getPayload());

        Assertions.assertEquals(9, tmp.count());

View File

@@ -107,4 +107,19 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
    }

+    @Test
+    def testFilter(): Unit = {
+        val record = Source
+            .fromInputStream(
+                getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record_fairsharing.json")
+            )
+            .mkString
+        val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
+
+        val res: List[Oaf] = DataciteToOAFTransformation.generateOAF(record, 0L, 0L, vocabularies, true)
+        assertTrue(res.isEmpty)
+    }
+
}

View File

@@ -2,7 +2,7 @@ package eu.dnetlib.doiboost.crossref

import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf._
-import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
+import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
import eu.dnetlib.dhp.utils.DHPUtils
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
@@ -576,14 +576,19 @@ case object Crossref2Oaf {

    def extractDate(dt: String, datePart: List[List[Int]]): String = {
        if (StringUtils.isNotBlank(dt))
-            return dt
+            return GraphCleaningFunctions.cleanDate(dt)
        if (datePart != null && datePart.size == 1) {
            val res = datePart.head
            if (res.size == 3) {
                val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
                if (dp.length == 10) {
-                    return dp
+                    return GraphCleaningFunctions.cleanDate(dp)
                }
+            } else if (res.size == 2) {
+                val dp = f"${res.head}-${res(1)}%02d-01"
+                return GraphCleaningFunctions.cleanDate(dp)
+            } else if (res.size == 1) {
+                return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
            }
        }
        null

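A minimal Java transcription of the new date-part handling in extractDate, useful to see the padding rules at a glance; the helper name fromDateParts is illustrative, and the behaviour assumed for cleanDate is the one asserted in OafMapperUtilsTest above:

// Sketch: 3 parts keep the full date, 2 parts are padded with day 01,
// a lone year becomes January 1st; every candidate goes through cleanDate.
import java.util.List;

import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;

public class DatePartSketch {

    public static String fromDateParts(final List<Integer> res) {
        if (res.size() == 3) {
            return GraphCleaningFunctions
                .cleanDate(String.format("%d-%02d-%02d", res.get(0), res.get(1), res.get(2)));
        } else if (res.size() == 2) {
            return GraphCleaningFunctions.cleanDate(String.format("%d-%02d-01", res.get(0), res.get(1)));
        } else if (res.size() == 1) {
            return GraphCleaningFunctions.cleanDate(res.get(0) + "-01-01");
        }
        return null;
    }
}

For the issue_date.json fixture below, whose issued date-parts are [1980, 4], this yields "1980-04-01".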
View File

@ -0,0 +1,330 @@
{
"indexed":{
"date-parts":[
[
2022,
4,
14
]
],
"date-time":"2022-04-14T11:27:30Z",
"timestamp":1649935650109
},
"reference-count":22,
"publisher":"SAGE Publications",
"issue":"2",
"license":[
{
"start":{
"date-parts":[
[
1980,
4,
1
]
],
"date-time":"1980-04-01T00:00:00Z",
"timestamp":323395200000
},
"content-version":"tdm",
"delay-in-days":0,
"URL":"http:\/\/journals.sagepub.com\/page\/policies\/text-and-data-mining-license"
}
],
"content-domain":{
"domain":[
],
"crossmark-restriction":false
},
"short-container-title":[
"Perception"
],
"published-print":{
"date-parts":[
[
1980,
4
]
]
},
"abstract":"<jats:p> To answer the question \u2018What is suppressed during binocular rivalry?\u2019 a series of three experiments was performed. In the first experiment observers viewed binocular rivalry between orthogonally oriented patterns. When the dominant and suppressed patterns were interchanged between the eyes observers continued seeing with the dominant eye, indicating that an eye, not a pattern, is suppressed during rivalry. In a second experiment it was found that a suppressed eye was able to contribute to stereopsis. A third experiment demonstrated that the predominance of an eye could be influenced by prior adaptation of the other eye, indicating that binocular mechanisms participate in the rivalry process. <\/jats:p>",
"DOI":"10.1068\/p090223",
"type":"journal-article",
"created":{
"date-parts":[
[
2007,
1,
23
]
],
"date-time":"2007-01-23T15:21:36Z",
"timestamp":1169565696000
},
"page":"223-231",
"source":"Crossref",
"is-referenced-by-count":123,
"title":[
"What is Suppressed during Binocular Rivalry?"
],
"prefix":"10.1177",
"volume":"9",
"author":[
{
"given":"Randolph",
"family":"Blake",
"sequence":"first",
"affiliation":[
{
"name":"Cresap Neuroscience Laboratory, Northwestern University, Evanston, Illinois 60201, USA"
}
]
},
{
"given":"David H",
"family":"Westendorf",
"sequence":"additional",
"affiliation":[
{
"name":"Department of Psychology, University of Arkansas, Fayetteville, Arkansas 72701, USA"
}
]
},
{
"given":"Randall",
"family":"Overton",
"sequence":"additional",
"affiliation":[
{
"name":"Department of Psychology, Illinois State University, Normal, Illinois 61761, USA"
}
]
}
],
"member":"179",
"published-online":{
"date-parts":[
[
2016,
6,
25
]
]
},
"reference":[
{
"key":"bibr1-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1136\/bjo.37.1.37"
},
{
"key":"bibr2-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1037\/0096-1523.5.2.315"
},
{
"key":"bibr3-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1016\/0042-6989(74)90065-0"
},
{
"key":"bibr4-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1068\/p080143"
},
{
"key":"bibr5-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1016\/0042-6989(70)90036-2"
},
{
"key":"bibr6-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1113\/jphysiol.1969.sp008862"
},
{
"key":"bibr7-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1113\/jphysiol.1972.sp010006"
},
{
"key":"bibr8-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1146\/annurev.ps.23.020172.002213"
},
{
"key":"bibr9-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1126\/science.166.3902.245"
},
{
"key":"bibr10-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1037\/h0075805"
},
{
"key":"bibr11-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1113\/jphysiol.1968.sp008552"
},
{
"key":"bibr12-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1113\/jphysiol.1965.sp007784"
},
{
"key":"bibr13-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1037\/h0032455"
},
{
"key":"bibr14-p090223",
"volume-title":"Treatise on Physiological Optics",
"volume":"3",
"author":"von Helmholtz H",
"year":"1866",
"edition":"3"
},
{
"key":"bibr15-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1068\/p040125"
},
{
"key":"bibr16-p090223",
"volume-title":"On Binocular Rivalry",
"author":"Levelt W J M",
"year":"1965"
},
{
"key":"bibr17-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1001\/archopht.1935.00840020011001"
},
{
"key":"bibr18-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.3758\/BF03205796"
},
{
"key":"bibr19-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.3758\/BF03210180"
},
{
"key":"bibr20-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1037\/0033-2909.85.2.376"
},
{
"key":"bibr21-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.1016\/0042-6989(79)90169-X"
},
{
"key":"bibr22-p090223",
"doi-asserted-by":"publisher",
"DOI":"10.3758\/BF03210465"
}
],
"container-title":[
"Perception"
],
"original-title":[
],
"language":"en",
"link":[
{
"URL":"http:\/\/journals.sagepub.com\/doi\/pdf\/10.1068\/p090223",
"content-type":"application\/pdf",
"content-version":"vor",
"intended-application":"text-mining"
},
{
"URL":"http:\/\/journals.sagepub.com\/doi\/pdf\/10.1068\/p090223",
"content-type":"unspecified",
"content-version":"vor",
"intended-application":"similarity-checking"
}
],
"deposited":{
"date-parts":[
[
2021,
12,
3
]
],
"date-time":"2021-12-03T11:49:48Z",
"timestamp":1638532188000
},
"score":1,
"resource":{
"primary":{
"URL":"http:\/\/journals.sagepub.com\/doi\/10.1068\/p090223"
}
},
"subtitle":[
],
"short-title":[
],
"issued":{
"date-parts":[
[
1980,
4
]
]
},
"references-count":22,
"journal-issue":{
"issue":"2",
"published-print":{
"date-parts":[
[
1980,
4
]
]
}
},
"alternative-id":[
"10.1068\/p090223"
],
"URL":"http:\/\/dx.doi.org\/10.1068\/p090223",
"relation":{
},
"ISSN":[
"0301-0066",
"1468-4233"
],
"issn-type":[
{
"value":"0301-0066",
"type":"print"
},
{
"value":"1468-4233",
"type":"electronic"
}
],
"subject":[
"Artificial Intelligence",
"Sensory Systems",
"Experimental and Cognitive Psychology",
"Ophthalmology"
],
"published":{
"date-parts":[
[
1980,
4
]
]
}
}

View File

@@ -73,6 +73,20 @@ class CrossrefMappingTest {
    }

+    @Test
+    def crossrefIssueDateTest(): Unit = {
+        val json =
+            Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
+        assertNotNull(json)
+        assertFalse(json.isEmpty)
+        val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+        assertTrue(resultList.nonEmpty)
+        val items = resultList.filter(p => p.isInstanceOf[Result])
+        println(mapper.writeValueAsString(items.head))
+    }
+
    @Test
    def testOrcidID(): Unit = {
        val json = Source
@@ -82,7 +96,7 @@ class CrossrefMappingTest {
            .mkString

        assertNotNull(json)
-        assertFalse(json.isEmpty);
+        assertFalse(json.isEmpty)

        val resultList: List[Oaf] = Crossref2Oaf.convert(json)

View File

@ -0,0 +1,251 @@
package eu.dnetlib.dhp.bulktag;
import static eu.dnetlib.dhp.PropagationConstant.readPath;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class SparkEoscTag {
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
.qualifier(
"EOSC",
"European Open Science Cloud",
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES);
public static final DataInfo EOSC_DATAINFO = OafMapperUtils
.dataInfo(
false, "propagation", true, false,
OafMapperUtils
.qualifier(
"propagation:subject", "Inferred by OpenAIRE",
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
"0.9");
public final static StructuredProperty EOSC_NOTEBOOK = OafMapperUtils
.structuredProperty(
"EOSC::Jupyter Notebook", EOSC_QUALIFIER, EOSC_DATAINFO);
public final static StructuredProperty EOSC_GALAXY = OafMapperUtils
.structuredProperty(
"EOSC::Galaxy Workflow", EOSC_QUALIFIER, EOSC_DATAINFO);
public final static StructuredProperty EOSC_TWITTER = OafMapperUtils
.structuredProperty(
"EOSC::Twitter Data", EOSC_QUALIFIER, EOSC_DATAINFO);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkEoscTag.class
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String workingPath = parser.get("workingPath");
log.info("workingPath: {}", workingPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
execEoscTag(spark, inputPath, workingPath);
});
}
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
readPath(spark, inputPath + "/software", Software.class)
.map((MapFunction<Software, Software>) s -> {
List<StructuredProperty> sbject;
if (!Optional.ofNullable(s.getSubject()).isPresent())
s.setSubject(new ArrayList<>());
sbject = s.getSubject();
if (containsCriteriaNotebook(s)) {
sbject.add(EOSC_NOTEBOOK);
if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))) {
sbject = sbject.stream().map(sb -> {
if (sb.getValue().equals("EOSC Jupyter Notebook")) {
return null;
}
return sb;
}).filter(Objects::nonNull).collect(Collectors.toList());
s.setSubject(sbject);
}
}
if (containsCriteriaGalaxy(s)) {
sbject.add(EOSC_GALAXY);
}
return s;
}, Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/software");
readPath(spark, workingPath + "/software", Software.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/software");
readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
.map((MapFunction<OtherResearchProduct, OtherResearchProduct>) orp -> {
List<StructuredProperty> sbject;
if (!Optional.ofNullable(orp.getSubject()).isPresent())
orp.setSubject(new ArrayList<>());
sbject = orp.getSubject();
if (containsCriteriaGalaxy(orp)) {
sbject.add(EOSC_GALAXY);
}
if (containscriteriaTwitter(orp)) {
sbject.add(EOSC_TWITTER);
}
return orp;
}, Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/otherresearchproduct");
readPath(spark, workingPath + "/otherresearchproduct", OtherResearchProduct.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/otherresearchproduct");
readPath(spark, inputPath + "/dataset", Dataset.class)
.map((MapFunction<Dataset, Dataset>) d -> {
List<StructuredProperty> sbject;
if (!Optional.ofNullable(d.getSubject()).isPresent())
d.setSubject(new ArrayList<>());
sbject = d.getSubject();
if (containscriteriaTwitter(d)) {
sbject.add(EOSC_TWITTER);
}
return d;
}, Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/dataset");
readPath(spark, workingPath + "/dataset", Dataset.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/dataset");
}
private static boolean containscriteriaTwitter(Result r) {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));
if (words.contains("twitter") &&
(words.contains("data") || words.contains("dataset")))
return true;
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
return true;
return false;
}
private static boolean containsCriteriaGalaxy(Result r) {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));
if (words.contains("galaxy") &&
words.contains("workflow"))
return true;
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
return true;
return false;
}
private static boolean containsCriteriaNotebook(Software s) {
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("jupyter")))
return true;
if (s
.getSubject()
.stream()
.anyMatch(
sbj -> sbj.getValue().toLowerCase().contains("python") &&
sbj.getValue().toLowerCase().contains("notebook")))
return true;
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("python")) &&
s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("notebook")))
return true;
return false;
}
private static Set<String> getSubjects(List<StructuredProperty> s) {
Set<String> subjects = new HashSet<>();
s.stream().forEach(sbj -> subjects.addAll(Arrays.asList(sbj.getValue().toLowerCase().split(" "))));
s.stream().forEach(sbj -> subjects.add(sbj.getValue().toLowerCase()));
return subjects;
}
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
Set<String> words = new HashSet<>();
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
return words;
}
private static Set<String> getWordsF(List<Field<String>> elem) {
Set<String> words = new HashSet<>();
Optional
.ofNullable(elem)
.ifPresent(
e -> e
.forEach(
t -> words
.addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
// elem
// .forEach(
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
return words;
}
}

View File

@@ -102,21 +102,28 @@ public class SparkCountryPropagationJob {

    private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
        return t -> {
            Optional.ofNullable(t._2()).ifPresent(r -> {
+                if (Optional.ofNullable(t._1().getCountry()).isPresent())
                    t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet()));
+                else
+                    t._1().setCountry(merge(null, t._2().getCountrySet()));
            });
            return t._1();
        };
    }

    private static List<Country> merge(List<Country> c1, List<CountrySbs> c2) {
-        HashSet<String> countries = c1
+        HashSet<String> countries = new HashSet<>();
+        if (Optional.ofNullable(c1).isPresent()) {
+            countries = c1
                .stream()
                .map(Qualifier::getClassid)
                .collect(Collectors.toCollection(HashSet::new));
+        }
+
+        HashSet<String> finalCountries = countries;
        return c2
            .stream()
-            .filter(c -> !countries.contains(c.getClassid()))
+            .filter(c -> !finalCountries.contains(c.getClassid()))
            .map(c -> getCountry(c.getClassid(), c.getClassname()))
            .collect(Collectors.toList());
    }

View File

@ -0,0 +1,21 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "wp",
"paramLongName": "workingPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
}
]

View File

@@ -204,7 +204,31 @@
        <error to="Kill"/>
    </action>

-    <join name="wait" to="End"/>
+    <join name="wait" to="eosc_tag"/>

+    <action name="eosc_tag">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>EOSC_tagging</name>
+            <class>eu.dnetlib.dhp.bulktag.SparkEoscTag</class>
+            <jar>dhp-enrichment-${projectVersion}.jar</jar>
+            <spark-opts>
+                --num-executors=${sparkExecutorNumber}
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${outputPath}</arg>
+            <arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
    <end name="End"/>

View File

@ -0,0 +1,538 @@
package eu.dnetlib.dhp.bulktag;
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.*;
public class EOSCTagJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(EOSCTagJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(EOSCTagJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(EOSCTagJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(EOSCTagJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void jupyterUpdatesTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/software").getPath())
.map(
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/software");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/dataset").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct").getPath())
.map(
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
.readValue(value, OtherResearchProduct.class),
Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/otherresearchproduct");
SparkEoscTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input",
"-workingPath", workingDir.toString() + "/working"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(10, tmp.count());
Assertions
.assertEquals(
4,
tmp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
9, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions
.assertEquals(
9, tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
List<StructuredProperty> subjects = tmp
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
.collect()
.get(0)
.getSubject();
Assertions.assertEquals(8, subjects.size());
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("jupyter")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("Modeling and Simulation")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("structure granulaire")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("algorithme")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("simulation numérique")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de gaz")));
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de liquide")));
Assertions
.assertEquals(
10, sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.filter(
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
.assertEquals(
10, sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.count());
Assertions
.assertEquals(
0, sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter(
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
// spark.stop();
}
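
// Verifies that SparkEoscTag adds the "EOSC::Galaxy Workflow" subject to the expected
// software and otherresearchproduct records, leaving the remaining records untouched.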
@Test
void galaxyUpdatesTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/software").getPath())
.map(
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/software");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/dataset").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct").getPath())
.map(
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
.readValue(value, OtherResearchProduct.class),
Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/otherresearchproduct");
SparkEoscTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input",
"-workingPath", workingDir.toString() + "/working"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(11, tmp.count());
Assertions
.assertEquals(
1,
tmp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
2, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
5, tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertEquals(
8, tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
JavaRDD<OtherResearchProduct> orp = sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, orp.count());
Assertions
.assertEquals(
1,
orp
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
3, orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertTrue(
orp
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
2, orp
.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
.collect()
.get(0)
.getSubject()
.size());
Assertions
.assertFalse(
orp
.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
.collect()
.get(0)
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
}
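
// Verifies that SparkEoscTag adds the "EOSC::Twitter Data" subject to the expected
// dataset and otherresearchproduct records, while no software record gets tagged.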
@Test
void twitterUpdatesTest() throws Exception {
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/software").getPath())
.map(
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
Encoders.bean(Software.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/software");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/dataset").getPath())
.map(
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
Encoders.bean(Dataset.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/dataset");
spark
.read()
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct").getPath())
.map(
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
.readValue(value, OtherResearchProduct.class),
Encoders.bean(OtherResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir.toString() + "/input/otherresearchproduct");
SparkEoscTag
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
workingDir.toString() + "/input",
"-workingPath", workingDir.toString() + "/working"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Software> tmp = sc
.textFile(workingDir.toString() + "/input/software")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
Assertions.assertEquals(10, tmp.count());
Assertions
.assertEquals(
0,
tmp
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
JavaRDD<OtherResearchProduct> orp = sc
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, orp.count());
Assertions
.assertEquals(
3,
orp
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
JavaRDD<Dataset> dats = sc
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(11, dats.count());
Assertions
.assertEquals(
3,
dats
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
.count());
}
}


@@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.util.*;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
@@ -136,7 +137,7 @@ public class MergeGraphTableSparkJob {
    /**
     * Datasources involved in the merge operation don't obey the infra precedence policy, but rely on a custom
     * behaviour that, given two datasources from beta and prod, returns the one from prod with the highest
-    * compatibility among the two.
+    * compatibility among the two. Furthermore, the procedure merges the collectedfrom, originalId, and pid lists.
     *
     * @param p datasource from PROD
     * @param b datasource from BETA
@@ -160,9 +161,37 @@ public class MergeGraphTableSparkJob {
        List<Qualifier> list = Arrays.asList(dp.getOpenairecompatibility(), db.getOpenairecompatibility());
        dp.setOpenairecompatibility(Collections.min(list, new DatasourceCompatibilityComparator()));
+       dp
+           .setCollectedfrom(
+               Stream
+                   .concat(
+                       Optional
+                           .ofNullable(dp.getCollectedfrom())
+                           .map(Collection::stream)
+                           .orElse(Stream.empty()),
+                       Optional
+                           .ofNullable(db.getCollectedfrom())
+                           .map(Collection::stream)
+                           .orElse(Stream.empty()))
+                   .distinct() // relies on KeyValue.equals
+                   .collect(Collectors.toList()));
+       dp.setOriginalId(mergeLists(dp.getOriginalId(), db.getOriginalId()));
+       dp.setPid(mergeLists(dp.getPid(), db.getPid()));
+
        return (P) dp;
    }

+   private static final <T> List<T> mergeLists(final List<T>... lists) {
+       return Arrays
+           .stream(lists)
+           .filter(Objects::nonNull)
+           .flatMap(List::stream)
+           .filter(Objects::nonNull)
+           .distinct()
+           .collect(Collectors.toList());
+   }
+
    private static <P extends Oaf, B extends Oaf> P mergeWithPriorityToPROD(Optional<P> p, Optional<B> b) {
        if (b.isPresent() & !p.isPresent()) {
            return (P) b.get();

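For reference, a minimal, self-contained sketch of the list-merge semantics introduced above (concatenate, drop nulls, deduplicate via equals); the class name and the sample id values are hypothetical:

import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

public class MergeListsSketch {

    // Same shape as the mergeLists helper added above: concatenate, drop null
    // lists and null elements, deduplicate, collect.
    @SafeVarargs
    private static <T> List<T> mergeLists(final List<T>... lists) {
        return Arrays
            .stream(lists)
            .filter(Objects::nonNull)
            .flatMap(List::stream)
            .filter(Objects::nonNull)
            .distinct()
            .collect(Collectors.toList());
    }

    public static void main(final String[] args) {
        // Hypothetical originalId lists coming from PROD and BETA
        final List<String> prod = Arrays.asList("opendoar____::1", "re3data_____::2");
        final List<String> beta = Arrays.asList("re3data_____::2", "fairsharing_::3");
        // prints [opendoar____::1, re3data_____::2, fairsharing_::3]
        System.out.println(mergeLists(prod, beta));
    }
}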

@@ -27,15 +27,7 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;

 import java.io.Closeable;
 import java.io.IOException;
@@ -143,8 +135,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
                smdbe.execute("queryClaims.sql", smdbe::processClaims);
                break;
            case openaire:
-               log.info("Processing datasources...");
-               smdbe.execute("queryDatasources.sql", smdbe::processDatasource, verifyNamespacePrefix);
+               log.info("Processing services...");
+               smdbe.execute("queryServices.sql", smdbe::processService, verifyNamespacePrefix);

                log.info("Processing projects...");
                if (dbSchema.equalsIgnoreCase("beta")) {
@@ -156,10 +148,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
                log.info("Processing Organizations...");
                smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);

-               log.info("Processing relationsNoRemoval ds <-> orgs ...");
+               log.info("Processing relations services <-> orgs ...");
                smdbe
                    .execute(
-                       "queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization,
+                       "queryServiceOrganization.sql", smdbe::processServiceOrganization,
                        verifyNamespacePrefix);

                log.info("Processing projects <-> orgs ...");
@@ -235,32 +227,30 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
        dbClient.processResults(sql, consumer);
    }

-   public List<Oaf> processDatasource(final ResultSet rs) {
+   public List<Oaf> processService(final ResultSet rs) {
        try {
            final DataInfo info = prepareDataInfo(rs);

            final Datasource ds = new Datasource();

-           ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
+           ds.setId(createOpenaireId(10, rs.getString("id"), true));
            ds
                .setOriginalId(
                    Arrays
-                       .asList((String[]) rs.getArray("identities").getArray())
+                       .asList((String[]) rs.getArray("originalid").getArray())
                        .stream()
                        .filter(StringUtils::isNotBlank)
                        .collect(Collectors.toList()));
-           ds
-               .setCollectedfrom(
-                   listKeyValues(
-                       createOpenaireId(10, rs.getString("collectedfromid"), true),
-                       rs.getString("collectedfromname")));
-           ds.setPid(new ArrayList<>());
+           ds.setCollectedfrom(prepareCollectedfrom(rs.getArray("collectedfrom")));
+           ds.setPid(prepareListOfStructProps(rs.getArray("pid"), info));
            ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
            ds.setDateoftransformation(null); // Value not returned by the SQL query
            ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
            ds.setOaiprovenance(null); // Values not present in the DB
            ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
            ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui")));
+           ds.setEosctype(prepareQualifierSplitting(rs.getString("eosctype")));
+           ds.setEoscdatasourcetype(prepareQualifierSplitting(rs.getString("eoscdatasourcetype")));
            ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
            ds.setOfficialname(field(rs.getString("officialname"), info));
            ds.setEnglishname(field(rs.getString("englishname"), info));
@@ -277,20 +267,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
            ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
            ds.setOdpolicies(field(rs.getString("odpolicies"), info));
            ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
-           ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
+           ds.setLanguages(listValues(rs.getArray("languages")));
            ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
            ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
            ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
            ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
-           ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
-           ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
            ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
            ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
            ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
            ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
            ds.setVersioning(field(rs.getBoolean("versioning"), info));
+           ds.setVersioncontrol(rs.getBoolean("versioncontrol"));
            ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
-           ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
            ds.setPidsystems(field(rs.getString("pidsystems"), info));
            ds.setCertificates(field(rs.getString("certificates"), info));
            ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
@@ -299,13 +288,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
                    journal(
                        rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"),
                        rs.getString("issnLinking"), info)); // Journal
-           ds.setDataInfo(info);
-           ds.setLastupdatetimestamp(lastUpdateTimestamp);
+           ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
            ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
            ds.setThematic(rs.getBoolean("thematic"));
-           ds.setKnowledgegraph(rs.getBoolean("knowledgegraph"));
            ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
+           ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl"));
+           ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl"));
+           ds.setResearchproductaccesspolicies(listValues(rs.getArray("researchproductaccesspolicies")));
+           ds
+               .setResearchproductmetadataaccesspolicies(
+                   listValues(rs.getArray("researchproductmetadataaccesspolicies")));
            ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse"));
            ds.setFulltextdownload(rs.getBoolean("fulltextdownload"));
            ds
@@ -313,8 +307,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
                    Optional
                        .ofNullable(
                            rs.getDate("consenttermsofusedate"))
-                       .map(c -> c.toString())
+                       .map(java.sql.Date::toString)
                        .orElse(null));
+           ds
+               .setLastconsenttermsofusedate(
+                   Optional
+                       .ofNullable(
+                           rs.getDate("lastconsenttermsofusedate"))
+                       .map(java.sql.Date::toString)
+                       .orElse(null));
+           ds.setDataInfo(info);
+           ds.setLastupdatetimestamp(lastUpdateTimestamp);

            return Arrays.asList(ds);
        } catch (final Exception e) {
@@ -425,11 +429,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
        }
    }

-   public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
+   public List<Oaf> processServiceOrganization(final ResultSet rs) {
        try {
            final DataInfo info = prepareDataInfo(rs);
            final String orgId = createOpenaireId(20, rs.getString("organization"), true);
-           final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
+           final String dsId = createOpenaireId(10, rs.getString("service"), true);
            final List<KeyValue> collectedFrom = listKeyValues(
                createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
@@ -603,6 +607,32 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
            String.format("%.3f", trust));
    }

+   private List<KeyValue> prepareCollectedfrom(Array values) throws SQLException {
+       if (Objects.isNull(values)) {
+           return null;
+       }
+       return Arrays
+           .stream((String[]) values.getArray())
+           .filter(Objects::nonNull)
+           .distinct()
+           .map(s -> keyValueSplitting(s, "@@@"))
+           .collect(Collectors.toList());
+   }
+
+   public static KeyValue keyValueSplitting(final String s, String separator) {
+       if (StringUtils.isBlank(s)) {
+           return null;
+       }
+       final String[] arr = s.split(separator);
+       if (arr.length != 2) {
+           return null;
+       }
+       KeyValue kv = new KeyValue();
+       kv.setKey(createOpenaireId(10, arr[0], true));
+       kv.setValue(arr[1]);
+       return kv;
+   }
+
    private Qualifier prepareQualifierSplitting(final String s) {
        if (StringUtils.isBlank(s)) {
            return null;
@@ -711,10 +741,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
            final Relation r = new Relation();
            r.setRelType(ORG_ORG_RELTYPE);
            r.setSubRelType(ModelConstants.RELATIONSHIP);
-           r
-               .setRelClass(
-                   rs.getString("type").equalsIgnoreCase("parent") ? ModelConstants.IS_PARENT_OF
-                       : ModelConstants.IS_CHILD_OF);
+           r.setRelClass(rs.getString("type"));
            r.setSource(orgId1);
            r.setTarget(orgId2);
            r.setCollectedfrom(collectedFrom);

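To make the encoding explicit: the services SQL serializes each collectedfrom entry as id@@@officialname (see dc.id||'@@@'||dc.officialname in the query further below), and keyValueSplitting above turns it back into a KeyValue. A minimal sketch of that decoding, with a hypothetical input value and plain strings in place of the KeyValue bean:

public class KeyValueSplittingSketch {

    public static void main(final String[] args) {
        // Hypothetical value, shaped like dc.id||'@@@'||dc.officialname
        final String s = "openaire____::driver@@@OpenAIRE";

        final String[] arr = s.split("@@@");
        if (arr.length == 2) {
            // In the mapper, arr[0] is turned into a 10| datasource id via
            // createOpenaireId(10, arr[0], true) and arr[1] becomes the value.
            System.out.println("key=" + arr[0] + ", value=" + arr[1]);
        }
    }
}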

@@ -283,7 +283,15 @@
        <error to="Kill"/>
    </action>

-   <join name="wait_dispatch" to="copy_relation"/>
+   <join name="wait_dispatch" to="delete_target_relation"/>
+
+   <action name="delete_target_relation">
+       <fs>
+           <delete path="${nameNode}/${graphOutputPath}/relation"/>
+       </fs>
+       <ok to="copy_relation"/>
+       <error to="Kill"/>
+   </action>

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">


@@ -30,6 +30,11 @@
            <value></value>
            <description>a blacklist of nsprefixes (comma separeted)</description>
        </property>
+       <property>
+           <name>reuseContent</name>
+           <value>false</value>
+           <description>reuse content in the aggregator database</description>
+       </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@@ -85,12 +90,20 @@
        </configuration>
    </global>

-   <start to="ImportDB"/>
+   <start to="reuse_db"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

+   <decision name="reuse_db">
+       <switch>
+           <case to="ImportDB">${wf:conf('reuseContent') eq false}</case>
+           <case to="GenerateEntities">${wf:conf('reuseContent') eq true}</case>
+           <default to="ImportDB"/>
+       </switch>
+   </decision>
+
    <action name="ImportDB">
        <java>
            <prepare>
@@ -102,6 +115,7 @@
            <arg>--postgresUser</arg><arg>${postgresUser}</arg>
            <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
+           <arg>--action</arg><arg>openaire</arg>
            <arg>--dbschema</arg><arg>${dbSchema}</arg>
            <arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
        </java>
@@ -124,6 +138,55 @@
            <arg>--action</arg><arg>claims</arg>
            <arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
        </java>
+       <ok to="GenerateEntities"/>
+       <error to="Kill"/>
+   </action>
+
+   <action name="GenerateEntities">
+       <spark xmlns="uri:oozie:spark-action:0.2">
+           <master>yarn</master>
+           <mode>cluster</mode>
+           <name>GenerateEntities</name>
+           <class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
+           <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+           <spark-opts>
+               --executor-memory ${sparkExecutorMemory}
+               --executor-cores ${sparkExecutorCores}
+               --driver-memory=${sparkDriverMemory}
+               --conf spark.extraListeners=${spark2ExtraListeners}
+               --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+               --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+               --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+           </spark-opts>
+           <arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/db_claims</arg>
+           <arg>--targetPath</arg><arg>${workingDir}/entities</arg>
+           <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
+           <arg>--shouldHashId</arg><arg>true</arg>
+       </spark>
+       <ok to="GenerateGraph"/>
+       <error to="Kill"/>
+   </action>
+
+   <action name="GenerateGraph">
+       <spark xmlns="uri:oozie:spark-action:0.2">
+           <master>yarn</master>
+           <mode>cluster</mode>
+           <name>GenerateGraph</name>
+           <class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
+           <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+           <spark-opts>
+               --executor-memory ${sparkExecutorMemory}
+               --executor-cores ${sparkExecutorCores}
+               --driver-memory=${sparkDriverMemory}
+               --conf spark.extraListeners=${spark2ExtraListeners}
+               --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+               --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+               --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+               --conf spark.sql.shuffle.partitions=7680
+           </spark-opts>
+           <arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
+           <arg>--graphRawPath</arg><arg>${workingDir}/graph_aggregator</arg>
+       </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>


@@ -27,7 +27,7 @@ SELECT
    'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
    array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid
 FROM dsm_organizations o
-   LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom)
+   LEFT OUTER JOIN dsm_services d ON (d.id = o.collectedfrom)
    LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id)
    LEFT OUTER JOIN dsm_identities i ON (i.pid = p.pid)
 GROUP BY


@@ -10,4 +10,4 @@ SELECT
    'OpenOrgs Database' AS collectedfromname,
    'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction
 FROM relationships
-WHERE reltype = 'Child' OR reltype = 'Parent'
+WHERE reltype = 'IsChildOf' OR reltype = 'IsParentOf'


@@ -16,4 +16,4 @@ SELECT
 FROM project_organization po
    LEFT OUTER JOIN projects p ON (p.id = po.project)
-   LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom);
+   LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom);


@@ -42,7 +42,7 @@ SELECT
    LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
    LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
-   LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
+   LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom)
    LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
    LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)


@@ -40,7 +40,7 @@ SELECT
    LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
    LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
-   LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
+   LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom)
    LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
    LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)


@@ -1,5 +1,5 @@
 SELECT
-   dor.datasource AS datasource,
+   dor.service AS service,
    dor.organization AS organization,
    NULL AS startdate,
    NULL AS enddate,
@@ -11,6 +11,6 @@ SELECT
    dc.officialname AS collectedfromname,
    'providedBy@@@dnet:datasources_organizations_typologies' AS semantics,
    d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction
-FROM dsm_datasource_organization dor
-   LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id)
-   LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom);
+FROM dsm_service_organization dor
+   LEFT OUTER JOIN dsm_services d ON (dor.service = d.id)
+   LEFT OUTER JOIN dsm_services dc ON (dc.id = d.collectedfrom);


@@ -1,6 +1,7 @@
 SELECT
-   d.id AS datasourceid,
-   d.id || array_agg(distinct di.pid) AS identities,
+   d.id AS id,
+   array_remove(d.id || array_agg(distinct CASE WHEN dp.pid like 'piwik%' THEN di.pid ELSE NULL END) || array_agg(distinct dds.duplicate), NULL) AS originalid,
+   array_remove(array_agg(distinct CASE WHEN di.pid NOT LIKE 'piwik%' THEN di.pid||'###'||di.issuertype||'@@@'||'dnet:pid_types' ELSE NULL END), NULL) as pid,
    d.officialname AS officialname,
    d.englishname AS englishname,
    d.contactemail AS contactemail,
@@ -40,14 +41,13 @@ SELECT
    END AS openairecompatibility,
    d.websiteurl AS websiteurl,
    d.logourl AS logourl,
-   array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END) AS accessinfopackage,
+   array_remove(array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END), NULL) AS accessinfopackage,
    d.latitude AS latitude,
    d.longitude AS longitude,
    d.namespaceprefix AS namespaceprefix,
    NULL AS odnumberofitems,
    NULL AS odnumberofitemsdate,
-   (SELECT array_agg(s|| '###keyword@@@dnet:subject_classification_typologies')
+   (SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies')
    FROM UNNEST(
        ARRAY(
            SELECT trim(s)
@@ -55,10 +55,15 @@ SELECT
    d.description AS description,
    NULL AS odpolicies,
-   ARRAY(SELECT trim(s)
-       FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages,
-   ARRAY(SELECT trim(s)
-       FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
+   array_remove(ARRAY(SELECT trim(s)
+       FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS odlanguages,
+   array_remove(ARRAY(SELECT trim(s)
+       FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS languages,
+   -- Term provided only by OpenDOAR:
+   --  probably updating the TR it could be replaced by research_entity_types[]
+   --  But a study on the vocabulary terms is needed
+   -- REMOVED: ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
    false AS inferred,
    false AS deletedbyinference,
    0.9 AS trust,
@@ -69,39 +74,59 @@ SELECT
    d.releasestartdate AS releasestartdate,
    d.releaseenddate AS releaseenddate,
    d.missionstatementurl AS missionstatementurl,
-   d.dataprovider AS dataprovider,
-   d.serviceprovider AS serviceprovider,
+   -- the following 2 fields (provided by re3data) have been replaced by research_entity_types[]
+   -- VALUE 'Research Data' : d.dataprovider AS dataprovider,
+   -- VALUE 'Services' : d.serviceprovider AS serviceprovider,
    d.databaseaccesstype AS databaseaccesstype,
    d.datauploadtype AS datauploadtype,
    d.databaseaccessrestriction AS databaseaccessrestriction,
    d.datauploadrestriction AS datauploadrestriction,
-   d.versioning AS versioning,
+   -- REPLACED BY version_control : d.versioning AS versioning,
+   d.version_control AS versioning,
+   d.version_control AS versioncontrol,
    d.citationguidelineurl AS citationguidelineurl,
-   d.qualitymanagementkind AS qualitymanagementkind,
-   d.pidsystems AS pidsystems,
+   array_to_string(array_agg(distinct dps.scheme), ' ') AS pidsystems,
    d.certificates AS certificates,
    ARRAY[]::text[] AS policies,
-   dc.id AS collectedfromid,
-   dc.officialname AS collectedfromname,
-   d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
-   d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
-   'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
+   array_remove(
+       array(
+           select distinct cf
+           from unnest(
+               dc.id||'@@@'||dc.officialname || array_agg(distinct dds_cf.id||'@@@'||dds_cf.officialname)
+           ) as cf),
+       NULL) AS collectedfrom,
+   d._typology_to_remove_||'@@@dnet:datasource_typologies' AS datasourcetype,
+   d._typology_to_remove_||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
+   d.eosc_type||'@@@dnet:eosc_types' AS eosctype,
+   d.eosc_datasource_type||'@@@dnet:eosc_datasource_types' AS eoscdatasourcetype,
    d.issn AS issnPrinted,
    d.eissn AS issnOnline,
    d.lissn AS issnLinking,
+   d.research_entity_types AS researchentitytypes,
    d.consenttermsofuse AS consenttermsofuse,
    d.fulltextdownload AS fulltextdownload,
    d.consenttermsofusedate AS consenttermsofusedate,
-   de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
-   de.thematic AS thematic,
-   de.knowledge_graph AS knowledgegraph,
-   array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies
+   d.lastconsenttermsofusedate AS lastconsenttermsofusedate,
+   d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
+   d.thematic AS thematic,
+   array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies,
+   nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl,
+   nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl,
+   array_remove(d.research_product_access_policies, '') AS researchproductaccesspolicies,
+   array_remove(d.research_product_metadata_access_policies, '') AS researchproductmetadataaccesspolicies
-FROM dsm_datasources d
-   LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id)
-   LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id)
-   LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource)
-   LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource)
+FROM dsm_services d
+   LEFT OUTER JOIN dsm_services dc on (d.collectedfrom = dc.id)
+   LEFT OUTER JOIN dsm_api a ON (d.id = a.service)
+   LEFT OUTER JOIN dsm_servicepids dp ON (d.id = dp.service)
+   LEFT OUTER JOIN dsm_identities di ON (dp.pid = di.pid)
+   LEFT OUTER JOIN dsm_dedup_services dds ON (d.id = dds.id)
+   LEFT OUTER JOIN dsm_services dds_dup ON (dds.duplicate = dds_dup.id)
+   LEFT OUTER JOIN dsm_services dds_cf ON (dds_dup.collectedfrom = dds_cf.id)
+   LEFT OUTER JOIN dsm_pid_systems dps ON (d.id = dps.service)
+WHERE
+   d.dedup_main_service = true
 GROUP BY
    d.id,
@@ -119,23 +144,27 @@ GROUP BY
    d.releasestartdate,
    d.releaseenddate,
    d.missionstatementurl,
-   d.dataprovider,
-   d.serviceprovider,
+   -- TODO REMOVED ???: d.dataprovider,
+   -- TODO REMOVED ???: d.serviceprovider,
    d.databaseaccesstype,
    d.datauploadtype,
    d.databaseaccessrestriction,
    d.datauploadrestriction,
-   d.versioning,
+   -- REPLACED BY version_control : d.versioning,
+   d.version_control,
    d.citationguidelineurl,
-   d.qualitymanagementkind,
-   d.pidsystems,
+   -- REMOVED: d.qualitymanagementkind,
    d.certificates,
    dc.id,
    dc.officialname,
    d.issn,
    d.eissn,
    d.lissn,
-   de.jurisdiction,
-   de.thematic,
-   de.knowledge_graph,
-   de.content_policies
+   d.jurisdiction,
+   d.thematic,
+   -- REMOVED ???: de.knowledge_graph,
+   d.content_policies,
+   d.submission_policy_url,
+   d.preservation_policy_url,
+   d.research_product_access_policies,
+   d.research_product_metadata_access_policies

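The pid column above encodes each entry as value###classid@@@schemeid, e.g. r3d100010218###re3data@@@dnet:pid_types, which matches the expectations asserted in MigrateDbEntitiesApplicationTest further below. A minimal decoding sketch, assuming this two-level separator convention (the actual parsing helper is not shown in this diff):

public class PidSplittingSketch {

    public static void main(final String[] args) {
        // Hypothetical value, shaped like di.pid||'###'||di.issuertype||'@@@'||'dnet:pid_types'
        final String encoded = "r3d100010218###re3data@@@dnet:pid_types";

        final String[] valueAndQualifier = encoded.split("###");
        final String value = valueAndQualifier[0]; // r3d100010218
        final String[] classAndScheme = valueAndQualifier[1].split("@@@");
        final String classid = classAndScheme[0]; // re3data
        final String schemeid = classAndScheme[1]; // dnet:pid_types

        System.out.println(value + " / " + classid + " / " + schemeid);
    }
}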

@@ -7,6 +7,7 @@ import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
+import scala.collection.JavaConverters._

 object SparkConvertRDDtoDataset {
@@ -94,8 +95,8 @@ object SparkConvertRDDtoDataset {
    log.info("Converting Relation")

    val relationSemanticFilter = List(
-     "cites",
-     "iscitedby",
+//      "cites",
+//      "iscitedby",
      "merges",
      "ismergedin",
      "HasAmongTopNSimilarDocuments",
@@ -107,6 +108,12 @@ object SparkConvertRDDtoDataset {
      .map(s => mapper.readValue(s, classOf[Relation]))
      .filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
      .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
+     //filter OpenCitations relations
+     .filter(r =>
+       r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
+         "opencitations".equalsIgnoreCase(k.getValue)
+       )
+     )
      .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))

    spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")

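The new filter above drops relations collected from OpenCitations before the conversion. For illustration, the same predicate rendered in Java (KeyValue here is a minimal stand-in for the eu.dnetlib.dhp.schema.oaf bean, not the real class):

import java.util.List;

public class OpenCitationsFilterSketch {

    // Minimal stand-in for the KeyValue bean
    static class KeyValue {
        private final String value;

        KeyValue(final String value) {
            this.value = value;
        }

        String getValue() {
            return value;
        }
    }

    // Keep relations with a non-empty collectedfrom that does not mention OpenCitations
    static boolean keep(final List<KeyValue> collectedfrom) {
        return collectedfrom != null
            && !collectedfrom.isEmpty()
            && collectedfrom.stream().noneMatch(k -> "opencitations".equalsIgnoreCase(k.getValue()));
    }
}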

@@ -1,136 +0,0 @@
package eu.dnetlib.dhp.sx.graph.pangaea
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.{Encoder, Encoders}
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import java.util.regex.Pattern
import scala.language.postfixOps
import scala.xml.{Elem, Node, XML}
case class PangaeaDataModel(
identifier: String,
title: List[String],
objectType: List[String],
creator: List[String],
publisher: List[String],
dataCenter: List[String],
subject: List[String],
language: String,
rights: String,
parent: String,
relation: List[String],
linkage: List[(String, String)]
) {}
object PangaeaUtils {
def toDataset(input: String): PangaeaDataModel = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
val xml = (json \ "xml").extract[String]
parseXml(xml)
}
def findDOIInRelation(input: List[String]): List[String] = {
val pattern = Pattern.compile("\\b(10[.][0-9]{4,}(?:[.][0-9]+)*\\/(?:(?![\"&\\'<>])\\S)+)\\b")
input
.map(i => {
val matcher = pattern.matcher(i)
if (matcher.find())
matcher.group(0)
else
null
})
.filter(i => i != null)
}
def attributeOpt(attribute: String, node: Node): Option[String] =
node.attribute(attribute) flatMap (_.headOption) map (_.text)
def extractLinkage(node: Elem): List[(String, String)] = {
(node \ "linkage")
.map(n => (attributeOpt("type", n), n.text))
.filter(t => t._1.isDefined)
.map(t => (t._1.get, t._2))(collection.breakOut)
}
def parseXml(input: String): PangaeaDataModel = {
val xml = XML.loadString(input)
val identifier = (xml \ "identifier").text
val title: List[String] = (xml \ "title").map(n => n.text)(collection.breakOut)
val pType: List[String] = (xml \ "type").map(n => n.text)(collection.breakOut)
val creators: List[String] = (xml \ "creator").map(n => n.text)(collection.breakOut)
val publisher: List[String] = (xml \ "publisher").map(n => n.text)(collection.breakOut)
val dataCenter: List[String] = (xml \ "dataCenter").map(n => n.text)(collection.breakOut)
val subject: List[String] = (xml \ "subject").map(n => n.text)(collection.breakOut)
val language = (xml \ "language").text
val rights = (xml \ "rights").text
val parentIdentifier = (xml \ "parentIdentifier").text
val relation: List[String] = (xml \ "relation").map(n => n.text)(collection.breakOut)
val relationFiltered = findDOIInRelation(relation)
val linkage: List[(String, String)] = extractLinkage(xml)
PangaeaDataModel(
identifier,
title,
pType,
creators,
publisher,
dataCenter,
subject,
language,
rights,
parentIdentifier,
relationFiltered,
linkage
)
}
def getDatasetAggregator(): Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] =
new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] {
override def zero: PangaeaDataModel = null
override def reduce(b: PangaeaDataModel, a: (String, PangaeaDataModel)): PangaeaDataModel = {
if (b == null)
a._2
else {
if (a == null)
b
else {
if (b.title != null && b.title.nonEmpty)
b
else
a._2
}
}
}
override def merge(b1: PangaeaDataModel, b2: PangaeaDataModel): PangaeaDataModel = {
if (b1 == null)
b2
else {
if (b2 == null)
b1
else {
if (b1.title != null && b1.title.nonEmpty)
b1
else
b2
}
}
}
override def finish(reduction: PangaeaDataModel): PangaeaDataModel = reduction
override def bufferEncoder: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel]
override def outputEncoder: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel]
}
}


@@ -1,58 +0,0 @@
package eu.dnetlib.dhp.sx.graph.pangaea
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
import scala.io.Source
object SparkGeneratePanagaeaDataset {
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(getClass)
val conf: SparkConf = new SparkConf()
val parser = new ArgumentApplicationParser(
Source
.fromInputStream(
getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/pangaea/pangaea_to_dataset.json")
)
.mkString
)
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
.builder()
.config(conf)
.appName(SparkGeneratePanagaeaDataset.getClass.getSimpleName)
.master(parser.get("master"))
.getOrCreate()
parser.getObjectMap.asScala.foreach(s => logger.info(s"${s._1} -> ${s._2}"))
logger.info("Converting sequential file into Dataset")
val sc: SparkContext = spark.sparkContext
val workingPath: String = parser.get("workingPath")
implicit val pangaeaEncoders: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel]
val inputRDD: RDD[PangaeaDataModel] =
sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s))
spark
.createDataset(inputRDD)
.as[PangaeaDataModel]
.map(s => (s.identifier, s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders))
.groupByKey(_._1)(Encoders.STRING)
.agg(PangaeaUtils.getDatasetAggregator().toColumn)
.map(s => s._2)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingPath/dataset")
}
}


@@ -2,6 +2,7 @@
 package eu.dnetlib.dhp.oa.graph.merge;

 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;

 import java.io.IOException;
 import java.util.Optional;
@@ -25,7 +26,23 @@ class MergeGraphTableSparkJobTest {
    }

    @Test
-   void testMergeDatasources() throws IOException {
+   void testMerge() throws IOException {
+       Datasource d = MergeGraphTableSparkJob
+           .mergeDatasource(
+               d("datasource_cris.json"),
+               d("datasource_openaire2.0.json"));
+       assertEquals("10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", d.getId());
+       assertNotNull(d.getOriginalId());
+       assertEquals(2, d.getOriginalId().size());
+       assertNotNull(d.getCollectedfrom());
+       assertEquals(2, d.getCollectedfrom().size());
+       assertNotNull(d.getPid());
+       assertEquals(1, d.getPid().size());
+   }
+
+   @Test
+   void testMergeCompatibility() throws IOException {
        assertEquals(
            "openaire-cris_1.1",
            MergeGraphTableSparkJob


@@ -835,6 +835,20 @@ class MappersTest {
        assertEquals("EUR", p.getProcessingchargecurrency().getValue());
    }

+   @Test
+   void testROHub() throws IOException, DocumentException {
+       final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("rohub.xml")));
+       final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
+       System.out.println("***************");
+       System.out.println(new ObjectMapper().writeValueAsString(list));
+       System.out.println("***************");
+       // final Dataset p = (Dataset) list.get(0);
+       // assertValidId(p.getId());
+       // assertValidId(p.getCollectedfrom().get(0).getKey());
+       // System.out.println(p.getTitle().get(0).getValue());
+       // assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
+   }
+
    private void assertValidId(final String id) {
        // System.out.println(id);


@ -12,8 +12,11 @@ import java.sql.Array;
import java.sql.Date; import java.sql.Date;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -28,12 +31,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
@ -63,22 +61,32 @@ public class MigrateDbEntitiesApplicationTest {
} }
@Test @Test
public void testProcessDatasource() throws Exception { public void testProcessService() throws Exception {
final List<TypedField> fields = prepareMocks("datasources_resultset_entry.json"); final List<TypedField> fields = prepareMocks("services_resultset_entry.json");
final List<Oaf> list = app.processDatasource(rs); final List<Oaf> list = app.processService(rs);
assertEquals(1, list.size()); assertEquals(1, list.size());
verifyMocks(fields); verifyMocks(fields);
final Datasource ds = (Datasource) list.get(0); final Datasource ds = (Datasource) list.get(0);
assertValidId(ds.getId()); assertValidId(ds.getId());
assertValidId(ds.getCollectedfrom().get(0).getKey()); ds
.getCollectedfrom()
.stream()
.map(KeyValue::getKey)
.forEach(dsId -> assertValidId(dsId));
assertEquals(1, ds.getPid().size());
assertEquals("r3d100010218", ds.getPid().get(0).getValue());
assertEquals("re3data", ds.getPid().get(0).getQualifier().getClassid());
assertEquals("dnet:pid_types", ds.getPid().get(0).getQualifier().getSchemeid());
assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue()); assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue());
assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue()); assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue());
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue()); assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue());
assertEquals(getValueAsString("logourl", fields), ds.getLogourl());
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue()); assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue());
assertEquals(getValueAsString("collectedfromname", fields), ds.getCollectedfrom().get(0).getValue());
assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName()); assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName());
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted()); assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline()); assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
@ -90,19 +98,98 @@ public class MigrateDbEntitiesApplicationTest {
assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid()); assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid());
assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid()); assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid());
assertEquals("Data Source", ds.getEosctype().getClassid());
assertEquals("Data Source", ds.getEosctype().getClassname());
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemeid());
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemename());
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassid());
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassname());
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemeid());
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemename());
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassid());
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassname());
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemeid());
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemename());
assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude().getValue());
assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude().getValue());
assertEquals(getValueAsString("dateofvalidation", fields), ds.getDateofvalidation());
assertEquals(getValueAsString("description", fields), ds.getDescription().getValue());
// TODO assertEquals(getValueAsString("subjects", fields), ds.getSubjects());
assertEquals("0.0", ds.getOdnumberofitems().getValue());
assertEquals(getValueAsString("odnumberofitemsdate", fields), ds.getOdnumberofitemsdate());
assertEquals(getValueAsString("odpolicies", fields), ds.getOdpolicies());
assertEquals(
getValueAsList("odlanguages", fields),
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList()));
assertEquals(getValueAsList("languages", fields), ds.getLanguages());
assertEquals(
getValueAsList("accessinfopackage", fields),
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList()));
assertEquals(getValueAsString("releasestartdate", fields), ds.getReleasestartdate());
assertEquals(getValueAsString("releaseenddate", fields), ds.getReleasestartdate());
assertEquals(getValueAsString("missionstatementurl", fields), ds.getMissionstatementurl());
assertEquals(null, ds.getDataprovider());
assertEquals(null, ds.getServiceprovider());
assertEquals(getValueAsString("databaseaccesstype", fields), ds.getDatabaseaccesstype());
assertEquals(getValueAsString("datauploadtype", fields), ds.getDatauploadtype());
assertEquals(getValueAsString("databaseaccessrestriction", fields), ds.getDatabaseaccessrestriction());
assertEquals(getValueAsString("datauploadrestriction", fields), ds.getDatauploadrestriction());
assertEquals(false, ds.getVersioning().getValue());
assertEquals(false, ds.getVersioncontrol());
assertEquals(getValueAsString("citationguidelineurl", fields), ds.getCitationguidelineurl());
assertEquals(getValueAsString("pidsystems", fields), ds.getPidsystems());
assertEquals(getValueAsString("certificates", fields), ds.getCertificates());
assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes());
assertEquals("National", ds.getJurisdiction().getClassid()); assertEquals("National", ds.getJurisdiction().getClassid());
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid()); assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
assertTrue(ds.getThematic()); assertTrue(ds.getThematic());
assertTrue(ds.getKnowledgegraph());
assertEquals(1, ds.getContentpolicies().size()); HashSet<String> cpSchemeId = ds
assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid()); .getContentpolicies()
assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid()); .stream()
.map(Qualifier::getSchemeid)
.collect(Collectors.toCollection(HashSet::new));
assertEquals(1, cpSchemeId.size());
assertTrue(cpSchemeId.contains("eosc:contentpolicies"));
HashSet<String> cpSchemeName = ds
.getContentpolicies()
.stream()
.map(Qualifier::getSchemename)
.collect(Collectors.toCollection(HashSet::new));
assertEquals(1, cpSchemeName.size());
assertTrue(cpSchemeName.contains("eosc:contentpolicies"));
assertEquals(2, ds.getContentpolicies().size());
assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid());
assertEquals("Resource collection", ds.getContentpolicies().get(1).getClassid());
assertEquals(getValueAsString("submissionpolicyurl", fields), ds.getSubmissionpolicyurl());
assertEquals(getValueAsString("preservationpolicyurl", fields), ds.getPreservationpolicyurl());
assertEquals(
getValueAsList("researchproductaccesspolicies", fields),
ds.getResearchproductaccesspolicies());
assertEquals(
getValueAsList("researchproductmetadataaccesspolicies", fields),
ds.getResearchproductmetadataaccesspolicies());
assertEquals(true, ds.getConsenttermsofuse()); assertEquals(true, ds.getConsenttermsofuse());
assertEquals(true, ds.getFulltextdownload()); assertEquals(true, ds.getFulltextdownload());
assertEquals("2022-03-11", ds.getConsenttermsofusedate()); assertEquals("2022-03-11", ds.getConsenttermsofusedate());
assertEquals("2022-03-11", ds.getLastconsenttermsofusedate());
} }
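The per-facet size and contains checks above can be collapsed into one assertion by comparing against a singleton set; a minimal sketch, assuming JUnit 5 plus java.util.Collections and Collectors on the classpath, and the same Qualifier accessors used in the test:

    // Hedged alternative: one assertion per scheme facet; a failure prints both sets.
    Set<String> cpSchemeIds = ds
        .getContentpolicies()
        .stream()
        .map(Qualifier::getSchemeid)
        .collect(Collectors.toSet());
    assertEquals(Collections.singleton("eosc:contentpolicies"), cpSchemeIds);

The same shape applies to the schemename facet.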
@Test @Test
@ -154,7 +241,7 @@ public class MigrateDbEntitiesApplicationTest {
public void testProcessDatasourceOrganization() throws Exception { public void testProcessDatasourceOrganization() throws Exception {
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json"); final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
final List<Oaf> list = app.processDatasourceOrganization(rs); final List<Oaf> list = app.processServiceOrganization(rs);
assertEquals(2, list.size()); assertEquals(2, list.size());
verifyMocks(fields); verifyMocks(fields);
@ -356,18 +443,31 @@ public class MigrateDbEntitiesApplicationTest {
} }
private Float getValueAsFloat(final String name, final List<TypedField> fields) { private Float getValueAsFloat(final String name, final List<TypedField> fields) {
return new Float(getValueAs(name, fields).toString()); final Object value = getValueAs(name, fields);
return value != null ? new Float(value.toString()) : null;
}
private Double getValueAsDouble(final String name, final List<TypedField> fields) {
final Object value = getValueAs(name, fields);
return value != null ? new Double(value.toString()) : null;
}
private Integer getValueAsInt(final String name, final List<TypedField> fields) {
final Object value = getValueAs(name, fields);
return value != null ? new Integer(value.toString()) : null;
} }
private <T> T getValueAs(final String name, final List<TypedField> fields) { private <T> T getValueAs(final String name, final List<TypedField> fields) {
return fields final Optional<T> field = fields
.stream() .stream()
.filter(f -> f.getField().equals(name)) .filter(f -> f.getField().equals(name))
.map(TypedField::getValue)
.filter(Objects::nonNull)
.map(o -> (T) o)
.findFirst() .findFirst()
.get(); .map(TypedField::getValue)
.map(o -> (T) o);
if (!field.isPresent()) {
return null;
}
return field.get();
} }
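The Optional round-trip above can be expressed more compactly with orElse(null); an equivalent sketch under the same assumptions about TypedField:

    // Hedged rewrite of the lookup helper; null-handling behaviour is unchanged.
    @SuppressWarnings("unchecked")
    private <T> T getValueAs(final String name, final List<TypedField> fields) {
        return (T) fields
            .stream()
            .filter(f -> f.getField().equals(name))
            .map(TypedField::getValue)
            .filter(Objects::nonNull)
            .findFirst()
            .orElse(null);
    }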
private List<String> getValueAsList(final String name, final List<TypedField> fields) { private List<String> getValueAsList(final String name, final List<TypedField> fields) {

View File

@ -1,29 +0,0 @@
package eu.dnetlib.dhp.sx.pangaea
import eu.dnetlib.dhp.sx.graph.pangaea.PangaeaUtils
import org.junit.jupiter.api.Test
import java.util.TimeZone
import java.text.SimpleDateFormat
import java.util.Date
import scala.io.Source
class PangaeaTransformTest {
@Test
def test_dateStamp() :Unit ={
val d = new Date()
val s:String = s"${new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS")format d}Z"
println(s)
val xml = Source.fromInputStream(getClass.getResourceAsStream("input.xml")).mkString
println(PangaeaUtils.parseXml(xml))
}
}

View File

@ -497,6 +497,7 @@ dnet:publication_resource @=@ 0044 @=@ Graduate diploma
dnet:publication_resource @=@ 0044 @=@ Undergraduate diploma dnet:publication_resource @=@ 0044 @=@ Undergraduate diploma
dnet:publication_resource @=@ 0000 @=@ UNKNOWN dnet:publication_resource @=@ 0000 @=@ UNKNOWN
dnet:publication_resource @=@ 0042 @=@ EGI Virtual Appliance dnet:publication_resource @=@ 0042 @=@ EGI Virtual Appliance
dnet:publication_resource @=@ 0048 @=@ RO-crate
dnet:languages @=@ abk @=@ ab dnet:languages @=@ abk @=@ ab
dnet:languages @=@ aar @=@ aa dnet:languages @=@ aar @=@ aa
dnet:languages @=@ afr @=@ af dnet:languages @=@ afr @=@ af

View File

@ -164,6 +164,7 @@ dnet:publication_resource @=@ dnet:publication_resource @=@ 0030 @=@ Sound
dnet:publication_resource @=@ dnet:publication_resource @=@ 0044 @=@ Thesis dnet:publication_resource @=@ dnet:publication_resource @=@ 0044 @=@ Thesis
dnet:publication_resource @=@ dnet:publication_resource @=@ 0000 @=@ Unknown dnet:publication_resource @=@ dnet:publication_resource @=@ 0000 @=@ Unknown
dnet:publication_resource @=@ dnet:publication_resource @=@ 0042 @=@ Virtual Appliance dnet:publication_resource @=@ dnet:publication_resource @=@ 0042 @=@ Virtual Appliance
dnet:publication_resource @=@ dnet:publication_resource @=@ 0048 @=@ Research Object
ec:funding_typologies @=@ ec:funding_typologies @=@ ec:frameworkprogram @=@ frameworkprogram ec:funding_typologies @=@ ec:funding_typologies @=@ ec:frameworkprogram @=@ frameworkprogram
ec:funding_typologies @=@ ec:funding_typologies @=@ ec:program @=@ program ec:funding_typologies @=@ ec:funding_typologies @=@ ec:program @=@ program
ec:funding_typologies @=@ ec:funding_typologies @=@ ec:specificprogram @=@ specificprogram ec:funding_typologies @=@ ec:funding_typologies @=@ ec:specificprogram @=@ specificprogram

View File

@ -1 +1,5 @@
{ "id": "10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", "openairecompatibility": { "classid": "openaire-cris_1.1" }} { "id": "10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", "openairecompatibility": { "classid": "openaire-cris_1.1" },
"originalId": ["eurocrisdris::1234"],
"collectedfrom": [{"key": "eurocrisdris::2b29d08e383ff4cd8a2b6b226ce37e38", "value": "Directory of Research Information System (DRIS)"}],
"pid": [{"value": "10.1010.xyx", "qualifier": {"classid": "doi"}}]
}

View File

@ -1 +1,4 @@
{ "id": "10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", "openairecompatibility": { "classid": "openaire2.0" }} { "id": "10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", "openairecompatibility": { "classid": "openaire2.0" },
"originalId": ["opendoar____::1234"],
"collectedfrom": [{"key": "openaire____::47ce9e9f4fad46e732cff06419ecaabb", "value": "OpenDOAR"}]
}

View File

@ -1,6 +1,6 @@
[ [
{ {
"field": "datasource", "field": "service",
"type": "string", "type": "string",
"value": "openaire____::revistasunicauca" "value": "openaire____::revistasunicauca"
}, },

View File

@ -0,0 +1,103 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<header xmlns="http://www.openarchives.org/OAI/2.0/">
<dri:objIdentifier>eosca5322f5f::4dd1aaf93ae136b65dc9ee4e6f76eac9</dri:objIdentifier>
<dri:recordIdentifier>53aa90bf-c593-4e6d-923f-d4711ac4b0e1</dri:recordIdentifier>
<dri:dateOfCollection>2022-05-25T15:35:48.262Z</dri:dateOfCollection>
<oaf:datasourceprefix>eosca5322f5f</oaf:datasourceprefix>
<identifier>53aa90bf-c593-4e6d-923f-d4711ac4b0e1</identifier>
<datestamp>2022-05-25T15:35:38Z</datestamp>
<setSpec>rohub_data</setSpec>
<setSpec>ro-crate_data</setSpec>
<dr:dateOfTransformation>2022-05-25T15:36:11.094Z</dr:dateOfTransformation>
</header>
<metadata>
<oaire:resource xmlns="http://namespace.openaire.eu/schema/oaire/">
<datacite:identifier identifierType="landingPage">https://w3id.org/ro-id/53aa90bf-c593-4e6d-923f-d4711ac4b0e1</datacite:identifier>
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="URL">http://api.rohub.org/api/ros/53aa90bf-c593-4e6d-923f-d4711ac4b0e1/</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="" relationType="">
https://github.com/NordicESMhub/RELIANCE/blob/main/content/science/notebooks/air_quality_lockdown.ipynb
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="URI" relationType="IsPartOf">https://github.com/NordicESMhub/RELIANCE/blob/main/content/science/notebooks/air_quality_lockdown.ipynb</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="" relationType="">
https://nordicesmhub.github.io/RELIANCE/science/notebooks/air_quality_lockdown.html
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="URI" relationType="IsPartOf">https://nordicesmhub.github.io/RELIANCE/science/notebooks/air_quality_lockdown.html</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<creators xmlns="http://datacite.org/schema/kernel-4">
<creator>
<creatorName>Anne Fouilloux</creatorName>
</creator>
</creators>
<dates xmlns="http://datacite.org/schema/kernel-4">
<date dateType="Created">2021-12-19T21:18:33Z</date>
</dates>
<dc:descriptions>
<dc:description descriptionType="Abstract">The COVID-19 pandemic has led to significant reductions in economic activity, especially during lockdowns. Several studies has shown that the concentration of nitrogen dioxyde and particulate matter levels have reduced during lockdown events. Reductions in transportation sector emissions are most likely largely responsible for the NO2 anomalies. In this study, we analyze the impact of lockdown events on the air quality using data from Copernicus Atmosphere Monitoring Service over Europe and at selected locations.</dc:description>
</dc:descriptions>
<oaire:fundingReferences>
<oaire:fundingReference>
<oaire:funderName>European Commission</oaire:funderName>
<oaire:funderIdentifier funderIdentifierType="Crossref Funder ID">10.13039/501100000781</oaire:funderIdentifier>
<oaire:awardNumber awardURI="">101017502</oaire:awardNumber>
<oaire:awardTitle>Research Lifecycle Management for Earth Science Communities and Copernicus Users</oaire:awardTitle>
</oaire:fundingReference>
</oaire:fundingReferences>
<oaire:licenseCondition uri="https://opensource.org/licenses/MIT">MIT License</oaire:licenseCondition>
<dc:publisher>University of Oslo</dc:publisher>
<dc:publicationYear>2021</dc:publicationYear>
<oaire:resourceType resourceTypeGeneral="other research product" uri="http://purl.org/coar/resource_type/c_1843">RO-crate</oaire:resourceType>
<rightsList xmlns="http://datacite.org/schema/kernel-4">
<rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</rights>
</rightsList>
<sizes xmlns="http://datacite.org/schema/kernel-4">
<size>11.971 MB</size>
</sizes>
<subjects xmlns="http://datacite.org/schema/kernel-4">
<subject>Applied sciences</subject>
<subject>Meteorology</subject>
<subject>EOSC::RO-crate</subject>
</subjects>
<titles xmlns="http://datacite.org/schema/kernel-4">
<title>Impact of the Covid-19 Lockdown on Air quality over Europe</title>
</titles>
</oaire:resource>
<oaf:identifier identifierType="URL">https://w3id.org/ro-id/53aa90bf-c593-4e6d-923f-d4711ac4b0e1</oaf:identifier>
<dr:CobjCategory type="other">0048</dr:CobjCategory>
<oaf:dateAccepted/>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:license>https://opensource.org/licenses/MIT</oaf:license>
<oaf:language>und</oaf:language>
<oaf:hostedBy id="eosc________::psnc::psnc.rohub" name="ROHub"/>
<oaf:collectedFrom id="eosc________::psnc::psnc.rohub" name="ROHub"/>
</metadata>
<about xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
<originDescription altered="true" harvestDate="2022-05-25T15:35:48.262Z">
<baseURL>https%3A%2F%2Fapi.rohub.org%2Fapi%2Foai2d%2F</baseURL>
<identifier>53aa90bf-c593-4e6d-923f-d4711ac4b0e1</identifier>
<datestamp>2022-05-25T15:35:38Z</datestamp>
<metadataNamespace/>
</originDescription>
</provenance>
<oaf:datainfo>
<oaf:inferred>false</oaf:inferred>
<oaf:deletedbyinference>false</oaf:deletedbyinference>
<oaf:trust>0.9</oaf:trust>
<oaf:inferenceprovenance/>
<oaf:provenanceaction classid="sysimport:crosswalk"
classname="Harvested" schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
</oaf:datainfo>
</about>
</record>

View File

@ -1,18 +1,51 @@
[ [
{ {
"field": "datasourceid", "field": "id",
"type": "string", "type": "string",
"value": "274269ac6f3b::2579-5449" "value": "274269ac6f3b::2579-5449"
}, },
{ {
"field": "identities", "field": "originalid",
"type": "array", "type": "array",
"value": [ "value": [
"274269ac6f3b::2579-5449", "fairsharing_::1562",
"piwik:13", "piwik:13",
null null,
"re3data_____::r3d100010213"
] ]
}, },
{
"field": "pid",
"type": "array",
"value": [
"r3d100010218###re3data@@@dnet:pid_types"
]
},
{
"field": "datasourcetype",
"type": "string",
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
},
{
"field": "datasourcetypeui",
"type": "string",
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui"
},
{
"field": "eosctype",
"type": "string",
"value": "Data Source@@@dnet:eosc_types"
},
{
"field": "eoscdatasourcetype",
"type": "string",
"value": "Journal archive@@@dnet:eosc_datasource_types"
},
{
"field": "openairecompatibility",
"type": "string",
"value": "openaire4.0@@@dnet:datasourceCompatibilityLevel"
},
{ {
"field": "officialname", "field": "officialname",
"type": "string", "type": "string",
@ -23,16 +56,6 @@
"type": "string", "type": "string",
"value": "Jurnal Ilmiah Pendidikan Scholastic" "value": "Jurnal Ilmiah Pendidikan Scholastic"
}, },
{
"field": "contactemail",
"type": "string",
"value": "test@test.it"
},
{
"field": "openairecompatibility",
"type": "string",
"value": "hostedBy@@@dnet:datasourceCompatibilityLevel"
},
{ {
"field": "websiteurl", "field": "websiteurl",
"type": "string", "type": "string",
@ -44,11 +67,14 @@
"value": null "value": null
}, },
{ {
"field": "accessinfopackage", "field": "contactemail",
"type": "array", "type": "string",
"value": [ "value": "test@test.it"
null },
] {
"field": "namespaceprefix",
"type": "string",
"value": "ojs_25795449"
}, },
{ {
"field": "latitude", "field": "latitude",
@ -61,9 +87,19 @@
"value": 0 "value": 0
}, },
{ {
"field": "namespaceprefix", "field": "dateofvalidation",
"type": "date",
"value": null
},
{
"field": "description",
"type": "string", "type": "string",
"value": "ojs_25795449" "value": "veterinary medicine"
},
{
"field": "subjects",
"type": "array",
"value": []
}, },
{ {
"field": "odnumberofitems", "field": "odnumberofitems",
@ -75,16 +111,6 @@
"type": "date", "type": "date",
"value": null "value": null
}, },
{
"field": "subjects",
"type": "array",
"value": null
},
{
"field": "description",
"type": "string",
"value": null
},
{ {
"field": "odpolicies", "field": "odpolicies",
"type": "string", "type": "string",
@ -93,44 +119,33 @@
{ {
"field": "odlanguages", "field": "odlanguages",
"type": "array", "type": "array",
"value": []
},
{
"field": "odcontenttypes",
"type": "array",
"value": [ "value": [
"Journal articles" "English",
"German",
"French",
"Danish",
"Norwegian",
"Swedish"
] ]
}, },
{ {
"field": "inferred", "field": "languages",
"type": "boolean", "type": "array",
"value": false "value": [
"English",
"German",
"French",
"Danish",
"Norwegian",
"Swedish"
]
}, },
{ {
"field": "deletedbyinference", "field": "accessinfopackage",
"type": "boolean", "type": "array",
"value": false "value": [
}, "http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai"
{ ]
"field": "trust",
"type": "double",
"value": 0.9
},
{
"field": "inferenceprovenance",
"type": "string",
"value": null
},
{
"field": "dateofcollection",
"type": "date",
"value": "2020-01-21"
},
{
"field": "dateofvalidation",
"type": "date",
"value": null
}, },
{ {
"field": "releasestartdate", "field": "releasestartdate",
@ -147,16 +162,6 @@
"type": "string", "type": "string",
"value": null "value": null
}, },
{
"field": "dataprovider",
"type": "boolean",
"value": null
},
{
"field": "serviceprovider",
"type": "boolean",
"value": null
},
{ {
"field": "databaseaccesstype", "field": "databaseaccesstype",
"type": "string", "type": "string",
@ -183,12 +188,12 @@
"value": null "value": null
}, },
{ {
"field": "citationguidelineurl", "field": "versioncontrol",
"type": "string", "type": "boolean",
"value": null "value": null
}, },
{ {
"field": "qualitymanagementkind", "field": "citationguidelineurl",
"type": "string", "type": "string",
"value": null "value": null
}, },
@ -208,29 +213,38 @@
"value": [] "value": []
}, },
{ {
"field": "collectedfromid", "field": "inferred",
"type": "string", "type": "boolean",
"value": "openaire____::SnVybmFsIEZha3VsdGFzIFNhc3RyYSBVbml2ZXJzaXRhcyBFa2FzYWt0aQ==" "value": false
}, },
{ {
"field": "collectedfromname", "field": "deletedbyinference",
"type": "string", "type": "boolean",
"value": "Jurnal Fakultas Sastra Universitas Ekasakti" "value": false
}, },
{ {
"field": "datasourcetype", "field": "trust",
"type": "string", "type": "double",
"value": "pubsrepository::journal@@@dnet:datasource_typologies" "value": 0.9
}, },
{ {
"field": "datasourcetypeui", "field": "inferenceprovenance",
"type": "string", "type": "string",
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui" "value": null
}, },
{ {
"field": "provenanceaction", "field": "dateofcollection",
"type": "not_used", "type": "date",
"value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions" "value": "2020-01-21"
},
{
"field": "collectedfrom",
"type": "array",
"value": [
"openaire____::fairsharing@@@FAIRsharing.org",
"openaire____::opendoar@@@OpenDOAR",
"openaire____::re3data@@@Registry of Research Data Repository"
]
}, },
{ {
"field": "issnPrinted", "field": "issnPrinted",
@ -247,6 +261,13 @@
"type": "string", "type": "string",
"value": "2579-5447" "value": "2579-5447"
}, },
{
"field": "researchentitytypes",
"type": "array",
"value": [
"Research Data"
]
},
{ {
"field": "jurisdiction", "field": "jurisdiction",
"type": "string", "type": "string",
@ -257,16 +278,36 @@
"type": "boolean", "type": "boolean",
"value": true "value": true
}, },
{
"field": "knowledgegraph",
"type": "boolean",
"value": true
},
{ {
"field": "contentpolicies", "field": "contentpolicies",
"type": "array", "type": "array",
"value": [ "value": [
"Journal article@@@eosc:contentpolicies" "Taxonomic classification@@@eosc:contentpolicies",
"Resource collection@@@eosc:contentpolicies"
]
},
{
"field": "submissionpolicyurl",
"type": "string",
"value": null
},
{
"field": "preservationpolicyurl",
"type": "string",
"value": "Permanent Archiving https://datadryad.org/stash/faq"
},
{
"field": "researchproductaccesspolicies",
"type": "array",
"value": [
"https://100percentit.com/legal/"
]
},
{
"field": "researchproductmetadataaccesspolicies",
"type": "array",
"value": [
"https://wenmr.science.uu.nl/conditions"
] ]
}, },
{ {
@ -283,5 +324,10 @@
"field": "consenttermsofusedate", "field": "consenttermsofusedate",
"type": "date", "type": "date",
"value": "2022-03-11" "value": "2022-03-11"
},
{
"field": "lastconsenttermsofusedate",
"type": "date",
"value": "2022-03-11"
} }
] ]
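Several fields in this fixture carry encoded qualifiers: plain qualifiers as classid@@@schemeid, and pids as value###classid@@@schemeid. A hedged sketch of decoding one such token, with the markers taken from the values above (the decoding itself is an assumption about how the mocks interpret them):

    // Hypothetical decoder for the fixture's pid token.
    String token = "r3d100010218###re3data@@@dnet:pid_types";
    String[] valueAndQualifier = token.split("###");
    String[] qualifierParts = valueAndQualifier[1].split("@@@");
    String pidValue = valueAndQualifier[0];  // "r3d100010218"
    String classid = qualifierParts[0];      // "re3data"
    String schemeid = qualifierParts[1];     // "dnet:pid_types"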

View File

@ -535,6 +535,12 @@ public class XmlRecordFactory implements Serializable {
if (ds.getDatasourcetypeui() != null) { if (ds.getDatasourcetypeui() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui())); metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui()));
} }
if (ds.getEosctype() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("eosctype", ds.getEosctype()));
}
if (ds.getEoscdatasourcetype() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("eoscdatasourcetype", ds.getEoscdatasourcetype()));
}
if (ds.getOpenairecompatibility() != null) { if (ds.getOpenairecompatibility() != null) {
metadata metadata
.add( .add(
@ -583,6 +589,16 @@ public class XmlRecordFactory implements Serializable {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
} }
if (ds.getSubjects() != null) {
metadata
.addAll(
ds
.getSubjects()
.stream()
.filter(Objects::nonNull)
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp))
.collect(Collectors.toList()));
}
if (ds.getOdnumberofitems() != null) { if (ds.getOdnumberofitems() != null) {
metadata metadata
.add( .add(
@ -609,6 +625,16 @@ public class XmlRecordFactory implements Serializable {
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue())) .map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue()))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (ds.getLanguages() != null) {
metadata
.addAll(
ds
.getLanguages()
.stream()
.filter(Objects::nonNull)
.map(c -> XmlSerializationUtils.asXmlElement("languages", c))
.collect(Collectors.toList()));
}
if (ds.getOdcontenttypes() != null) { if (ds.getOdcontenttypes() != null) {
metadata metadata
.addAll( .addAll(
@ -689,18 +715,18 @@ public class XmlRecordFactory implements Serializable {
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("versioning", ds.getVersioning().getValue().toString())); .asXmlElement("versioning", ds.getVersioning().getValue().toString()));
} }
if (ds.getVersioncontrol() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("versioncontrol", ds.getVersioncontrol().toString()));
}
if (ds.getCitationguidelineurl() != null) { if (ds.getCitationguidelineurl() != null) {
metadata metadata
.add( .add(
XmlSerializationUtils XmlSerializationUtils
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue())); .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
} }
if (ds.getQualitymanagementkind() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
}
if (ds.getPidsystems() != null) { if (ds.getPidsystems() != null) {
metadata metadata
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
@ -722,17 +748,24 @@ public class XmlRecordFactory implements Serializable {
if (ds.getJournal() != null) { if (ds.getJournal() != null) {
metadata.add(XmlSerializationUtils.mapJournal(ds.getJournal())); metadata.add(XmlSerializationUtils.mapJournal(ds.getJournal()));
} }
if (ds.getSubjects() != null) { if (ds.getResearchentitytypes() != null) {
metadata metadata
.addAll( .addAll(
ds ds
.getSubjects() .getResearchentitytypes()
.stream() .stream()
.filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("researchentitytypes", c))
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp)) .collect(Collectors.toList()));
}
if (ds.getProvidedproducttypes() != null) {
metadata
.addAll(
ds
.getProvidedproducttypes()
.stream()
.map(c -> XmlSerializationUtils.asXmlElement("providedproducttypes", c))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (ds.getJurisdiction() != null) { if (ds.getJurisdiction() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction())); metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction()));
} }
@ -741,11 +774,6 @@ public class XmlRecordFactory implements Serializable {
metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString())); metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString()));
} }
if (ds.getKnowledgegraph() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString()));
}
if (ds.getContentpolicies() != null) { if (ds.getContentpolicies() != null) {
metadata metadata
.addAll( .addAll(
@ -756,7 +784,34 @@ public class XmlRecordFactory implements Serializable {
.map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q)) .map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (ds.getSubmissionpolicyurl() != null) {
metadata
.add(XmlSerializationUtils.asXmlElement("submissionpolicyurl", ds.getSubmissionpolicyurl()));
}
if (ds.getPreservationpolicyurl() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("preservationpolicyurl", ds.getPreservationpolicyurl()));
}
if (ds.getResearchproductaccesspolicies() != null) {
metadata
.addAll(
ds
.getResearchproductaccesspolicies()
.stream()
.map(c -> XmlSerializationUtils.asXmlElement("researchproductaccesspolicies", c))
.collect(Collectors.toList()));
}
if (ds.getResearchproductmetadataaccesspolicies() != null) {
metadata
.addAll(
ds
.getResearchproductmetadataaccesspolicies()
.stream()
.map(
c -> XmlSerializationUtils.asXmlElement("researchproductmetadataaccesspolicies", c))
.collect(Collectors.toList()));
}
break; break;
case organization: case organization:
final Organization o = (Organization) entity; final Organization o = (Organization) entity;

View File

@ -41,7 +41,8 @@
} }
}, },
"publicationDate": { "publicationDate": {
"type": "keyword" "type": "date",
"format": "yyyy-MM-dd"
}, },
"relationship": { "relationship": {
"properties": { "properties": {

View File

@ -11,6 +11,7 @@ import java.util.List;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -142,7 +143,7 @@ public class XmlRecordFactoryTest {
} }
@Test @Test
public void testDatasource() throws IOException, DocumentException { public void testService() throws IOException, DocumentException {
final ContextMapper contextMapper = new ContextMapper(); final ContextMapper contextMapper = new ContextMapper();
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
@ -167,6 +168,14 @@ public class XmlRecordFactoryTest {
assertEquals("true", doc.valueOf("//thematic")); assertEquals("true", doc.valueOf("//thematic"));
assertEquals("Journal article", doc.valueOf("//contentpolicy/@classname")); assertEquals("Journal article", doc.valueOf("//contentpolicy/@classname"));
assertEquals("Journal archive", doc.valueOf("//datasourcetypeui/@classname")); assertEquals("Journal archive", doc.valueOf("//datasourcetypeui/@classname"));
assertEquals("Data Source", doc.valueOf("//eosctype/@classname"));
final List pids = doc.selectNodes("//pid");
assertEquals(1, pids.size());
assertEquals("re3data", ((Element) pids.get(0)).attribute("classid").getValue());
assertEquals(
"Registry of research data repositories", ((Element) pids.get(0)).attribute("classname").getValue());
assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemeid").getValue());
assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemename").getValue());
} }
} }
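The pid assertions above rely on dom4j's untyped node lists; a hedged sketch of the same traversal in isolation, assuming xml holds any record serialized by the factory and java.io.StringReader is imported:

    // Hedged dom4j sketch: list the pid class ids of a serialized record.
    SAXReader reader = new SAXReader();
    Document doc = reader.read(new StringReader(xml));
    for (Object node : doc.selectNodes("//pid")) {
        Element pid = (Element) node;
        System.out.println(pid.attributeValue("classid"));
    }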

View File

@ -81,3 +81,33 @@ where reltype='resultResult'
and r1.resulttype.classname != 'other' and r1.resulttype.classname != 'other'
and r2.resulttype.classname != 'other' and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;
create table ${stats_db_name}.result_citations_oc stored as parquet as
select substr(target, 4) as id, count(distinct substr(source, 4)) as citations
from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id
join ${openaire_db_name}.result r2 on r2.id=rel.target
where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations'
and reltype='resultResult'
and r1.resulttype.classname!=r2.resulttype.classname
and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE
and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
and r1.resulttype.classname != 'other'
and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
group by substr(target, 4);
create table ${stats_db_name}.result_references_oc stored as parquet as
select substr(source, 4) as id, count(distinct substr(target, 4)) as references
from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id
join ${openaire_db_name}.result r2 on r2.id=rel.target
where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations'
and reltype='resultResult'
and r1.resulttype.classname!=r2.resulttype.classname
and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE
and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
and r1.resulttype.classname != 'other'
and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
group by substr(source, 4);
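Both tables count the same OpenCitations Cites relations, grouped once by the cited side (citations) and once by the citing side (references). A hedged Spark sketch for inspecting the counters after the script runs; the session setup and the concrete database name stats_db are assumptions:

    // Hypothetical check: top cited results according to the OpenCitations-only counter.
    SparkSession spark = SparkSession
        .builder()
        .appName("oc-citation-check")
        .enableHiveSupport()
        .getOrCreate();
    spark
        .sql("select id, citations from stats_db.result_citations_oc order by citations desc limit 10")
        .show(false);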

View File

@ -82,31 +82,31 @@ on r.id= tmp.id;
compute stats indi_funded_result_with_fundref; compute stats indi_funded_result_with_fundref;
create table indi_result_org_country_collab stored as parquet as -- create table indi_result_org_country_collab stored as parquet as
with tmp as -- with tmp as
(select o.id as id, o.country , ro.id as result,r.type from organization o -- (select o.id as id, o.country , ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization -- join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id where o.country <> 'UNKNOWN') -- join result r on r.id=ro.id where o.country <> 'UNKNOWN')
select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -- select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations
from tmp as o1 -- from tmp as o1
join tmp as o2 on o1.result=o2.result -- join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id and o1.country<>o2.country -- where o1.id<>o2.id and o1.country<>o2.country
group by o1.id, o1.type,o2.country; -- group by o1.id, o1.type,o2.country;
--
-- compute stats indi_result_org_country_collab;
compute stats indi_result_org_country_collab; -- create table indi_result_org_collab stored as parquet as
-- with tmp as
create table indi_result_org_collab stored as parquet as -- (select o.id, ro.id as result,r.type from organization o
with tmp as -- join result_organization ro on o.id=ro.organization
(select o.id, ro.id as result,r.type from organization o -- join result r on r.id=ro.id)
join result_organization ro on o.id=ro.organization -- select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations
join result r on r.id=ro.id) -- from tmp as o1
select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations -- join tmp as o2 on o1.result=o2.result
from tmp as o1 -- where o1.id<>o2.id
join tmp as o2 on o1.result=o2.result -- group by o1.id, o2.id, o1.type;
where o1.id<>o2.id --
group by o1.id, o2.id, o1.type; -- compute stats indi_result_org_collab;
compute stats indi_result_org_collab;
create table indi_funder_country_collab stored as parquet as create table indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from organization_projects op with tmp as (select funder, project, country from organization_projects op

View File

@ -18,28 +18,45 @@ create table TARGET.result stored as parquet as
select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
union all union all
select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
'openorgs____::759d59f05d77188faee99b7493b46805', 'openorgs____::b84450f9864182c67b8611b5593f4250', --Athena Research and Innovation Center in Information, Communication & Knowledge Technologies (ARC)
'openorgs____::b84450f9864182c67b8611b5593f4250', 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
'openorgs____::eadc8da90a546e98c03f896661a2e4d4', 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
'openorgs____::d169c7407dd417152596908d48c11460', 'openorgs____::2fb1e47b4612688d9de9169d579939a7', --University of Helsinki
'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
'openorgs____::2fb1e47b4612688d9de9169d579939a7', 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
'openorgs____::759d59f05d77188faee99b7493b46805', 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
'openorgs____::cad284878801b9465fa51a95b1d779db', 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
'openorgs____::eadc8da90a546e98c03f896661a2e4d4', -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
'openorgs____::c0286313e36479eff8676dba9b724b40' 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
-- ,'openorgs____::c80a8243a5e5c620d7931c88d93bf17a' -- Paris Diderot 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
'openorgs____::6445d7758d3a40c4d997953b6632a368' --National Institute of Informatics (NII)
) )) foo; ) )) foo;
compute stats TARGET.result; compute stats TARGET.result;
create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_citations; compute stats TARGET.result_citations;
create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_references_oc;
create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_citations_oc;
create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_classifications; compute stats TARGET.result_classifications;
create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_apc;
create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_concepts; compute stats TARGET.result_concepts;
@ -90,11 +107,6 @@ compute stats TARGET.result_sources;
create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_topics; compute stats TARGET.result_topics;
create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_apc;
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;

View File

@ -127,6 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultOrganization' WHERE r.reltype = 'resultOrganization'
and r.target like '50|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS

View File

@ -93,7 +93,7 @@ where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.target like '20|%' and r.datainfo.invisible=false;
-- datasource sources: -- datasource sources:
-- where the datasource info have been collected from. -- where the datasource info have been collected from.

View File

@ -801,7 +801,7 @@
<mockito-core.version>3.3.3</mockito-core.version> <mockito-core.version>3.3.3</mockito-core.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version> <mongodb.driver.version>3.4.2</mongodb.driver.version>
<vtd.version>[2.12,3.0)</vtd.version> <vtd.version>[2.12,3.0)</vtd.version>
<dhp-schemas.version>[2.10.32]</dhp-schemas.version> <dhp-schemas.version>[2.12.0]</dhp-schemas.version>
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version> <dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version> <dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version> <dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>