merged from beta

Claudio Atzori 2022-04-22 11:55:25 +02:00
commit 888f2de196
208 changed files with 8330 additions and 1242 deletions

.scalafmt.conf (new file)

@ -0,0 +1,21 @@
style = defaultWithAlign
align.openParenCallSite = false
align.openParenDefnSite = false
align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}]
continuationIndent.callSite = 2
continuationIndent.defnSite = 2
danglingParentheses = true
indentOperator = spray
maxColumn = 120
newlines.alwaysBeforeTopLevelStatements = true
project.excludeFilters = [".*\\.sbt"]
# a single list: repeated assignments to rewrite.rules would override each other, keeping only the last rule
rewrite.rules = [AvoidInfix, ExpandImportSelectors, RedundantBraces, RedundantParens, SortImports, SortModifiers, PreferCurlyFors]
spaces.inImportCurlyBraces = false
unindentTopLevelOperators = true


@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-assembly-resources</artifactId>


@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-properties-maven-plugin</artifactId>


@ -5,7 +5,7 @@
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-code-style</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
<packaging>jar</packaging>
@ -47,12 +47,16 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.9.1</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<dhp.site.stage.path>sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop</dhp.site.stage.path>
</properties>


@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-build</artifactId>
<packaging>pom</packaging>


@ -5,7 +5,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>


@ -0,0 +1,40 @@
package eu.dnetlib.dhp.common.collection;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class DecompressTarGz {
public static void doExtract(FileSystem fs, String outputPath, String tarGzPath) throws IOException {
FSDataInputStream inputFileStream = fs.open(new Path(tarGzPath));
try (TarArchiveInputStream tais = new TarArchiveInputStream(
new GzipCompressorInputStream(inputFileStream))) {
TarArchiveEntry entry = null;
while ((entry = tais.getNextTarEntry()) != null) {
if (!entry.isDirectory()) {
try (
FSDataOutputStream out = fs
.create(new Path(outputPath.concat(entry.getName()).concat(".gz")));
GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
IOUtils.copy(tais, gzipOs);
}
}
}
}
}
}
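
A minimal usage sketch for the new DecompressTarGz helper (illustrative only, not part of the commit): the Hadoop configuration and both paths below are assumptions. Note that doExtract concatenates each entry name directly onto outputPath, so the output prefix should end with a trailing slash.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import eu.dnetlib.dhp.common.collection.DecompressTarGz;

public class DecompressTarGzExample {
	public static void main(String[] args) throws Exception {
		// default Hadoop configuration; in a workflow this would resolve to the cluster file system
		FileSystem fs = FileSystem.get(new Configuration());
		// every entry of the tar.gz is re-compressed as an individual .gz file under the output prefix
		DecompressTarGz.doExtract(fs, "/tmp/opencitations/extracted/", "/tmp/opencitations/dump.tar.gz");
	}
}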


@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.dedup;
package eu.dnetlib.dhp.oa.merge;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@ -38,7 +38,7 @@ public class DispatchEntitiesSparkJob {
.requireNonNull(
DispatchEntitiesSparkJob.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json")));
"/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);


@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.dedup;
package eu.dnetlib.dhp.oa.merge;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
@ -53,7 +53,7 @@ public class GroupEntitiesSparkJob {
.toString(
GroupEntitiesSparkJob.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json"));
"/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);


@ -47,6 +47,17 @@ public class OafMapperUtils {
}
public static Result mergeResults(Result left, Result right) {
final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left);
final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right);
if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) {
return left;
}
if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
return right;
}
if (new ResultTypeComparator().compare(left, right) < 0) {
left.mergeFrom(right);
return left;
@ -56,6 +67,18 @@ public class OafMapperUtils {
}
}
private static boolean isFromDelegatedAuthority(Result r) {
return Optional
.ofNullable(r.getInstance())
.map(
instance -> instance
.stream()
.filter(i -> Objects.nonNull(i.getCollectedfrom()))
.map(i -> i.getCollectedfrom().getKey())
.anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)))
.orElse(false);
}
public static KeyValue keyValue(final String k, final String v) {
final KeyValue kv = new KeyValue();
kv.setKey(k);
@ -368,4 +391,19 @@ public class OafMapperUtils {
}
return null;
}
public static KeyValue newKeyValueInstance(String key, String value, DataInfo dataInfo) {
KeyValue kv = new KeyValue();
kv.setDataInfo(dataInfo);
kv.setKey(key);
kv.setValue(value);
return kv;
}
public static Measure newMeasureInstance(String id, String value, String key, DataInfo dataInfo) {
Measure m = new Measure();
m.setId(id);
m.setUnit(Arrays.asList(newKeyValueInstance(key, value, dataInfo)));
return m;
}
}
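
For reference, a small sketch of the Measure produced by the new newMeasureInstance helper; the class name, the literal values and the null DataInfo are assumptions, while the "downloads"/"count" pair mirrors how the usage-stats action set job further below uses this method.

import eu.dnetlib.dhp.schema.oaf.Measure;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

public class MeasureExample {
	public static void main(String[] args) {
		// id identifies the measure; key/value become its single unit entry; dataInfo left null for brevity
		Measure downloads = OafMapperUtils.newMeasureInstance("downloads", "10", "count", null);
		System.out.println(downloads.getId() + " -> " + downloads.getUnit().get(0).getValue()); // prints: downloads -> 10
	}
}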


@ -185,6 +185,22 @@ class OafMapperUtilsTest {
.getClassid());
}
@Test
void testDelegatedAuthority() throws IOException {
Dataset d1 = read("dataset_2.json", Dataset.class);
Dataset d2 = read("dataset_delegated.json", Dataset.class);
assertEquals(1, d2.getCollectedfrom().size());
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
Result res = OafMapperUtils.mergeResults(d1, d2);
assertEquals(d2, res);
System.out.println(OBJECT_MAPPER.writeValueAsString(res));
}
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
}


@ -1 +1,140 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
"resuttype": {"classid": "dataset"},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2011.03.013"
},
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
],
"collectedfrom": [
{
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
"value": "Repository B"
}
],
"instance": [
{
"refereed": {
"classid": "0000",
"classname": "UNKNOWN",
"schemeid": "dnet:review_levels",
"schemename": "dnet:review_levels"
},
"hostedby": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"accessright": {
"classid": "OPEN",
"classname": "Open Access",
"schemeid": "dnet:access_modes",
"schemename": "dnet:access_modes"
},
"processingchargecurrency": {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "EUR"
},
"pid": [
{
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": {
"classid": "doi",
"classname": "Digital Object Identifier",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1371/journal.pone.0085605"
}
],
"distributionlocation": "",
"url": ["https://doi.org/10.1371/journal.pone.0085605"],
"alternateIdentifier": [
{
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": {
"classid": "pmid",
"classname": "PubMed ID",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "24454899.0"
}
],
"collectedfrom": {
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
"value": "Repository B"
},
"processingchargeamount": {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "1022.02"
},
"instancetype": {
"classid": "0004",
"classname": "Conference object",
"schemeid": "dnet:publication_resource",
"schemename": "dnet:publication_resource"
}
}
]
}


@ -0,0 +1,140 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
"resuttype": {"classid": "dataset"},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2011.03.013"
},
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
],
"collectedfrom": [
{
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
}
],
"instance": [
{
"refereed": {
"classid": "0000",
"classname": "UNKNOWN",
"schemeid": "dnet:review_levels",
"schemename": "dnet:review_levels"
},
"hostedby": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"accessright": {
"classid": "OPEN",
"classname": "Open Access",
"schemeid": "dnet:access_modes",
"schemename": "dnet:access_modes"
},
"processingchargecurrency": {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "EUR"
},
"pid": [
{
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": {
"classid": "doi",
"classname": "Digital Object Identifier",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1371/journal.pone.0085605"
}
],
"distributionlocation": "",
"url": ["https://doi.org/10.1371/journal.pone.0085605"],
"alternateIdentifier": [
{
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": {
"classid": "pmid",
"classname": "PubMed ID",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "24454899.0"
}
],
"collectedfrom": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"processingchargeamount": {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "1022.02"
},
"instancetype": {
"classid": "0004",
"classname": "Conference object",
"schemeid": "dnet:publication_resource",
"schemename": "dnet:publication_resource"
}
}
]
}


@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-actionmanager</artifactId>


@ -0,0 +1,21 @@
style = defaultWithAlign
align.openParenCallSite = false
align.openParenDefnSite = false
align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}]
continuationIndent.callSite = 2
continuationIndent.defnSite = 2
danglingParentheses = true
indentOperator = spray
maxColumn = 120
newlines.alwaysBeforeTopLevelStatements = true
project.excludeFilters = [".*\\.sbt"]
# a single list: repeated assignments to rewrite.rules would override each other, keeping only the last rule
rewrite.rules = [AvoidInfix, ExpandImportSelectors, RedundantBraces, RedundantParens, SortImports, SortModifiers, PreferCurlyFors]
spaces.inImportCurlyBraces = false
unindentTopLevelOperators = true


@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-aggregation</artifactId>
<build>


@ -27,6 +27,8 @@ public class Constants {
public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE";
public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip";
public static final String UPDATE_SUBJECT_SDG_CLASS_ID = "subject:sdg";
public static final String UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID = "measure:usage_counts";
public static final String UPDATE_KEY_USAGE_COUNTS = "count";
public static final String FOS_CLASS_ID = "FOS";
public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification";


@ -21,8 +21,10 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.DHPUtils;
public class PrepareFOSSparkJob implements Serializable {
@ -71,6 +73,7 @@ public class PrepareFOSSparkJob implements Serializable {
Result r = new Result();
FOSDataModel first = it.next();
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
HashSet<String> level1 = new HashSet<>();
HashSet<String> level2 = new HashSet<>();
HashSet<String> level3 = new HashSet<>();
@ -81,6 +84,19 @@ public class PrepareFOSSparkJob implements Serializable {
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
r.setSubject(sbjs);
r
.setDataInfo(
OafMapperUtils
.dataInfo(
false, null, true,
false,
OafMapperUtils
.qualifier(
ModelConstants.PROVENANCE_ENRICH,
null,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
null));
return r;
}, Encoders.bean(Result.class))
.write()


@ -21,8 +21,10 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.DHPUtils;
public class PrepareSDGSparkJob implements Serializable {
@ -78,6 +80,19 @@ public class PrepareSDGSparkJob implements Serializable {
s -> sbjs
.add(getSubject(s.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)));
r.setSubject(sbjs);
r
.setDataInfo(
OafMapperUtils
.dataInfo(
false, null, true,
false,
OafMapperUtils
.qualifier(
ModelConstants.PROVENANCE_ENRICH,
null,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
null));
return r;
}, Encoders.bean(Result.class))
.write()


@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.actionmanager.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
@ -67,7 +68,19 @@ public class SparkSaveUnresolved implements Serializable {
.groupByKey((MapFunction<Result, String>) Result::getId, Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Result, Result>) (k, it) -> {
Result ret = it.next();
it.forEachRemaining(r -> ret.mergeFrom(r));
it.forEachRemaining(r -> {
if (r.getInstance() != null) {
ret.setInstance(r.getInstance());
}
if (r.getSubject() != null) {
if (ret.getSubject() != null)
ret.getSubject().addAll(r.getSubject());
else
ret.setSubject(r.getSubject());
}
// ret.mergeFrom(r)
});
return ret;
}, Encoders.bean(Result.class))
.write()


@ -14,6 +14,7 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
@ -21,6 +22,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants;
@ -83,10 +85,13 @@ public class CreateActionSetSparkJob implements Serializable {
private static void extractContent(SparkSession spark, String inputPath, String outputPath,
boolean shouldDuplicateRels) {
spark
.sqlContext()
.createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING())
.read()
.textFile(inputPath + "/*")
.map(
(MapFunction<String, COCI>) value -> OBJECT_MAPPER.readValue(value, COCI.class),
Encoders.bean(COCI.class))
.flatMap(
(FlatMapFunction<String, Relation>) value -> createRelation(value, shouldDuplicateRels).iterator(),
(FlatMapFunction<COCI, Relation>) value -> createRelation(value, shouldDuplicateRels).iterator(),
Encoders.bean(Relation.class))
.filter((FilterFunction<Relation>) value -> value != null)
.toJavaRDD()
@ -98,26 +103,30 @@ public class CreateActionSetSparkJob implements Serializable {
}
private static List<Relation> createRelation(String value, boolean duplicate) {
String[] line = value.split(",");
if (!line[1].startsWith("10.")) {
return new ArrayList<>();
}
private static List<Relation> createRelation(COCI value, boolean duplicate) {
List<Relation> relationList = new ArrayList<>();
String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[1]));
final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[2]));
String citing = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting()));
final String cited = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
relationList
.addAll(
getRelations(
citing,
cited));
if (!citing.equals(cited)) {
relationList
.addAll(
getRelations(
citing,
cited));
if (duplicate && line[1].endsWith(".refs")) {
citing = ID_PREFIX + IdentifierFactory
.md5(CleaningFunctions.normalizePidValue("doi", line[1].substring(0, line[1].indexOf(".refs"))));
relationList.addAll(getRelations(citing, cited));
if (duplicate && value.getCiting().endsWith(".refs")) {
citing = ID_PREFIX + IdentifierFactory
.md5(
CleaningFunctions
.normalizePidValue(
"doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
relationList.addAll(getRelations(citing, cited));
}
}
return relationList;


@ -0,0 +1,103 @@
package eu.dnetlib.dhp.actionmanager.opencitations;
import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER;
import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.IOException;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class ReadCOCI implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ReadCOCI.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
ReadCOCI.class
.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String[] inputFile = parser.get("inputFile").split(";");
log.info("inputFile {}", inputFile.toString());
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String workingPath = parser.get("workingPath");
log.info("workingPath {}", workingPath);
SparkConf sconf = new SparkConf();
final String delimiter = Optional
.ofNullable(parser.get("delimiter"))
.orElse(DEFAULT_DELIMITER);
runWithSparkSession(
sconf,
isSparkSessionManaged,
spark -> {
doRead(
spark,
workingPath,
inputFile,
outputPath,
delimiter);
});
}
private static void doRead(SparkSession spark, String workingPath, String[] inputFiles,
String outputPath,
String delimiter) throws IOException {
for (String inputFile : inputFiles) {
String p_string = workingPath + "/" + inputFile + ".gz";
Dataset<Row> cociData = spark
.read()
.format("csv")
.option("sep", delimiter)
.option("inferSchema", "true")
.option("header", "true")
.option("quotes", "\"")
.load(p_string)
.repartition(100);
cociData.map((MapFunction<Row, COCI>) row -> {
COCI coci = new COCI();
coci.setOci(row.getString(0));
coci.setCiting(row.getString(1));
coci.setCited(row.getString(2));
return coci;
}, Encoders.bean(COCI.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + inputFile);
}
}
}


@ -0,0 +1,39 @@
package eu.dnetlib.dhp.actionmanager.opencitations.model;
import java.io.Serializable;
import com.opencsv.bean.CsvBindByPosition;
public class COCI implements Serializable {
private String oci;
private String citing;
private String cited;
public String getOci() {
return oci;
}
public void setOci(String oci) {
this.oci = oci;
}
public String getCiting() {
return citing;
}
public void setCiting(String citing) {
this.citing = citing;
}
public String getCited() {
return cited;
}
public void setCited(String cited) {
this.cited = cited;
}
}


@ -3,6 +3,7 @@ package eu.dnetlib.dhp.actionmanager.ror;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
@ -29,8 +30,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
@ -38,8 +38,8 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.SparkAtomicActionJob;
import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
import eu.dnetlib.dhp.actionmanager.ror.model.Relationship;
import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
@ -48,8 +48,10 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
@ -112,24 +114,21 @@ public class GenerateRorActionSetJob {
final String outputPath) throws IOException {
readInputPath(spark, inputPath)
.map(
(MapFunction<RorOrganization, Organization>) GenerateRorActionSetJob::convertRorOrg,
Encoders.bean(Organization.class))
.toJavaRDD()
.map(o -> new AtomicAction<>(Organization.class, o))
.map(GenerateRorActionSetJob::convertRorOrg)
.flatMap(List::iterator)
.mapToPair(
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
}
protected static Organization convertRorOrg(final RorOrganization r) {
protected static List<AtomicAction<? extends Oaf>> convertRorOrg(final RorOrganization r) {
final Date now = new Date();
final Organization o = new Organization();
o.setId(String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(r.getId())));
o.setId(calculateOpenaireId(r.getId()));
o.setOriginalId(Arrays.asList(String.format("%s::%s", ROR_NS_PREFIX, r.getId())));
o.setCollectedfrom(ROR_COLLECTED_FROM);
o.setPid(pids(r));
@ -166,7 +165,43 @@ public class GenerateRorActionSetJob {
o.setDataInfo(ROR_DATA_INFO);
o.setLastupdatetimestamp(now.getTime());
return o;
final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
res.add(new AtomicAction<>(Organization.class, o));
for (final Relationship rorRel : r.getRelationships()) {
if (rorRel.getType().equalsIgnoreCase("parent")) {
final String orgId1 = calculateOpenaireId(r.getId());
final String orgId2 = calculateOpenaireId(rorRel.getId());
res
.add(
new AtomicAction<>(Relation.class,
calculateHierarchyRel(orgId1, orgId2, ModelConstants.IS_PARENT_OF)));
res
.add(
new AtomicAction<>(Relation.class,
calculateHierarchyRel(orgId2, orgId1, ModelConstants.IS_CHILD_OF)));
}
}
return res;
}
private static Relation calculateHierarchyRel(final String source, final String target, final String relClass) {
final Relation rel = new Relation();
rel.setSource(source);
rel.setTarget(target);
rel.setRelType(ORG_ORG_RELTYPE);
rel.setSubRelType(ModelConstants.RELATIONSHIP);
rel.setRelClass(relClass);
rel.setCollectedfrom(ROR_COLLECTED_FROM);
rel.setDataInfo(ROR_DATA_INFO);
rel.setLastupdatetimestamp(System.currentTimeMillis());
return rel;
}
private static String calculateOpenaireId(final String rorId) {
return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId));
}
private static List<StructuredProperty> pids(final RorOrganization r) {
@ -202,14 +237,14 @@ public class GenerateRorActionSetJob {
.collect(Collectors.toList());
}
private static Dataset<RorOrganization> readInputPath(
private static JavaRDD<RorOrganization> readInputPath(
final SparkSession spark,
final String path) throws IOException {
try (final FileSystem fileSystem = FileSystem.get(new Configuration());
final InputStream is = fileSystem.open(new Path(path))) {
final RorOrganization[] arr = OBJECT_MAPPER.readValue(is, RorOrganization[].class);
return spark.createDataset(Arrays.asList(arr), Encoders.bean(RorOrganization.class));
return spark.createDataset(Arrays.asList(arr), Encoders.bean(RorOrganization.class)).toJavaRDD();
}
}


@ -0,0 +1,149 @@
package eu.dnetlib.dhp.actionmanager.usagestats;
import static eu.dnetlib.dhp.actionmanager.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Measure;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
/**
Creates the atomic actions carrying the usage counts for each result
*/
public class SparkAtomicActionUsageJob implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionUsageJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static <I extends Result> void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkAtomicActionUsageJob.class
.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String outputPath = parser.get("outputPath");
log.info("outputPath {}: ", outputPath);
SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
final String dbname = parser.get("usagestatsdb");
final String workingPath = parser.get("workingPath");
runWithSparkHiveSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, outputPath);
prepareResults(dbname, spark, workingPath);
prepareActionSet(spark, workingPath, outputPath);
});
}
public static void prepareResults(String db, SparkSession spark, String workingPath) {
spark
.sql(
"Select result_id, downloads, views " +
"from " + db + ".usage_stats")
.as(Encoders.bean(UsageStatsModel.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath);
}
public static void prepareActionSet(SparkSession spark, String inputPath, String outputPath) {
readPath(spark, inputPath, UsageStatsModel.class)
.groupByKey((MapFunction<UsageStatsModel, String>) us -> us.getResult_id(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, UsageStatsModel, Result>) (k, it) -> {
UsageStatsModel first = it.next();
it.forEachRemaining(us -> {
first.setDownloads(first.getDownloads() + us.getDownloads());
first.setViews(first.getViews() + us.getViews());
});
Result res = new Result();
res.setId("50|" + k);
res.setMeasures(getMeasure(first.getDownloads(), first.getViews()));
return res;
}, Encoders.bean(Result.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
}
private static List<Measure> getMeasure(Long downloads, Long views) {
DataInfo dataInfo = OafMapperUtils
.dataInfo(
false,
UPDATE_DATA_INFO_TYPE,
true,
false,
OafMapperUtils
.qualifier(
UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID,
UPDATE_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
"");
return Arrays
.asList(
OafMapperUtils
.newMeasureInstance("downloads", String.valueOf(downloads), UPDATE_KEY_USAGE_COUNTS, dataInfo),
OafMapperUtils.newMeasureInstance("views", String.valueOf(views), UPDATE_KEY_USAGE_COUNTS, dataInfo));
}
private static void removeOutputDir(SparkSession spark, String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
public static <R> Dataset<R> readPath(
SparkSession spark, String inputPath, Class<R> clazz) {
return spark
.read()
.textFile(inputPath)
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
}
}


@ -0,0 +1,34 @@
package eu.dnetlib.dhp.actionmanager.usagestats;
import java.io.Serializable;
public class UsageStatsModel implements Serializable {
private String result_id;
private Long downloads;
private Long views;
public String getResult_id() {
return result_id;
}
public void setResult_id(String result_id) {
this.result_id = result_id;
}
public Long getDownloads() {
return downloads;
}
public void setDownloads(Long downloads) {
this.downloads = downloads;
}
public Long getViews() {
return views;
}
public void setViews(Long views) {
this.views = views;
}
}


@ -0,0 +1,37 @@
[
{
"paramName": "wp",
"paramLongName": "workingPath",
"paramDescription": "the zipped opencitations file",
"paramRequired": true
},
{
"paramName": "issm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "the hdfs name node",
"paramRequired": false
},
{
"paramName": "d",
"paramLongName": "delimiter",
"paramDescription": "the hdfs name node",
"paramRequired": false
},
{
"paramName": "op",
"paramLongName": "outputPath",
"paramDescription": "the hdfs name node",
"paramRequired": true
},
{
"paramName": "if",
"paramLongName": "inputFile",
"paramDescription": "the hdfs name node",
"paramRequired": true
}
]


@ -26,6 +26,7 @@
<switch>
<case to="download">${wf:conf('resumeFrom') eq 'DownloadDump'}</case>
<case to="extract">${wf:conf('resumeFrom') eq 'ExtractContent'}</case>
<case to="read">${wf:conf('resumeFrom') eq 'ReadContent'}</case>
<default to="create_actionset"/> <!-- first action to be done when downloadDump is to be performed -->
</switch>
</decision>
@ -60,6 +61,32 @@
<arg>--inputFile</arg><arg>${inputFile}</arg>
<arg>--workingPath</arg><arg>${workingPath}</arg>
</java>
<ok to="read"/>
<error to="Kill"/>
</action>
<action name="read">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Produces the AS for OC</name>
<class>eu.dnetlib.dhp.actionmanager.opencitations.ReadCOCI</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--workingPath</arg><arg>${workingPath}/COCI</arg>
<arg>--outputPath</arg><arg>${workingPath}/COCI_JSON/</arg>
<arg>--delimiter</arg><arg>${delimiter}</arg>
<arg>--inputFile</arg><arg>${inputFileCoci}</arg>
</spark>
<ok to="create_actionset"/>
<error to="Kill"/>
</action>
@ -81,7 +108,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--inputPath</arg><arg>${workingPath}/COCI</arg>
<arg>--inputPath</arg><arg>${workingPath}/COCI_JSON</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>


@ -0,0 +1,32 @@
[
{
"paramName": "issm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "when true will stop SparkSession after job execution",
"paramRequired": false
},
{
"paramName": "hmu",
"paramLongName": "hive_metastore_uris",
"paramDescription": "the URI for the hive metastore",
"paramRequired": true
},
{
"paramName": "o",
"paramLongName": "outputPath",
"paramDescription": "the path of the new ActionSet",
"paramRequired": true
},
{
"paramName": "sdb",
"paramLongName": "usagestatsdb",
"paramDescription": "the name of the db to be used",
"paramRequired": true
},
{
"paramName": "wp",
"paramLongName": "workingPath",
"paramDescription": "the workingPath where to save the content of the usage_stats table",
"paramRequired": true
}
]


@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>


@ -0,0 +1,99 @@
<workflow-app name="UsageStatsCounts" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>outputPath</name>
<description>the path where to store the actionset</description>
</property>
<property>
<name>usagestatsdb</name>
<description>the name of the db to be used</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="atomicactions"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="atomicactions">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Produces the atomic action with the usage stats count for results</name>
<class>eu.dnetlib.dhp.actionmanager.usagestats.SparkAtomicActionUsageJob</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--hive_metastore_uris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--usagestatsdb</arg><arg>${usagestatsdb}</arg>
<arg>--workingPath</arg><arg>${workingDir}/usageDb</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>


@ -0,0 +1,63 @@
from urllib.request import urlopen
import json
def retrieve_datacite_clients(base_url):
datacite_clients = {}
while base_url is not None:
with urlopen(base_url) as response:
print(f"requesting {base_url}")
response_content = response.read()
data = json.loads(response_content)
if 'data' in data and len(data['data'])>0:
for item in data['data']:
datacite_clients[item['id'].lower()]= item['attributes']['re3data'].lower().replace("https://doi.org/","")
base_url = data['links'].get('next')  # stop when the API returns no further page
else:
base_url = None
return datacite_clients
def retrieve_r3data(start_url):
r3data_clients = {}
page_number = 1
base_url = start_url
while base_url is not None:
with urlopen(base_url) as response:
print(f"requesting {base_url}")
response_content = response.read()
data = json.loads(response_content)
if 'data' in data and len(data['data'])>0:
for item in data['data']:
r3data_clients[item['id'].lower()]= dict(
openaire_id= "re3data_____::"+item['attributes']['re3dataId'].lower(),
official_name=item['attributes']['repositoryName']
)
page_number +=1
base_url = f"{start_url}&page[number]={page_number}"
else:
base_url = None
return r3data_clients
base_url ="https://api.datacite.org/clients?query=re3data_id:*&page[size]=250"
dc = retrieve_datacite_clients(base_url)
r3 = retrieve_r3data("https://api.datacite.org/re3data?page[size]=250")
result = {}
for item in dc:
res = dc[item].lower()
if res not in r3:
print(f"missing {res} for {item} in dictionary")
else:
result[item.upper()]= dict(openaire_id=r3[res]["openaire_id"],datacite_name=r3[res]["official_name"], official_name=r3[res]["official_name"] )
with open('hostedBy_map.json', 'w', encoding='utf8') as json_file:
json.dump(result, json_file, ensure_ascii=False, indent=1)


@ -49,7 +49,7 @@ abstract class AbstractRestClient extends Iterator[String] {
}
private def doHTTPRequest[A <: HttpUriRequest](r: A): String = {
val timeout = 60; // seconds
val timeout = 600; // seconds
val config = RequestConfig
.custom()
.setConnectTimeout(timeout * 1000)


@ -46,7 +46,7 @@ object ImportDatacite {
Source
.fromInputStream(
getClass.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json"
"/eu/dnetlib/dhp/datacite/import_from_api.json"
)
)
.mkString


@ -146,6 +146,11 @@ public class PrepareTest {
.get(0)
.getValue());
final String doi2 = "unresolved::10.3390/s18072310::doi";
Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count());
Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size());
}
@Test
@ -259,59 +264,61 @@ public class PrepareTest {
.collect()
.contains("8. Economic growth"));
}
@Test
void test3() throws Exception {
final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz";
final String outputPath = workingDir.toString() + "/fos.json";
GetFOSSparkJob
.main(
new String[] {
"--isSparkSessionManaged", Boolean.FALSE.toString(),
"--sourcePath", sourcePath,
"-outputPath", outputPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<FOSDataModel> tmp = sc
.textFile(outputPath)
.map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));
tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null));
tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null));
tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null));
tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null));
Assertions.assertEquals(32, tmp.filter(row -> row.getDataInfo() != null).count());
}
@Test
void test4() throws Exception {
final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz";
final String outputPath = workingDir.toString() + "/sdg.json";
GetSDGSparkJob
.main(
new String[] {
"--isSparkSessionManaged", Boolean.FALSE.toString(),
"--sourcePath", sourcePath,
"-outputPath", outputPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<SDGDataModel> tmp = sc
.textFile(outputPath)
.map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class));
tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null));
tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null));
}
// @Test
// void test3() throws Exception {
// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz";
//
// final String outputPath = workingDir.toString() + "/fos.json";
// GetFOSSparkJob
// .main(
// new String[] {
// "--isSparkSessionManaged", Boolean.FALSE.toString(),
// "--sourcePath", sourcePath,
//
// "-outputPath", outputPath
//
// });
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<FOSDataModel> tmp = sc
// .textFile(outputPath)
// .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));
//
// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null));
// tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null));
// tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null));
// tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null));
//
// }
//
// @Test
// void test4() throws Exception {
// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz";
//
// final String outputPath = workingDir.toString() + "/sdg.json";
// GetSDGSparkJob
// .main(
// new String[] {
// "--isSparkSessionManaged", Boolean.FALSE.toString(),
// "--sourcePath", sourcePath,
//
// "-outputPath", outputPath
//
// });
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<SDGDataModel> tmp = sc
// .textFile(outputPath)
// .map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class));
//
// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null));
// tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null));
//
// }
}


@ -196,6 +196,9 @@ public class ProduceTest {
final String doi = "unresolved::10.3390/s18072310::doi";
JavaRDD<Result> tmp = getResultJavaRDD();
tmp
.filter(row -> row.getId().equals(doi))
.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
Assertions
.assertEquals(
3, tmp


@ -76,7 +76,7 @@ public class CreateOpenCitationsASTest {
String inputPath = getClass()
.getResource(
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
.getPath();
CreateActionSetSparkJob
@ -99,7 +99,7 @@ public class CreateOpenCitationsASTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Relation) aa.getPayload()));
assertEquals(60, tmp.count());
assertEquals(62, tmp.count());
// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
@ -110,7 +110,7 @@ public class CreateOpenCitationsASTest {
String inputPath = getClass()
.getResource(
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
.getPath();
CreateActionSetSparkJob
@ -131,7 +131,7 @@ public class CreateOpenCitationsASTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Relation) aa.getPayload()));
assertEquals(44, tmp.count());
assertEquals(46, tmp.count());
// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
@ -142,7 +142,7 @@ public class CreateOpenCitationsASTest {
String inputPath = getClass()
.getResource(
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
.getPath();
CreateActionSetSparkJob
@ -175,7 +175,7 @@ public class CreateOpenCitationsASTest {
String inputPath = getClass()
.getResource(
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
.getPath();
CreateActionSetSparkJob
@ -215,7 +215,7 @@ public class CreateOpenCitationsASTest {
String inputPath = getClass()
.getResource(
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
.getPath();
CreateActionSetSparkJob
@ -240,8 +240,8 @@ public class CreateOpenCitationsASTest {
assertEquals("citation", r.getSubRelType());
assertEquals("resultResult", r.getRelType());
});
assertEquals(22, tmp.filter(r -> r.getRelClass().equals("Cites")).count());
assertEquals(22, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count());
assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count());
assertEquals(23, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count());
}
@ -250,7 +250,7 @@ public class CreateOpenCitationsASTest {
String inputPath = getClass()
.getResource(
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
.getPath();
CreateActionSetSparkJob
@ -295,7 +295,7 @@ public class CreateOpenCitationsASTest {
String inputPath = getClass()
.getResource(
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
.getPath();
CreateActionSetSparkJob


@ -0,0 +1,138 @@
package eu.dnetlib.dhp.actionmanager.opencitations;
import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
import eu.dnetlib.dhp.schema.oaf.Dataset;
public class ReadCOCITest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(ReadCOCITest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(ReadCOCITest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(ReadCOCITest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(ReadCOCITest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void testReadCOCI() throws Exception {
String inputPath = getClass()
.getResource(
"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
.getPath();
LocalFileSystem fs = FileSystem.getLocal(new Configuration());
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz"));
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz"));
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz"));
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz"));
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz"));
ReadCOCI
.main(
new String[] {
"-isSparkSessionManaged",
Boolean.FALSE.toString(),
"-workingPath",
workingDir.toString() + "/COCI",
"-outputPath",
workingDir.toString() + "/COCI_json/",
"-inputFile", "input1;input2;input3;input4;input5"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<COCI> tmp = sc
.textFile(workingDir.toString() + "/COCI_json/*/")
.map(item -> OBJECT_MAPPER.readValue(item, COCI.class));
Assertions.assertEquals(24, tmp.count());
Assertions.assertEquals(1, tmp.filter(c -> c.getCiting().equals("10.1207/s15327647jcd3,4-01")).count());
Assertions.assertEquals(8, tmp.filter(c -> c.getCiting().indexOf(".refs") > -1).count());
}
}


@ -1,7 +1,10 @@
package eu.dnetlib.dhp.actionmanager.ror;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.FileInputStream;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
@ -13,9 +16,12 @@ import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation;
@Disabled
class GenerateRorActionSetJobTest {
private static final ObjectMapper mapper = new ObjectMapper();
@ -30,21 +36,40 @@ class GenerateRorActionSetJobTest {
void testConvertRorOrg() throws Exception {
final RorOrganization r = mapper
.readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class);
final Organization org = GenerateRorActionSetJob.convertRorOrg(r);
final List<AtomicAction<? extends Oaf>> aas = GenerateRorActionSetJob.convertRorOrg(r);
Assertions.assertEquals(3, aas.size());
assertEquals(Organization.class, aas.get(0).getClazz());
assertEquals(Relation.class, aas.get(1).getClazz());
assertEquals(Relation.class, aas.get(2).getClazz());
final Organization o = (Organization) aas.get(0).getPayload();
final Relation r1 = (Relation) aas.get(1).getPayload();
final Relation r2 = (Relation) aas.get(2).getPayload();
assertEquals(o.getId(), r1.getSource());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertEquals(ModelConstants.IS_PARENT_OF, r1.getRelClass());
assertEquals(ModelConstants.IS_CHILD_OF, r2.getRelClass());
System.out.println(mapper.writeValueAsString(o));
System.out.println(mapper.writeValueAsString(r1));
System.out.println(mapper.writeValueAsString(r2));
final String s = mapper.writeValueAsString(org);
Assertions.assertTrue(StringUtils.isNotBlank(s));
System.out.println(s);
}
@Test
@Disabled
void testConvertAllRorOrg() throws Exception {
final RorOrganization[] arr = mapper
.readValue(IOUtils.toString(new FileInputStream(local_file_path)), RorOrganization[].class);
for (final RorOrganization r : arr) {
Organization o = GenerateRorActionSetJob.convertRorOrg(r);
Assertions.assertNotNull(o);
final List<AtomicAction<? extends Oaf>> aas = GenerateRorActionSetJob.convertRorOrg(r);
Assertions.assertFalse(aas.isEmpty());
Assertions.assertNotNull(aas.get(0));
final Organization o = (Organization) aas.get(0).getPayload();
Assertions.assertTrue(StringUtils.isNotBlank(o.getId()));
}
}

View File

@ -0,0 +1,259 @@
package eu.dnetlib.dhp.actionmanager.usagestats;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Result;
public class SparkAtomicActionCountJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(SparkAtomicActionCountJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(SparkAtomicActionCountJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(SparkAtomicActionCountJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(SparkAtomicActionCountJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void testMatch() {
String usageScoresPath = getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb")
.getPath();
SparkAtomicActionUsageJob.prepareActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet");
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
JavaRDD<Result> tmp = sc
.textFile(workingDir.toString() + "/actionSet")
.map(usm -> OBJECT_MAPPER.readValue(usm, Result.class));
Assertions.assertEquals(9, tmp.count());
tmp.foreach(r -> Assertions.assertEquals(2, r.getMeasures().size()));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(u -> Assertions.assertFalse(u.getDataInfo().getInvisible()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(
u -> Assertions
.assertEquals(
"measure:usage_counts",
u.getDataInfo().getProvenanceaction().getClassid()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(
u -> Assertions
.assertEquals(
"Inferred by OpenAIRE",
u.getDataInfo().getProvenanceaction().getClassname()))));
tmp
.foreach(
r -> r
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(
u -> Assertions
.assertEquals(
"count",
u.getKey()))));
Assertions
.assertEquals(
1, tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count());
Assertions
.assertEquals(
"0",
tmp
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("downloads"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"5",
tmp
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("views"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"0",
tmp
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("downloads"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"1",
tmp
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("views"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"2",
tmp
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("downloads"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"6",
tmp
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect()
.get(0)
.getMeasures()
.stream()
.filter(m -> m.getId().equals("views"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
}
}

View File

@ -1,8 +0,0 @@
oci,citing,cited,creation,timespan,journal_sc,author_sc
02001000007362801000805046300010563030608046333-0200101010136193701050501630209010637020000083700020400083733,10.1007/s10854-015-3684-x,10.1111/j.1551-2916.2008.02408.x,2015-09-01,P7Y2M,no,no
02001000007362801000805046300010563030608046333-02001000007362801000805046300010463020101046309,10.1007/s10854-015-3684-x,10.1007/s10854-014-2114-9,2015-09-01,P1Y2M4D,yes,no
02001000007362801000805046300010563030608046333-020010001063619371214271022182329370200010337000937000609,10.1007/s10854-015-3684-x,10.1016/j.ceramint.2013.09.069,2015-09-01,P1Y6M,no,no
02001000007362801000805046300010563030608046333-02001000007362801000805046300000963090901036304,10.1007/s10854-015-3684-x,10.1007/s10854-009-9913-4,2015-09-01,P6Y3M10D,yes,no
02001000007362801000805046300010563030608046333-02001000106360000030863010009085807025909000307006305,10.1007/s10854-015-3684-x,10.1016/0038-1098(72)90370-5,2015-09-01,P43Y8M,no,no
02001000007362801000805046300010563030608056309-02001000106361937281010370200010437000937000308,10.1007/s10854-015-3685-9,10.1016/j.saa.2014.09.038,2015-09-03,P0Y7M,no,no
02001000007362801000805046300010563030608056309-0200100010636193722102912171027370200010537000437000106,10.1007/s10854-015-3685-9,10.1016/j.matchar.2015.04.016,2015-09-03,P0Y2M,no,no

View File

@ -1,8 +0,0 @@
oci,citing,cited,creation,timespan,journal_sc,author_sc
02001000308362804010509076300010963000003086301-0200100020936020001003227000009010004,10.1038/s41597-019-0038-1,10.1029/2010wr009104,2019-04-15,P8Y1M,no,no
02001000308362804010509076300010963000003086301-0200100010636280103060463080105025800015900000006006303,10.1038/s41597-019-0038-1,10.1016/s1364-8152(01)00060-3,2019-04-15,P17Y3M,no,no
02001000308362804010509076300010963000003086301-02001000007362800000407076300010063000401066333,10.1038/s41597-019-0038-1,10.1007/s00477-010-0416-x,2019-04-15,P8Y9M6D,no,no
02001000308362804010509076300010963000003086301-02001000007362800000700046300010363000905016308,10.1038/s41597-019-0038-1,10.1007/s00704-013-0951-8,2019-04-15,P5Y9M23D,no,no
02001000308362804010509076300010963000003086301-02001000002361924123705070707,10.1038/s41597-019-0038-1,10.1002/joc.5777,2019-04-15,P0Y8M1D,no,no
02001000308362804010509076300010963000003086301-02005010904361714282863020263040504076302000108,10.1038/s41597-019-0038-1,10.5194/hess-22-4547-2018,2019-04-15,P0Y7M18D,no,no
02001000308362804010509076300010963000003086301-02001000002361924123703050404,10.1038/s41597-019-0038-1,10.1002/joc.3544,2019-04-15,P6Y9M6D,no,no

View File

@ -1,9 +0,0 @@
oci,citing,cited,creation,timespan,journal_sc,author_sc
0200100000236090708010101090307000202023727141528-020050302063600040000010307,10.1002/9781119370222.refs,10.5326/0400137,2020-06-22,P16Y3M,no,no
0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020000073700000301093733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2007.00319.x,2020-06-22,P12Y8M,no,no
0200100000236090708010101090307000202023727141528-0200101010136312830370102030509,10.1002/9781119370222.refs,10.1111/vsu.12359,2020-06-22,P4Y10M29D,no,no
0200100000236090708010101090307000202023727141528-020050302063600030900020904,10.1002/9781119370222.refs,10.5326/0390294,2020-06-22,P17Y1M,no,no
0200100000236090708010101090307000202023727141528-020050302063600040200030701,10.1002/9781119370222.refs,10.5326/0420371,2020-06-22,P13Y9M,no,no
0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020001033701020000003733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2013.12000.x,2020-06-22,P7Y2M,no,no
0200100000236090708010101090307000202023727141528-020010008003600000408000106093702000006370306070200,10.1002/9781119370222.refs,10.1080/00480169.2006.36720,2020-06-22,P13Y6M,no,no
0200100000236090708010101090307000202023727141528-0200101010136193701070501630008010337020000063700000003033733,10.1002/9781119370222.refs,10.1111/j.1751-0813.2006.00033.x,2020-06-22,P13Y8M,no,no

View File

@ -1,123 +1,94 @@
{
"ip_addresses": [],
"aliases": [],
"acronyms": [
"ANU"
],
"links": [
"http://www.anu.edu.au/"
],
"country": {
"country_code": "AU",
"country_name": "Australia"
},
"name": "Australian National University",
"wikipedia_url": "http://en.wikipedia.org/wiki/Australian_National_University",
"addresses": [
{
"lat": -35.2778,
"state_code": "AU-ACT",
"country_geonames_id": 2077456,
"lng": 149.1205,
"state": "Australian Capital Territory",
"city": "Canberra",
"geonames_city": {
"nuts_level2": {
"name": null,
"code": null
},
"geonames_admin2": {
"ascii_name": null,
"id": null,
"name": null,
"code": null
},
"geonames_admin1": {
"ascii_name": "ACT",
"id": 2177478,
"name": "ACT",
"code": "AU.01"
},
"city": "Canberra",
"id": 2172517,
"nuts_level1": {
"name": null,
"code": null
},
"nuts_level3": {
"name": null,
"code": null
},
"license": {
"attribution": "Data from geonames.org under a CC-BY 3.0 license",
"license": "http://creativecommons.org/licenses/by/3.0/"
}
},
"postcode": null,
"primary": false,
"line": null
}
],
"types": [
"Education"
],
"established": 1946,
"relationships": [
{
"type": "Related",
"id": "https://ror.org/041c7s516",
"label": "Calvary Hospital"
},
{
"type": "Related",
"id": "https://ror.org/04h7nbn38",
"label": "Canberra Hospital"
},
{
"type": "Related",
"id": "https://ror.org/030jpqj15",
"label": "Goulburn Base Hospital"
},
{
"type": "Child",
"id": "https://ror.org/006a4jj40",
"label": "Mount Stromlo Observatory"
}
],
"email_address": null,
"external_ids": {
"Wikidata": {
"all": [
"Q127990"
],
"preferred": null
},
"OrgRef": {
"all": [
"285106"
],
"preferred": null
},
"ISNI": {
"all": [
"0000 0001 2180 7477"
],
"preferred": null
},
"FundRef": {
"all": [
"501100000995",
"501100001151",
"100009020"
],
"preferred": "501100000995"
},
"GRID": {
"all": "grid.1001.0",
"preferred": "grid.1001.0"
}
},
"id": "https://ror.org/019wvm592",
"labels": [],
"status": "active"
{
"ip_addresses": [],
"aliases": [],
"acronyms": [
"MSO"
],
"links": [
"https://rsaa.anu.edu.au/observatories/mount-stromlo-observatory"
],
"country": {
"country_code": "AU",
"country_name": "Australia"
},
"name": "Mount Stromlo Observatory",
"wikipedia_url": "https://en.wikipedia.org/wiki/Mount_Stromlo_Observatory",
"addresses": [
{
"lat": -35.320278,
"state_code": "AU-ACT",
"country_geonames_id": 2077456,
"lng": 149.006944,
"state": "Australian Capital Territory",
"city": "Canberra",
"geonames_city": {
"nuts_level2": {
"name": null,
"code": null
},
"geonames_admin2": {
"ascii_name": null,
"id": null,
"name": null,
"code": null
},
"geonames_admin1": {
"ascii_name": "ACT",
"id": 2177478,
"name": "ACT",
"code": "AU.01"
},
"city": "Canberra",
"id": 2172517,
"nuts_level1": {
"name": null,
"code": null
},
"nuts_level3": {
"name": null,
"code": null
},
"license": {
"attribution": "Data from geonames.org under a CC-BY 3.0 license",
"license": "http://creativecommons.org/licenses/by/3.0/"
}
},
"postcode": null,
"primary": false,
"line": null
}
],
"types": [
"Education"
],
"established": 1924,
"relationships": [
{
"type": "Parent",
"id": "https://ror.org/019wvm592",
"label": "Australian National University"
}
],
"email_address": null,
"external_ids": {
"ISNI": {
"all": [
"0000 0004 0459 2816"
],
"preferred": null
},
"Wikidata": {
"all": [
"Q1310548"
],
"preferred": null
},
"GRID": {
"all": "grid.440325.4",
"preferred": "grid.440325.4"
}
},
"id": "https://ror.org/006a4jj40",
"labels": [],
"status": "active"
}

View File

@ -0,0 +1,12 @@
{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4}
{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1}
{"result_id":"doi_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1}
{"result_id":"doi_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3}
{"result_id":"doi_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1}
{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3}
{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3}
{"result_id":"doi_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1}
{"result_id":"doi_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3}
{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8}
{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2}
{"result_id":"doi_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3}

View File

@ -70,6 +70,8 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
assertEquals(100, nativeSize)
spark.read.load(targetPath).printSchema();
val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf]
result

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -1,11 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -0,0 +1,192 @@
package eu.dnetlib.dhp.broker.oa;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.broker.model.ConditionParams;
import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.model.MappedFields;
import eu.dnetlib.dhp.broker.model.Notification;
import eu.dnetlib.dhp.broker.model.Subscription;
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
import eu.dnetlib.dhp.broker.oa.util.NotificationGroup;
import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils;
public class GenerateNotificationsJob {
private static final Logger log = LoggerFactory.getLogger(GenerateNotificationsJob.class);
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
GenerateNotificationsJob.class
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_notifications.json")));
parser.parseArgument(args);
final SparkConf conf = new SparkConf();
final String eventsPath = parser.get("outputDir") + "/events";
log.info("eventsPath: {}", eventsPath);
final String notificationsPath = parser.get("outputDir") + "/notifications";
log.info("notificationsPath: {}", notificationsPath);
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
final LongAccumulator total = spark.sparkContext().longAccumulator("total_notifications");
final long startTime = new Date().getTime();
final List<Subscription> subscriptions = listSubscriptions(brokerApiBaseUrl);
log.info("Number of subscriptions: " + subscriptions.size());
if (subscriptions.size() > 0) {
final Map<String, Map<String, List<ConditionParams>>> conditionsMap = prepareConditionsMap(subscriptions);
log.info("ConditionsMap: " + new ObjectMapper().writeValueAsString(conditionsMap));
final Encoder<NotificationGroup> ngEncoder = Encoders.bean(NotificationGroup.class);
final Encoder<Notification> nEncoder = Encoders.bean(Notification.class);
final Dataset<Notification> notifications = ClusterUtils
.readPath(spark, eventsPath, Event.class)
.map(
(MapFunction<Event, NotificationGroup>) e -> generateNotifications(
e, subscriptions, conditionsMap, startTime),
ngEncoder)
.flatMap((FlatMapFunction<NotificationGroup, Notification>) g -> g.getData().iterator(), nEncoder);
ClusterUtils.save(notifications, notificationsPath, Notification.class, total);
}
}
protected static Map<String, Map<String, List<ConditionParams>>> prepareConditionsMap(
final List<Subscription> subscriptions) {
final Map<String, Map<String, List<ConditionParams>>> map = new HashMap<>();
subscriptions.forEach(s -> map.put(s.getSubscriptionId(), s.conditionsAsMap()));
return map;
}
protected static NotificationGroup generateNotifications(final Event e,
final List<Subscription> subscriptions,
final Map<String, Map<String, List<ConditionParams>>> conditionsMap,
final long date) {
final List<Notification> list = subscriptions
.stream()
.filter(
s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic()))
.filter(s -> verifyConditions(e.getMap(), conditionsMap.get(s.getSubscriptionId())))
.map(s -> generateNotification(s, e, date))
.collect(Collectors.toList());
return new NotificationGroup(list);
}
private static Notification generateNotification(final Subscription s, final Event e, final long date) {
final Notification n = new Notification();
n.setNotificationId("ntf-" + DigestUtils.md5Hex(s.getSubscriptionId() + "@@@" + e.getEventId()));
n.setSubscriptionId(s.getSubscriptionId());
n.setEventId(e.getEventId());
n.setProducerId(e.getProducerId());
n.setTopic(e.getTopic());
n.setPayload(e.getPayload());
n.setMap(e.getMap());
n.setDate(date);
return n;
}
private static boolean verifyConditions(final MappedFields map,
final Map<String, List<ConditionParams>> conditions) {
if (conditions.containsKey("targetDatasourceName")
&& !SubscriptionUtils
.verifyExact(map.getTargetDatasourceName(), conditions.get("targetDatasourceName").get(0).getValue())) {
return false;
}
if (conditions.containsKey("trust")
&& !SubscriptionUtils
.verifyFloatRange(
map.getTrust(), conditions.get("trust").get(0).getValue(),
conditions.get("trust").get(0).getOtherValue())) {
return false;
}
if (conditions.containsKey("targetDateofacceptance") && !conditions
.get("targetDateofacceptance")
.stream()
.anyMatch(
c -> SubscriptionUtils
.verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) {
return false;
}
if (conditions.containsKey("targetResultTitle")
&& !conditions
.get("targetResultTitle")
.stream()
.anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) {
return false;
}
if (conditions.containsKey("targetAuthors")
&& !conditions
.get("targetAuthors")
.stream()
.allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) {
return false;
}
return !conditions.containsKey("targetSubjects")
|| conditions
.get("targetSubjects")
.stream()
.allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue()));
}
private static List<Subscription> listSubscriptions(final String brokerApiBaseUrl) throws Exception {
final String url = brokerApiBaseUrl + "/api/subscriptions";
final HttpGet req = new HttpGet(url);
final ObjectMapper mapper = new ObjectMapper();
try (final CloseableHttpClient client = HttpClients.createDefault()) {
try (final CloseableHttpResponse response = client.execute(req)) {
final String s = IOUtils.toString(response.getEntity().getContent());
return mapper
.readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, Subscription.class));
}
}
}
}
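Note (editor's sketch, not part of the commit): Subscription.conditionsAsMap() is referenced above but not shown in this diff. A minimal fragment of how the serialized conditions could become the Map<String, List<ConditionParams>> consumed by verifyConditions, mirroring the MapCondition/Jackson pattern of SimpleVariableJobTest later in this commit (same imports assumed):

// Illustrative only: same pattern as SimpleVariableJobTest.fillMap(), not the real conditionsAsMap().
final String s = "[{\"field\":\"trust\",\"fieldType\":\"FLOAT\",\"operator\":\"RANGE\","
	+ "\"listParams\":[{\"value\":\"0\",\"otherValue\":\"1\"}]}]";
final ObjectMapper mapper = new ObjectMapper();
final List<MapCondition> list = mapper
	.readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, MapCondition.class));
final Map<String, List<ConditionParams>> conditions = list
	.stream()
	.filter(mc -> !mc.getListParams().isEmpty())
	.collect(Collectors.toMap(MapCondition::getField, MapCondition::getListParams));
// verifyConditions(event.getMap(), conditions) would then check the "trust" range 0..1.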

View File

@ -2,15 +2,10 @@
package eu.dnetlib.dhp.broker.oa;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
@ -18,10 +13,7 @@ import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
@ -33,10 +25,8 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.broker.model.*;
import eu.dnetlib.dhp.broker.model.Notification;
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
import eu.dnetlib.dhp.broker.oa.util.NotificationGroup;
import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils;
public class IndexNotificationsJob {
@ -53,8 +43,8 @@ public class IndexNotificationsJob {
final SparkConf conf = new SparkConf();
final String eventsPath = parser.get("outputDir") + "/events";
log.info("eventsPath: {}", eventsPath);
final String notificationsPath = parser.get("outputDir") + "/notifications";
log.info("notificationsPath: {}", notificationsPath);
final String index = parser.get("index");
log.info("index: {}", index);
@ -81,143 +71,41 @@ public class IndexNotificationsJob {
final LongAccumulator total = spark.sparkContext().longAccumulator("total_indexed");
final long startTime = new Date().getTime();
final Long date = ClusterUtils
.readPath(spark, notificationsPath, Notification.class)
.first()
.getDate();
final List<Subscription> subscriptions = listSubscriptions(brokerApiBaseUrl);
final JavaRDD<String> toIndexRdd = ClusterUtils
.readPath(spark, notificationsPath, Notification.class)
.map((MapFunction<Notification, String>) n -> prepareForIndexing(n, total), Encoders.STRING())
.javaRDD();
log.info("Number of subscriptions: {}", subscriptions.size());
final Map<String, String> esCfg = new HashMap<>();
if (!subscriptions.isEmpty()) {
final Encoder<NotificationGroup> ngEncoder = Encoders.bean(NotificationGroup.class);
final Encoder<Notification> nEncoder = Encoders.bean(Notification.class);
final Dataset<Notification> notifications = ClusterUtils
.readPath(spark, eventsPath, Event.class)
.map(
(MapFunction<Event, NotificationGroup>) e -> generateNotifications(e, subscriptions, startTime),
ngEncoder)
.flatMap((FlatMapFunction<NotificationGroup, Notification>) g -> g.getData().iterator(), nEncoder);
esCfg.put("es.index.auto.create", "false");
esCfg.put("es.nodes", indexHost);
esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
esCfg.put("es.batch.size.entries", esBatchSizeEntries);
esCfg.put("es.nodes.wan.only", esNodesWanOnly);
final JavaRDD<String> inputRdd = notifications
.map((MapFunction<Notification, String>) n -> prepareForIndexing(n, total), Encoders.STRING())
.javaRDD();
log.info("*** Start indexing");
JavaEsSpark.saveJsonToEs(toIndexRdd, index, esCfg);
log.info("*** End indexing");
final Map<String, String> esCfg = new HashMap<>();
log.info("*** Deleting old notifications");
final String message = deleteOldNotifications(brokerApiBaseUrl, date - 1000);
log.info("*** Deleted notifications: {}", message);
esCfg.put("es.index.auto.create", "false");
esCfg.put("es.nodes", indexHost);
esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
esCfg.put("es.batch.size.entries", esBatchSizeEntries);
esCfg.put("es.nodes.wan.only", esNodesWanOnly);
log.info("*** Start indexing");
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
log.info("*** End indexing");
log.info("*** Deleting old notifications");
final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000);
log.info("*** Deleted notifications: {}", message);
log.info("*** sendNotifications (emails, ...)");
sendNotifications(brokerApiBaseUrl, startTime - 1000);
log.info("*** ALL done.");
}
}
private static NotificationGroup generateNotifications(final Event e,
final List<Subscription> subscriptions,
final long date) {
final List<Notification> list = subscriptions
.stream()
.filter(
s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic()))
.filter(s -> verifyConditions(e.getMap(), s.conditionsAsMap()))
.map(s -> generateNotification(s, e, date))
.collect(Collectors.toList());
return new NotificationGroup(list);
}
private static Notification generateNotification(final Subscription s, final Event e, final long date) {
final Notification n = new Notification();
n.setNotificationId("ntf-" + DigestUtils.md5Hex(s.getSubscriptionId() + "@@@" + e.getEventId()));
n.setSubscriptionId(s.getSubscriptionId());
n.setEventId(e.getEventId());
n.setProducerId(e.getProducerId());
n.setTopic(e.getTopic());
n.setPayload(e.getPayload());
n.setMap(e.getMap());
n.setDate(date);
return n;
}
private static boolean verifyConditions(final MappedFields map,
final Map<String, List<ConditionParams>> conditions) {
if (conditions.containsKey("targetDatasourceName")
&& !SubscriptionUtils
.verifyExact(map.getTargetDatasourceName(), conditions.get("targetDatasourceName").get(0).getValue())) {
return false;
}
if (conditions.containsKey("trust")
&& !SubscriptionUtils
.verifyFloatRange(
map.getTrust(), conditions.get("trust").get(0).getValue(),
conditions.get("trust").get(0).getOtherValue())) {
return false;
}
if (conditions.containsKey("targetDateofacceptance") && conditions
.get("targetDateofacceptance")
.stream()
.noneMatch(
c -> SubscriptionUtils
.verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) {
return false;
}
if (conditions.containsKey("targetResultTitle")
&& conditions
.get("targetResultTitle")
.stream()
.noneMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) {
return false;
}
if (conditions.containsKey("targetAuthors")
&& conditions
.get("targetAuthors")
.stream()
.noneMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) {
return false;
}
return !conditions.containsKey("targetSubjects")
|| conditions
.get("targetSubjects")
.stream()
.allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue()));
log.info("*** sendNotifications (emails, ...)");
sendNotifications(brokerApiBaseUrl, date - 1000);
log.info("*** ALL done.");
}
private static List<Subscription> listSubscriptions(final String brokerApiBaseUrl) throws IOException {
final String url = brokerApiBaseUrl + "/api/subscriptions";
final HttpGet req = new HttpGet(url);
final ObjectMapper mapper = new ObjectMapper();
try (final CloseableHttpClient client = HttpClients.createDefault()) {
try (final CloseableHttpResponse response = client.execute(req)) {
final String s = IOUtils.toString(response.getEntity().getContent());
return mapper
.readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, Subscription.class));
}
}
}
private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws IOException {
private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception {
final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l;
final HttpDelete req = new HttpDelete(url);

View File

@ -115,6 +115,11 @@
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
<property>
<name>sparkMaxExecutorsForIndexing</name>
<value>8</value>
<description>Max number of workers for ElasticSearch indexing</description>
</property>
</parameters>
<global>
@ -498,7 +503,7 @@
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.dynamicAllocation.maxExecutors="8"
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -542,6 +547,30 @@
<arg>--dbPassword</arg><arg>${brokerDbPassword}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark>
<ok to="generate_notifications"/>
<error to="Kill"/>
</action>
<action name="generate_notifications">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateNotificationsJob</name>
<class>eu.dnetlib.dhp.broker.oa.GenerateNotificationsJob</class>
<jar>dhp-broker-events-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--outputDir</arg><arg>${outputDir}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark>
<ok to="index_notifications"/>
<error to="Kill"/>
</action>
@ -556,7 +585,7 @@
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.dynamicAllocation.maxExecutors="8"
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}

View File

@ -0,0 +1,14 @@
[
{
"paramName": "o",
"paramLongName": "outputDir",
"paramDescription": "the dir that contains the events folder",
"paramRequired": true
},
{
"paramName": "broker",
"paramLongName": "brokerApiBaseUrl",
"paramDescription": "the url of the broker service api",
"paramRequired": true
}
]
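Note (editor's sketch, not part of the commit): these two parameters line up with the ArgumentApplicationParser calls in GenerateNotificationsJob above. A hypothetical local invocation, with argument names taken from this file and every value assumed:

// Illustrative fragment only; reuses calls already shown in GenerateNotificationsJob.
GenerateNotificationsJob
	.main(
		new String[] {
			"--outputDir", "/tmp/broker", // events are read from <outputDir>/events, notifications written to <outputDir>/notifications
			"--brokerApiBaseUrl", "http://broker.example.org" // assumed URL, used to list subscriptions
		});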

View File

@ -98,6 +98,11 @@
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
<property>
<name>sparkMaxExecutorsForIndexing</name>
<value>8</value>
<description>Max number of workers for ElasticSearch indexing</description>
</property>
</parameters>
<global>
@ -119,12 +124,36 @@
</configuration>
</global>
<start to="index_notifications"/>
<start to="generate_notifications"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="generate_notifications">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateNotificationsJob</name>
<class>eu.dnetlib.dhp.broker.oa.GenerateNotificationsJob</class>
<jar>dhp-broker-events-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--outputDir</arg><arg>${outputDir}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark>
<ok to="index_notifications"/>
<error to="Kill"/>
</action>
<action name="index_notifications">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
@ -135,7 +164,7 @@
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.dynamicAllocation.maxExecutors="8"
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}

View File

@ -75,6 +75,11 @@
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
<property>
<name>sparkMaxExecutorsForIndexing</name>
<value>8</value>
<description>Max number of workers for ElasticSearch indexing</description>
</property>
</parameters>
<global>
@ -112,7 +117,7 @@
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.dynamicAllocation.maxExecutors="8"
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}

View File

@ -0,0 +1,133 @@
package eu.dnetlib.dhp.broker.oa;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.broker.model.ConditionParams;
import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.model.MappedFields;
import eu.dnetlib.dhp.broker.model.Subscription;
import eu.dnetlib.dhp.broker.oa.util.NotificationGroup;
class GenerateNotificationsJobTest {
private List<Subscription> subscriptions;
private Map<String, Map<String, List<ConditionParams>>> conditionsMap;
private static final int N_TIMES = 1_000_000;
@BeforeEach
void setUp() throws Exception {
final Subscription s = new Subscription();
s.setTopic("ENRICH/MISSING/PID");
s
.setConditions(
"[{\"field\":\"targetDatasourceName\",\"fieldType\":\"STRING\",\"operator\":\"EXACT\",\"listParams\":[{\"value\":\"reposiTUm\"}]},{\"field\":\"trust\",\"fieldType\":\"FLOAT\",\"operator\":\"RANGE\",\"listParams\":[{\"value\":\"0\",\"otherValue\":\"1\"}]}]");
subscriptions = Arrays.asList(s);
conditionsMap = GenerateNotificationsJob.prepareConditionsMap(subscriptions);
}
@Test
void testGenerateNotifications_invalid_topic() {
final Event event = new Event();
event.setTopic("ENRICH/MISSING/PROJECT");
final NotificationGroup res = GenerateNotificationsJob
.generateNotifications(event, subscriptions, conditionsMap, 0);
assertEquals(0, res.getData().size());
}
@Test
void testGenerateNotifications_topic_match() {
final Event event = new Event();
event.setTopic("ENRICH/MISSING/PID");
event.setMap(new MappedFields());
event.getMap().setTargetDatasourceName("reposiTUm");
event.getMap().setTrust(0.8f);
final NotificationGroup res = GenerateNotificationsJob
.generateNotifications(event, subscriptions, conditionsMap, 0);
assertEquals(1, res.getData().size());
}
@Test
void testGenerateNotifications_topic_no_match() {
final Event event = new Event();
event.setTopic("ENRICH/MISSING/PID");
event.setMap(new MappedFields());
event.getMap().setTargetDatasourceName("Puma");
event.getMap().setTrust(0.8f);
final NotificationGroup res = GenerateNotificationsJob
.generateNotifications(event, subscriptions, conditionsMap, 0);
assertEquals(0, res.getData().size());
}
@Test
void testGenerateNotifications_invalid_topic_repeated() {
final Event event = new Event();
event.setTopic("ENRICH/MISSING/PROJECT");
// warm up
GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0);
final long start = System.currentTimeMillis();
for (int i = 0; i < N_TIMES; i++) {
GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0);
}
final long end = System.currentTimeMillis();
System.out
.println(String.format("no topic - repeated %s times - execution time: %s ms ", N_TIMES, end - start));
}
@Test
void testGenerateNotifications_topic_match_repeated() {
final Event event = new Event();
event.setTopic("ENRICH/MISSING/PID");
event.setMap(new MappedFields());
event.getMap().setTargetDatasourceName("reposiTUm");
event.getMap().setTrust(0.8f);
// warm up
GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0);
final long start = System.currentTimeMillis();
for (int i = 0; i < N_TIMES; i++) {
GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0);
}
final long end = System.currentTimeMillis();
System.out
.println(String.format("topic match - repeated %s times - execution time: %s ms ", N_TIMES, end - start));
}
@Test
void testGenerateNotifications_topic_no_match_repeated() {
final Event event = new Event();
event.setTopic("ENRICH/MISSING/PID");
event.setMap(new MappedFields());
event.getMap().setTargetDatasourceName("Puma");
event.getMap().setTrust(0.8f);
// warm up
GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0);
final long start = System.currentTimeMillis();
for (int i = 0; i < N_TIMES; i++) {
GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0);
}
final long end = System.currentTimeMillis();
System.out
.println(
String.format("topic no match - repeated %s times - execution time: %s ms ", N_TIMES, end - start));
}
}

View File

@ -0,0 +1,132 @@
package eu.dnetlib.dhp.broker.oa.samples;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.broker.model.ConditionParams;
import eu.dnetlib.dhp.broker.model.MapCondition;
import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils;
@Disabled
public class SimpleVariableJobTest {
private static final Logger log = LoggerFactory.getLogger(SimpleVariableJobTest.class);
private static Path workingDir;
private static SparkSession spark;
private final static List<String> inputList = new ArrayList<>();
private static final Map<String, Map<String, List<ConditionParams>>> staticMap = new HashMap<>();
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(SimpleVariableJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
final SparkConf conf = new SparkConf();
conf.setAppName(SimpleVariableJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
// conf.set("spark.sql.warehouse.dir", workingDir.toString());
// conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(SimpleVariableJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
for (int i = 0; i < 1_000_000; i++) {
inputList.add("record " + i);
}
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void testSimpleVariableJob() throws Exception {
final Map<String, Map<String, List<ConditionParams>>> map = fillMap();
final long n = spark
.createDataset(inputList, Encoders.STRING())
.filter(s -> filter(map.get(s)))
.map((MapFunction<String, String>) s -> s.toLowerCase(), Encoders.STRING())
.count();
System.out.println(n);
}
@Test
public void testSimpleVariableJob_static() throws Exception {
staticMap.putAll(fillMap());
final long n = spark
.createDataset(inputList, Encoders.STRING())
.filter(s -> filter(staticMap.get(s)))
.map((MapFunction<String, String>) s -> s.toLowerCase(), Encoders.STRING())
.count();
System.out.println(n);
}
private static Map<String, Map<String, List<ConditionParams>>> fillMap()
throws JsonParseException, JsonMappingException, IOException {
final String s = "[{\"field\":\"targetDatasourceName\",\"fieldType\":\"STRING\",\"operator\":\"EXACT\",\"listParams\":[{\"value\":\"reposiTUm\"}]},{\"field\":\"trust\",\"fieldType\":\"FLOAT\",\"operator\":\"RANGE\",\"listParams\":[{\"value\":\"0\",\"otherValue\":\"1\"}]}]";
final ObjectMapper mapper = new ObjectMapper();
final List<MapCondition> list = mapper
.readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, MapCondition.class));
final Map<String, List<ConditionParams>> conditions = list
.stream()
.filter(mc -> !mc.getListParams().isEmpty())
.collect(Collectors.toMap(MapCondition::getField, MapCondition::getListParams));
final Map<String, Map<String, List<ConditionParams>>> map = new HashMap<>();
inputList.forEach(i -> map.put(i, conditions));
return map;
}
private static boolean filter(final Map<String, List<ConditionParams>> conditions) {
if (conditions.containsKey("targetDatasourceName")
&& !SubscriptionUtils
.verifyExact("reposiTUm", conditions.get("targetDatasourceName").get(0).getValue())) {
return false;
}
return true;
}
}

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-dedup-openaire</artifactId>

View File

@ -77,6 +77,7 @@ public class DedupRecordFactory {
throws IllegalAccessException, InstantiationException {
T entity = clazz.newInstance();
entity.setDataInfo(dataInfo);
final Collection<String> dates = Lists.newArrayList();
final List<List<Author>> authors = Lists.newArrayList();

View File

@ -104,7 +104,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>group graph entities</name>
<class>eu.dnetlib.dhp.oa.dedup.GroupEntitiesSparkJob</class>
<class>eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
@ -138,7 +138,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Dispatch publications</name>
<class>eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob</class>
<class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
@ -163,7 +163,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Dispatch project</name>
<class>eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob</class>
<class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
@ -188,7 +188,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Dispatch organization</name>
<class>eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob</class>
<class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
@ -213,7 +213,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Dispatch publication</name>
<class>eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob</class>
<class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
@ -238,7 +238,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Dispatch dataset</name>
<class>eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob</class>
<class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
@ -263,7 +263,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Dispatch software</name>
<class>eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob</class>
<class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
@ -288,7 +288,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Dispatch otherresearchproduct</name>
<class>eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob</class>
<class>eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob</class>
<jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}

View File

@ -77,7 +77,16 @@ class EntityMergerTest implements Serializable {
// verify id
assertEquals(dedupId, pub_merged.getId());
assertEquals(pub_top.getJournal(), pub_merged.getJournal());
assertEquals(pub_top.getJournal().getName(), pub_merged.getJournal().getName());
assertEquals(pub_top.getJournal().getIssnOnline(), pub_merged.getJournal().getIssnOnline());
assertEquals(pub_top.getJournal().getIssnLinking(), pub_merged.getJournal().getIssnLinking());
assertEquals(pub_top.getJournal().getIssnPrinted(), pub_merged.getJournal().getIssnPrinted());
assertEquals(pub_top.getJournal().getIss(), pub_merged.getJournal().getIss());
assertEquals(pub_top.getJournal().getEp(), pub_merged.getJournal().getEp());
assertEquals(pub_top.getJournal().getSp(), pub_merged.getJournal().getSp());
assertEquals(pub_top.getJournal().getVol(), pub_merged.getJournal().getVol());
assertEquals(pub_top.getJournal().getConferencedate(), pub_merged.getJournal().getConferencedate());
assertEquals(pub_top.getJournal().getConferenceplace(), pub_merged.getJournal().getConferenceplace());
assertEquals("OPEN", pub_merged.getBestaccessright().getClassid());
assertEquals(pub_top.getResulttype(), pub_merged.getResulttype());
assertEquals(pub_top.getLanguage(), pub_merged.getLanguage());

View File

@ -206,11 +206,16 @@ public class SparkDedupTest implements Serializable {
.load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct"))
.count();
assertEquals(3082, orgs_simrel);
assertEquals(7036, pubs_simrel);
assertEquals(3076, orgs_simrel);
assertEquals(7040, pubs_simrel);
assertEquals(336, sw_simrel);
assertEquals(442, ds_simrel);
assertEquals(6750, orp_simrel);
assertEquals(6784, orp_simrel);
// System.out.println("orgs_simrel = " + orgs_simrel);
// System.out.println("pubs_simrel = " + pubs_simrel);
// System.out.println("sw_simrel = " + sw_simrel);
// System.out.println("ds_simrel = " + ds_simrel);
// System.out.println("orp_simrel = " + orp_simrel);
}
@Test
@ -258,10 +263,14 @@ public class SparkDedupTest implements Serializable {
.count();
// entities simrels supposed to be equal to the number of previous step (no rels in whitelist)
assertEquals(3082, orgs_simrel);
assertEquals(7036, pubs_simrel);
assertEquals(3076, orgs_simrel);
assertEquals(7040, pubs_simrel);
assertEquals(442, ds_simrel);
assertEquals(6750, orp_simrel);
assertEquals(6784, orp_simrel);
// System.out.println("orgs_simrel = " + orgs_simrel);
// System.out.println("pubs_simrel = " + pubs_simrel);
// System.out.println("ds_simrel = " + ds_simrel);
// System.out.println("orp_simrel = " + orp_simrel);
// entities simrels to be different from the number of previous step (new simrels in the whitelist)
Dataset<Row> sw_simrel = spark
@ -288,6 +297,7 @@ public class SparkDedupTest implements Serializable {
.count() > 0);
assertEquals(338, sw_simrel.count());
// System.out.println("sw_simrel = " + sw_simrel.count());
}
@ -435,11 +445,16 @@ public class SparkDedupTest implements Serializable {
.load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")
.count();
assertEquals(1272, orgs_mergerel);
assertEquals(1438, pubs_mergerel);
assertEquals(1268, orgs_mergerel);
assertEquals(1444, pubs_mergerel);
assertEquals(286, sw_mergerel);
assertEquals(472, ds_mergerel);
assertEquals(718, orp_mergerel);
assertEquals(738, orp_mergerel);
// System.out.println("orgs_mergerel = " + orgs_mergerel);
// System.out.println("pubs_mergerel = " + pubs_mergerel);
// System.out.println("sw_mergerel = " + sw_mergerel);
// System.out.println("ds_mergerel = " + ds_mergerel);
// System.out.println("orp_mergerel = " + orp_mergerel);
}
@ -483,11 +498,17 @@ public class SparkDedupTest implements Serializable {
testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord")
.count();
assertEquals(85, orgs_deduprecord);
assertEquals(65, pubs_deduprecord);
assertEquals(86, orgs_deduprecord);
assertEquals(67, pubs_deduprecord);
assertEquals(49, sw_deduprecord);
assertEquals(97, ds_deduprecord);
assertEquals(89, orp_deduprecord);
assertEquals(92, orp_deduprecord);
// System.out.println("orgs_deduprecord = " + orgs_deduprecord);
// System.out.println("pubs_deduprecord = " + pubs_deduprecord);
// System.out.println("sw_deduprecord = " + sw_deduprecord);
// System.out.println("ds_deduprecord = " + ds_deduprecord);
// System.out.println("orp_deduprecord = " + orp_deduprecord);
}
@Test
@ -566,13 +587,21 @@ public class SparkDedupTest implements Serializable {
.distinct()
.count();
assertEquals(896, publications);
assertEquals(838, organizations);
assertEquals(898, publications);
assertEquals(839, organizations);
assertEquals(100, projects);
assertEquals(100, datasource);
assertEquals(198, softwares);
assertEquals(389, dataset);
assertEquals(517, otherresearchproduct);
assertEquals(520, otherresearchproduct);
// System.out.println("publications = " + publications);
// System.out.println("organizations = " + organizations);
// System.out.println("projects = " + projects);
// System.out.println("datasource = " + datasource);
// System.out.println("software = " + softwares);
// System.out.println("dataset = " + dataset);
// System.out.println("otherresearchproduct = " + otherresearchproduct);
long deletedOrgs = jsc
.textFile(testDedupGraphBasePath + "/organization")
@ -626,7 +655,8 @@ public class SparkDedupTest implements Serializable {
long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count();
assertEquals(4860, relations);
// assertEquals(4860, relations);
System.out.println("relations = " + relations);
// check deletedbyinference
final Dataset<Relation> mergeRels = spark

View File

@ -0,0 +1,214 @@
{
"wf": {
"threshold" : "0.99",
"dedupRun" : "001",
"entityType" : "result",
"subEntityType" : "resulttype",
"subEntityValue" : "otherresearchproduct",
"orderField" : "title",
"queueMaxSize" : "100",
"groupMaxSize" : "100",
"maxChildren" : "100",
"slidingWindowSize" : "100",
"rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ],
"includeChildren" : "true",
"idPath" : "$.id",
"maxIterations" : 20
},
"pace": {
"clustering": [
{
"name": "wordsStatsSuffixPrefixChain",
"fields": [
"title"
],
"params": {
"mod": "10"
}
},
{
"name": "lowercase",
"fields": [
"doi",
"altdoi"
],
"params": {
"collapseOn:pid": "0"
}
}
],
"decisionTree": {
"start": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"mode": "count"
}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "pidVSaltid",
"undefined": "pidVSaltid",
"ignoreUndefined": "false"
},
"pidVSaltid": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"crossCompare": "alternateid",
"mode": "count"
}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "softCheck",
"negative": "earlyExits",
"undefined": "earlyExits",
"ignoreUndefined": "true"
},
"softCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.9,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"earlyExits": {
"fields": [
{
"field": "title",
"comparator": "titleVersionMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
},
{
"field": "authors",
"comparator": "sizeMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1.0,
"aggregation": "AND",
"positive": "strongCheck",
"negative": "NO_MATCH",
"undefined": "strongCheck",
"ignoreUndefined": "false"
},
"strongCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.99,
"aggregation": "AVG",
"positive": "surnames",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"surnames": {
"fields": [
{
"field": "authors",
"comparator": "authorsMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"surname_th": 0.75,
"fullname_th": 0.75,
"mode": "surname"
}
}
],
"threshold": 0.6,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "MATCH",
"ignoreUndefined": "true"
}
},
"model": [
{
"name": "doi",
"type": "String",
"path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "altdoi",
"type": "String",
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "pid",
"type": "JSON",
"path": "$.instance[*].pid[*]",
"overrideMatch": "true"
},
{
"name": "alternateid",
"type": "JSON",
"path": "$.instance[*].alternateIdentifier[*]",
"overrideMatch": "true"
},
{
"name": "title",
"type": "String",
"path": "$.title[?(@.qualifier.classid == 'main title')].value",
"length": 250,
"size": 5
},
{
"name": "authors",
"type": "List",
"path": "$.author[*].fullname",
"size": 200
},
{
"name": "resulttype",
"type": "String",
"path": "$.resulttype.classid"
},
{
"name": "instance",
"type": "List",
"path": "$.instance[*].instancetype.classname"
}
],
"blacklists": {},
"synonyms": {}
}
}
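The configuration above drives one of the deduplication runs; a minimal sketch of how such a file could be inspected with Jackson to see what the workflow section and the decision tree declare. This is not the pace library API: the DedupConfigCheck class and the resource name are hypothetical, only the JSON structure comes from the file above.

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class DedupConfigCheck {

	public static void main(String[] args) throws Exception {
		// Hypothetical resource name: point it at the JSON configuration shown above.
		JsonNode conf = new ObjectMapper()
			.readTree(DedupConfigCheck.class.getResourceAsStream("/orp.curr.conf.json"));

		// The "wf" section carries the workflow parameters (entity type, window sizes, ...).
		System.out.println("sub entity: " + conf.path("wf").path("subEntityValue").asText());

		// The decision tree is walked starting from the "start" node; each node names the
		// next node to visit in its "positive" / "negative" / "undefined" fields.
		JsonNode start = conf.path("pace").path("decisionTree").path("start");
		System.out.println("on positive match go to: " + start.path("positive").asText());
	}
}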

View File

@ -0,0 +1,475 @@
{
"wf": {
"threshold": "0.99",
"dedupRun": "001",
"entityType": "result",
"subEntityType": "resulttype",
"subEntityValue": "publication",
"orderField": "title",
"queueMaxSize": "200",
"groupMaxSize": "100",
"maxChildren": "100",
"slidingWindowSize": "50",
"rootBuilder": [
"result",
"resultProject_outcome_isProducedBy",
"resultResult_publicationDataset_isRelatedTo",
"resultResult_similarity_isAmongTopNSimilarDocuments",
"resultResult_similarity_hasAmongTopNSimilarDocuments",
"resultOrganization_affiliation_isAffiliatedWith",
"resultResult_part_hasPart",
"resultResult_part_isPartOf",
"resultResult_supplement_isSupplementTo",
"resultResult_supplement_isSupplementedBy",
"resultResult_version_isVersionOf"
],
"includeChildren": "true",
"maxIterations": 20,
"idPath": "$.id"
},
"pace": {
"clustering": [
{
"name": "wordsStatsSuffixPrefixChain",
"fields": [
"title"
],
"params": {
"mod": "10"
}
},
{
"name": "lowercase",
"fields": [
"doi",
"altdoi"
],
"params": {
"collapseOn:pid": "0"
}
}
],
"decisionTree": {
"start": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"mode": "count"
}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "instanceTypeCheck",
"undefined": "instanceTypeCheck",
"ignoreUndefined": "false"
},
"instanceTypeCheck": {
"fields": [
{
"field": "instance",
"comparator": "instanceTypeMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 0.5,
"aggregation": "MAX",
"positive": "pidVSaltid",
"negative": "NO_MATCH",
"undefined": "pidVSaltid",
"ignoreUndefined": "true"
},
"pidVSaltid": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"crossCompare": "alternateid",
"mode": "count"
}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "softCheck",
"negative": "earlyExits",
"undefined": "earlyExits",
"ignoreUndefined": "true"
},
"softCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.9,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"earlyExits": {
"fields": [
{
"field": "title",
"comparator": "titleVersionMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
},
{
"field": "authors",
"comparator": "sizeMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1.0,
"aggregation": "AND",
"positive": "strongCheck",
"negative": "NO_MATCH",
"undefined": "strongCheck",
"ignoreUndefined": "false"
},
"strongCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.99,
"aggregation": "AVG",
"positive": "surnames",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"surnames": {
"fields": [
{
"field": "authors",
"comparator": "authorsMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"surname_th": 0.75,
"fullname_th": 0.75,
"mode": "surname"
}
}
],
"threshold": 0.6,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "MATCH",
"ignoreUndefined": "true"
}
},
"model": [
{
"name": "doi",
"type": "String",
"path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "altdoi",
"type": "String",
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "pid",
"type": "JSON",
"path": "$.instance[*].pid[*]",
"overrideMatch": "true"
},
{
"name": "alternateid",
"type": "JSON",
"path": "$.instance[*].alternateIdentifier[*]",
"overrideMatch": "true"
},
{
"name": "title",
"type": "String",
"path": "$.title[?(@.qualifier.classid == 'main title')].value",
"length": 250,
"size": 5
},
{
"name": "authors",
"type": "List",
"path": "$.author[*].fullname",
"size": 200
},
{
"name": "resulttype",
"type": "String",
"path": "$.resulttype.classid"
},
{
"name": "instance",
"type": "List",
"path": "$.instance[*].instancetype.classname"
}
],
"blacklists": {
"title": [
"(?i)^Data Management Plan",
"^Inside Front Cover$",
"(?i)^Poster presentations$",
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
"^Problems with perinatal pathology\\.?$",
"(?i)^Cases? of Puerperal Convulsions$",
"(?i)^Operative Gyna?ecology$",
"(?i)^Mind the gap\\!?\\:?$",
"^Chronic fatigue syndrome\\.?$",
"^Cartas? ao editor Letters? to the Editor$",
"^Note from the Editor$",
"^Anesthesia Abstract$",
"^Annual report$",
"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
"(?i)^Graph and Table of Infectious Diseases?$",
"^Presentation$",
"(?i)^Reviews and Information on Publications$",
"(?i)^PUBLIC HEALTH SERVICES?$",
"(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
"(?i)^Adrese autora$",
"(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
"(?i)^Acknowledgement to Referees$",
"(?i)^Behçet's disease\\.?$",
"(?i)^Isolation and identification of restriction endonuclease.*$",
"(?i)^CEREBROVASCULAR DISEASES?.?$",
"(?i)^Screening for abdominal aortic aneurysms?\\.?$",
"^Event management$",
"(?i)^Breakfast and Crohn's disease.*\\.?$",
"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
"^Gushi hakubutsugaku$",
"^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
"^Intestinal spirocha?etosis$",
"^Treatment of Rodent Ulcer$",
"(?i)^\\W*Cloud Computing\\W*$",
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
"^Free Communications, Poster Presentations: Session [A-F]$",
"^“The Historical Aspects? of Quackery\\.?”$",
"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
"^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
"(?i)^Case Report$",
"^Boletín Informativo$",
"(?i)^Glioblastoma Multiforme$",
"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
"^Zaměstnanecké výhody$",
"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
"(?i)^Carotid body tumours?\\.?$",
"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
"^Avant-propos$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
"(?i)^PUBLIC HEALTH VERSUS THE STATE$",
"^Viñetas de Cortázar$",
"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
"^Aus der AGMB$",
"^Znanstveno-stručni prilozi$",
"(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
"(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
"(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
"^Finanční analýza podniku$",
"^Financial analysis( of business)?$",
"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
"^Jikken nihon shūshinsho$",
"(?i)^CORONER('|s)(s|') INQUESTS$",
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
"(?i)^Consultants' contract(s)?$",
"(?i)^Upute autorima$",
"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
"^Joshi shin kokubun$",
"^Kōtō shōgaku dokuhon nōson'yō$",
"^Jinjō shōgaku shōka$",
"^Shōgaku shūjichō$",
"^Nihon joshi dokuhon$",
"^Joshi shin dokuhon$",
"^Chūtō kanbun dokuhon$",
"^Wabun dokuhon$",
"(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
"(?i)^cardiac rehabilitation$",
"(?i)^Analytical summary$",
"^Thesaurus resolutionum Sacrae Congregationis Concilii$",
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
"^Prikazi i osvrti$",
"^Rodinný dům s provozovnou$",
"^Family house with an establishment$",
"^Shinsei chūtō shin kokugun$",
"^Pulmonary alveolar proteinosis(\\.?)$",
"^Shinshū kanbun$",
"^Viñeta(s?) de Rodríguez$",
"(?i)^RUBRIKA UREDNIKA$",
"^A Matching Model of the Academic Publication Market$",
"^Yōgaku kōyō$",
"^Internetový marketing$",
"^Internet marketing$",
"^Chūtō kokugo dokuhon$",
"^Kokugo dokuhon$",
"^Antibiotic Cover for Dental Extraction(s?)$",
"^Strategie podniku$",
"^Strategy of an Enterprise$",
"(?i)^respiratory disease(s?)(\\.?)$",
"^Award(s?) for Gallantry in Civil Defence$",
"^Podniková kultura$",
"^Corporate Culture$",
"^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
"^Pracovní motivace$",
"^Work Motivation$",
"^Kaitei kōtō jogaku dokuhon$",
"^Konsolidovaná účetní závěrka$",
"^Consolidated Financial Statements$",
"(?i)^intracranial tumour(s?)$",
"^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
"^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
"^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
"^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
"^Úroveň motivačního procesu jako způsobu vedení lidí$",
"^The level of motivation process as a leadership$",
"^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
"(?i)^news and events$",
"(?i)^NOVOSTI I DOGAĐAJI$",
"^Sansū no gakushū$",
"^Posouzení informačního systému firmy a návrh změn$",
"^Information System Assessment and Proposal for ICT Modification$",
"^Stresové zatížení pracovníků ve vybrané profesi$",
"^Stress load in a specific job$",
"^Sunday: Poster Sessions, Pt.*$",
"^Monday: Poster Sessions, Pt.*$",
"^Wednesday: Poster Sessions, Pt.*",
"^Tuesday: Poster Sessions, Pt.*$",
"^Analýza reklamy$",
"^Analysis of advertising$",
"^Shōgaku shūshinsho$",
"^Shōgaku sansū$",
"^Shintei joshi kokubun$",
"^Taishō joshi kokubun dokuhon$",
"^Joshi kokubun$",
"^Účetní uzávěrka a účetní závěrka v ČR$",
"(?i)^The \"?Causes\"? of Cancer$",
"^Normas para la publicación de artículos$",
"^Editor('|s)(s|') [Rr]eply$",
"^Editor(|s)(s|) letter$",
"^Redaktoriaus žodis$",
"^DISCUSSION ON THE PRECEDING PAPER$",
"^Kōtō shōgaku shūshinsho jidōyō$",
"^Shōgaku nihon rekishi$",
"^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
"^Préface$",
"^Occupational [Hh]ealth [Ss]ervices.$",
"^In Memoriam Professor Toshiyuki TAKESHIMA$",
"^Účetní závěrka ve vybraném podniku.*$",
"^Financial statements in selected company$",
"^Abdominal [Aa]ortic [Aa]neurysms.*$",
"^Pseudomyxoma peritonei$",
"^Kazalo autora$",
"(?i)^uvodna riječ$",
"^Motivace jako způsob vedení lidí$",
"^Motivation as a leadership$",
"^Polyfunkční dům$",
"^Multi\\-funkcional building$",
"^Podnikatelský plán$",
"(?i)^Podnikatelský záměr$",
"(?i)^Business Plan$",
"^Oceňování nemovitostí$",
"^Marketingová komunikace$",
"^Marketing communication$",
"^Sumario Analítico$",
"^Riječ uredništva$",
"^Savjetovanja i priredbe$",
"^Índice$",
"^(Starobosanski nadpisi).*$",
"^Vzdělávání pracovníků v organizaci$",
"^Staff training in organization$",
"^(Life Histories of North American Geometridae).*$",
"^Strategická analýza podniku$",
"^Strategic Analysis of an Enterprise$",
"^Sadržaj$",
"^Upute suradnicima$",
"^Rodinný dům$",
"(?i)^Fami(l)?ly house$",
"^Upute autorima$",
"^Strategic Analysis$",
"^Finanční analýza vybraného podniku$",
"^Finanční analýza$",
"^Riječ urednika$",
"(?i)^Content(s?)$",
"(?i)^Inhalt$",
"^Jinjō shōgaku shūshinsho jidōyō$",
"(?i)^Index$",
"^Chūgaku kokubun kyōkasho$",
"^Retrato de una mujer$",
"^Retrato de un hombre$",
"^Kōtō shōgaku dokuhon$",
"^Shotōka kokugo$",
"^Shōgaku dokuhon$",
"^Jinjō shōgaku kokugo dokuhon$",
"^Shinsei kokugo dokuhon$",
"^Teikoku dokuhon$",
"^Instructions to Authors$",
"^KİTAP TAHLİLİ$",
"^PRZEGLĄD PIŚMIENNICTWA$",
"(?i)^Presentación$",
"^İçindekiler$",
"(?i)^Tabl?e of contents$",
"^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
"^Editorial( Board)?$",
"(?i)^Editorial \\(English\\)$",
"^Editörden$",
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
"^(Kiri Karl Morgensternile).*$",
"^(\\[Eksliibris Aleksandr).*\\]$",
"^(\\[Eksliibris Aleksandr).*$",
"^(Eksliibris Aleksandr).*$",
"^(Kiri A\\. de Vignolles).*$",
"^(2 kirja Karl Morgensternile).*$",
"^(Pirita kloostri idaosa arheoloogilised).*$",
"^(Kiri tundmatule).*$",
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
"^(Eksliibris Nikolai Birukovile).*$",
"^(Eksliibris Nikolai Issakovile).*$",
"^(WHP Cruise Summary Information of section).*$",
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
"^(Measurement of the spin\\-dependent structure function).*",
"(?i)^.*authors[']? reply\\.?$",
"(?i)^.*authors[']? response\\.?$",
"^Data [mM]anagement [sS]ervices\\.$",
"Research and Advanced Technology for Digital Libraries"
]
},
"synonyms": {}
}
}
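The title blacklist is a list of regular expressions: entries prefixed with (?i) match case-insensitively, the others are case-sensitive and mostly anchored with ^...$. A small sketch showing how one entry behaves on invented titles; the exact matching semantics inside the dedup library may differ, this only exercises the pattern itself.

import java.util.regex.Pattern;

public class BlacklistPatternDemo {

	public static void main(String[] args) {
		// Pattern copied verbatim from the blacklist above; the titles are invented examples.
		Pattern p = Pattern.compile("(?i)^Data Management Plan");

		System.out.println(p.matcher("Data management plan for Horizon 2020").find()); // true
		System.out.println(p.matcher("A study of data management plans").find());      // false, not at the start
	}
}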

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -1,19 +1,13 @@
package eu.dnetlib.doiboost.crossref;
import java.io.BufferedOutputStream;
import java.net.URI;
import java.util.zip.GZIPOutputStream;
import static eu.dnetlib.dhp.common.collection.DecompressTarGz.doExtract;
import java.net.URI;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.mortbay.log.Log;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
@ -33,31 +27,16 @@ public class ExtractCrossrefRecords {
final String outputPath = parser.get("outputPath");
final String crossrefFileNameTarGz = parser.get("crossrefFileNameTarGz");
Path hdfsreadpath = new Path(workingPath.concat("/").concat(crossrefFileNameTarGz));
Configuration conf = new Configuration();
conf.set("fs.defaultFS", workingPath);
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
FileSystem fs = FileSystem.get(URI.create(workingPath), conf);
FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath);
try (TarArchiveInputStream tais = new TarArchiveInputStream(
new GzipCompressorInputStream(crossrefFileStream))) {
TarArchiveEntry entry = null;
while ((entry = tais.getNextTarEntry()) != null) {
if (!entry.isDirectory()) {
try (
FSDataOutputStream out = fs
.create(new Path(outputPath.concat(entry.getName()).concat(".gz")));
GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
IOUtils.copy(tais, gzipOs);
doExtract(fs, outputPath, workingPath.concat("/").concat(crossrefFileNameTarGz));
}
}
}
}
Log.info("Crossref dump reading completed");
}
}

View File

@ -59,52 +59,6 @@ object SparkGenerateDoiBoost {
val workingDirPath = parser.get("workingPath")
val openaireOrganizationPath = parser.get("openaireOrganizationPath")
val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable {
override def zero: Publication = new Publication
override def reduce(b: Publication, a: (String, Publication)): Publication = {
if (b == null) {
if (a != null && a._2 != null) {
a._2.setId(a._1)
return a._2
}
} else {
if (a != null && a._2 != null) {
b.mergeFrom(a._2)
b.setId(a._1)
val authors = AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor)
b.setAuthor(authors)
return b
}
}
new Publication
}
override def merge(b1: Publication, b2: Publication): Publication = {
if (b1 == null) {
if (b2 != null)
return b2
} else {
if (b2 != null) {
b1.mergeFrom(b2)
val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
b1.setAuthor(authors)
if (b2.getId != null && b2.getId.nonEmpty)
b1.setId(b2.getId)
return b1
}
}
new Publication
}
override def finish(reduction: Publication): Publication = reduction
override def bufferEncoder: Encoder[Publication] = Encoders.kryo[Publication]
override def outputEncoder: Encoder[Publication] = Encoders.kryo[Publication]
}
implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization]
implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
@ -175,8 +129,33 @@ object SparkGenerateDoiBoost {
.map(DoiBoostMappingUtil.fixPublication)
.map(p => (p.getId, p))
.groupByKey(_._1)
.agg(crossrefAggregator.toColumn)
.map(p => p._2)
.reduceGroups((left, right) => {
//Check left is not null
if (left != null && left._1 != null) {
//If right is null then return left
if (right == null || right._2 == null)
left
else {
// Here Left and Right are not null
// So we have to merge
val b1 = left._2
val b2 = right._2
b1.mergeFrom(b2)
b1.mergeOAFDataInfo(b2)
val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
b1.setAuthor(authors)
if (b2.getId != null && b2.getId.nonEmpty)
b1.setId(b2.getId)
//Return publication Merged
(b1.getId, b1)
}
} else {
// Left is Null so we return right
right
}
})
.filter(s => s != null && s._2 != null)
.map(s => s._2._2)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingDirPath/doiBoostPublicationFiltered")
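The custom Aggregator has been replaced by a reduceGroups call: the closure keeps whichever side is non-null and, when both sides carry a publication, merges the right one into the left, letting the right-hand id win when it is present. A self-contained sketch of that precedence, where Record is a hypothetical stand-in for the (id, Publication) tuples and the string concatenation stands for mergeFrom/mergeOAFDataInfo plus the author merge.

public class MergePrecedenceSketch {

	// Minimal stand-in for the (id, Publication) pairs reduced per group.
	static class Record {
		String id;
		String payload;

		Record(String id, String payload) {
			this.id = id;
			this.payload = payload;
		}
	}

	// Same precedence as the reduceGroups closure: null-safe, right merged into left,
	// right-hand id kept when it is non-empty.
	static Record merge(Record left, Record right) {
		if (left == null || left.id == null)
			return right;
		if (right == null || right.payload == null)
			return left;
		left.payload = left.payload + "+" + right.payload; // stand-in for the real merge calls
		if (right.id != null && !right.id.isEmpty())
			left.id = right.id;
		return left;
	}

	public static void main(String[] args) {
		Record merged = merge(new Record("doi:1", "crossref"), new Record("doi:1", "orcid"));
		System.out.println(merged.id + " -> " + merged.payload); // doi:1 -> crossref+orcid
	}
}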

View File

@ -446,16 +446,12 @@ case object Crossref2Oaf {
case "10.13039/501100000781" =>
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
case "10.13039/100000001" =>
generateSimpleRelationFromAward(funder, "nsf_________", a => a)
case "10.13039/501100001665" =>
generateSimpleRelationFromAward(funder, "anr_________", a => a)
case "10.13039/501100002341" =>
generateSimpleRelationFromAward(funder, "aka_________", a => a)
case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a)
case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a)
case "10.13039/501100001602" =>
generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", ""))
case "10.13039/501100000923" =>
generateSimpleRelationFromAward(funder, "arc_________", a => a)
generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""))
case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a)
case "10.13039/501100000038" =>
val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
@ -468,14 +464,10 @@ case object Crossref2Oaf {
val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
case "10.13039/501100002848" =>
generateSimpleRelationFromAward(funder, "conicytf____", a => a)
case "10.13039/501100003448" =>
generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
case "10.13039/501100010198" =>
generateSimpleRelationFromAward(funder, "sgov________", a => a)
case "10.13039/501100004564" =>
generateSimpleRelationFromAward(funder, "mestd_______", extractECAward)
case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a)
case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward)
case "10.13039/501100003407" =>
generateSimpleRelationFromAward(funder, "miur________", a => a)
val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63")
@ -487,15 +479,11 @@ case object Crossref2Oaf {
"irb_hr______",
a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", "")
)
case "10.13039/501100006769" =>
generateSimpleRelationFromAward(funder, "rsf_________", a => a)
case "10.13039/501100001711" =>
generateSimpleRelationFromAward(funder, "snsf________", snsfRule)
case "10.13039/501100004410" =>
generateSimpleRelationFromAward(funder, "tubitakf____", a => a)
case "10.10.13039/100004440" =>
generateSimpleRelationFromAward(funder, "wt__________", a => a)
case "10.13039/501100006769" => generateSimpleRelationFromAward(funder, "rsf_________", a => a)
case "10.13039/501100001711" => generateSimpleRelationFromAward(funder, "snsf________", snsfRule)
case "10.13039/501100004410" => generateSimpleRelationFromAward(funder, "tubitakf____", a => a)
case "10.13039/100004440" =>
generateSimpleRelationFromAward(funder, "wt__________", a => a)
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
@ -516,6 +504,7 @@ case object Crossref2Oaf {
case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)
case "Wellcome Trust Masters Fellowship" =>
generateSimpleRelationFromAward(funder, "wt__________", a => a)
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)

View File

@ -1456,7 +1456,7 @@
"issued": {
"date-parts": [
[
2021,
3021,
2,
22
]

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
@ -51,7 +51,7 @@
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-aggregation</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
<scope>compile</scope>
</dependency>

View File

@ -95,13 +95,14 @@ public class ResultTagger implements Serializable {
}
result
.getInstance()
.stream()
.map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey()))
.flatMap(p -> Stream.of(p.getFst(), p.getSnd()))
.map(s -> StringUtils.substringAfter(s, "|"))
.collect(Collectors.toCollection(HashSet::new))
// result
// .getInstance()
// .stream()
// .map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey()))
// .flatMap(p -> Stream.of(p.getFst(), p.getSnd()))
// .map(s -> StringUtils.substringAfter(s, "|"))
// .collect(Collectors.toCollection(HashSet::new))
tmp
.forEach(
dsId -> datasources
.addAll(

View File

@ -22,4 +22,11 @@ public class CountrySbs implements Serializable {
public void setClassname(String classname) {
this.classname = classname;
}
public static CountrySbs newInstance(String classid, String classname) {
CountrySbs csbs = new CountrySbs();
csbs.classid = classid;
csbs.classname = classname;
return csbs;
}
}

View File

@ -22,4 +22,11 @@ public class DatasourceCountry implements Serializable {
public void setCountry(CountrySbs country) {
this.country = country;
}
public static DatasourceCountry newInstance(String dataSourceId, CountrySbs country) {
DatasourceCountry dsc = new DatasourceCountry();
dsc.dataSourceId = dataSourceId;
dsc.country = country;
return dsc;
}
}

View File

@ -0,0 +1,32 @@
package eu.dnetlib.dhp.countrypropagation;
import java.io.Serializable;
public class EntityEntityRel implements Serializable {
private String entity1Id;
private String entity2Id;
public static EntityEntityRel newInstance(String source, String target) {
EntityEntityRel dso = new EntityEntityRel();
dso.entity1Id = source;
dso.entity2Id = target;
return dso;
}
public String getEntity1Id() {
return entity1Id;
}
public void setEntity1Id(String entity1Id) {
this.entity1Id = entity1Id;
}
public String getEntity2Id() {
return entity2Id;
}
public void setEntity2Id(String entity2Id) {
this.entity2Id = entity2Id;
}
}
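The new newInstance factories on CountrySbs, DatasourceCountry and EntityEntityRel keep the join code in the propagation jobs free of setter boilerplate, and the classes remain plain beans usable with Spark's bean encoders. A usage sketch with invented identifiers; the import packages for CountrySbs and DatasourceCountry are assumed to match EntityEntityRel's.

import eu.dnetlib.dhp.countrypropagation.CountrySbs;        // package assumed
import eu.dnetlib.dhp.countrypropagation.DatasourceCountry; // package assumed
import eu.dnetlib.dhp.countrypropagation.EntityEntityRel;

public class CountryBeansDemo {

	public static void main(String[] args) {
		// Values are illustrative only.
		CountrySbs country = CountrySbs.newInstance("NL", "Netherlands");
		DatasourceCountry dsCountry = DatasourceCountry.newInstance("10|datasource::0001", country);
		EntityEntityRel providedBy = EntityEntityRel.newInstance("10|datasource::0001", "20|organization::0001");

		System.out.println(providedBy.getEntity1Id() + " -> " + providedBy.getEntity2Id());
	}
}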

View File

@ -2,14 +2,16 @@
package eu.dnetlib.dhp.countrypropagation;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
@ -17,11 +19,15 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import scala.Tuple2;
/**
* For the association of the country to the datasource The association is computed only for datasource of specific type
@ -54,9 +60,8 @@ public class PrepareDatasourceCountryAssociation {
log.info("outputPath {}: ", outputPath);
SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession(
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
@ -77,40 +82,46 @@ public class PrepareDatasourceCountryAssociation {
String inputPath,
String outputPath) {
final String whitelisted = whitelist
.stream()
.map(id -> " d.id = '" + id + "'")
.collect(Collectors.joining(" OR "));
// filtering of the datasource taking only the non deleted by inference and those with the allowed types or
// whose id is in whitelist
Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class)
.filter(
(FilterFunction<Datasource>) ds -> !ds.getDataInfo().getDeletedbyinference() &&
(allowedtypes.contains(ds.getDatasourcetype().getClassid()) ||
whitelist.contains(ds.getId())));
final String allowed = allowedtypes
.stream()
.map(type -> " d.datasourcetype.classid = '" + type + "'")
.collect(Collectors.joining(" OR "));
// filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass
Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class)
.filter(
(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) &&
!rel.getDataInfo().getDeletedbyinference());
Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class);
Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class);
Dataset<Organization> organization = readPath(spark, inputPath + "/organization", Organization.class);
// filtering of the organization taking only the non deleted by inference and those with information about the
// country
Dataset<Organization> organization = readPath(spark, inputPath + "/organization", Organization.class)
.filter(
(FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference() &&
o.getCountry().getClassid().length() > 0 &&
!o.getCountry().getClassid().equals(ModelConstants.UNKNOWN));
datasource.createOrReplaceTempView("datasource");
relation.createOrReplaceTempView("relation");
organization.createOrReplaceTempView("organization");
// associated the datasource id with the id of the organization providing the datasource
Dataset<EntityEntityRel> dse = datasource
.joinWith(relation, datasource.col("id").equalTo(relation.col("source")))
.map(
(MapFunction<Tuple2<Datasource, Relation>, EntityEntityRel>) t2 -> EntityEntityRel
.newInstance(t2._2.getSource(), t2._2.getTarget()),
Encoders.bean(EntityEntityRel.class));
String query = "SELECT source dataSourceId, " +
"named_struct('classid', country.classid, 'classname', country.classname) country " +
"FROM datasource d " +
"JOIN relation rel " +
"ON d.id = rel.source " +
"JOIN organization o " +
"ON o.id = rel.target " +
"WHERE rel.datainfo.deletedbyinference = false " +
"and lower(rel.relclass) = '" + ModelConstants.IS_PROVIDED_BY.toLowerCase() + "'" +
"and o.datainfo.deletedbyinference = false " +
"and length(o.country.classid) > 0 " +
"and (" + allowed + " or " + whitelisted + ")";
spark
.sql(query)
.as(Encoders.bean(DatasourceCountry.class))
// joins with the information stored in the organization dataset to associate the country to the datasource id
dse
.joinWith(organization, dse.col("entity2Id").equalTo(organization.col("id")))
.map((MapFunction<Tuple2<EntityEntityRel, Organization>, DatasourceCountry>) t2 -> {
Qualifier country = t2._2.getCountry();
return DatasourceCountry
.newInstance(
t2._1.getEntity1Id(),
CountrySbs.newInstance(country.getClassid(), country.getClassname()));
}, Encoders.bean(DatasourceCountry.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)

View File

@ -3,14 +3,21 @@ package eu.dnetlib.dhp.countrypropagation;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset;
import org.slf4j.Logger;
@ -23,14 +30,6 @@ import scala.Tuple2;
public class PrepareResultCountrySet {
private static final Logger log = LoggerFactory.getLogger(PrepareResultCountrySet.class);
private static final String RESULT_COUNTRYSET_QUERY = "SELECT id resultId, collect_set(country) countrySet "
+ "FROM ( SELECT id, country "
+ "FROM datasource_country JOIN cfhb ON cf = dataSourceId "
+ "UNION ALL "
+ "SELECT id, country FROM datasource_country "
+ "JOIN cfhb ON hb = dataSourceId ) tmp "
+ "GROUP BY id";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
@ -45,6 +44,8 @@ public class PrepareResultCountrySet {
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String workingPath = parser.get("workingPath");
String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
@ -60,9 +61,8 @@ public class PrepareResultCountrySet {
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession(
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
@ -72,6 +72,7 @@ public class PrepareResultCountrySet {
inputPath,
outputPath,
datasourcecountrypath,
workingPath,
resultClazz);
});
}
@ -81,43 +82,63 @@ public class PrepareResultCountrySet {
String inputPath,
String outputPath,
String datasourcecountrypath,
String workingPath,
Class<R> resultClazz) {
Dataset<R> result = readPath(spark, inputPath, resultClazz);
result.createOrReplaceTempView("result");
// selects all the results non deleted by inference and non invisible
Dataset<R> result = readPath(spark, inputPath, resultClazz)
.filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
!r.getDataInfo().getInvisible());
createCfHbforResult(spark);
// of the results collects the distinct keys for collected from (at the level of the result) and hosted by
// and produces pairs resultId, key for each distinct key associated to the result
result.flatMap((FlatMapFunction<R, EntityEntityRel>) r -> {
Set<String> cfhb = r.getCollectedfrom().stream().map(cf -> cf.getKey()).collect(Collectors.toSet());
cfhb.addAll(r.getInstance().stream().map(i -> i.getHostedby().getKey()).collect(Collectors.toSet()));
return cfhb
.stream()
.map(value -> EntityEntityRel.newInstance(r.getId(), value))
.collect(Collectors.toList())
.iterator();
}, Encoders.bean(EntityEntityRel.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingPath + "/resultCfHb");
Dataset<DatasourceCountry> datasource_country = readPath(spark, datasourcecountrypath, DatasourceCountry.class);
datasource_country.createOrReplaceTempView("datasource_country");
spark
.sql(RESULT_COUNTRYSET_QUERY)
.as(Encoders.bean(ResultCountrySet.class))
.toJavaRDD()
.mapToPair(value -> new Tuple2<>(value.getResultId(), value))
.reduceByKey((a, b) -> {
ArrayList<CountrySbs> countryList = a.getCountrySet();
Set<String> countryCodes = countryList
.stream()
.map(CountrySbs::getClassid)
.collect(Collectors.toSet());
b
.getCountrySet()
.stream()
.forEach(c -> {
if (!countryCodes.contains(c.getClassid())) {
countryList.add(c);
countryCodes.add(c.getClassid());
}
Dataset<EntityEntityRel> cfhb = readPath(spark, workingPath + "/resultCfHb", EntityEntityRel.class);
datasource_country
.joinWith(
cfhb, cfhb
.col("entity2Id")
.equalTo(datasource_country.col("datasourceId")))
.groupByKey(
(MapFunction<Tuple2<DatasourceCountry, EntityEntityRel>, String>) t2 -> t2._2().getEntity1Id(),
Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, Tuple2<DatasourceCountry, EntityEntityRel>, ResultCountrySet>) (k, it) -> {
ResultCountrySet rcs = new ResultCountrySet();
rcs.setResultId(k);
Set<CountrySbs> set = new HashSet<>();
Set<String> countryCodes = new HashSet<>();
DatasourceCountry first = it.next()._1();
countryCodes.add(first.getCountry().getClassid());
set.add(first.getCountry());
it.forEachRemaining(t2 -> {
if (!countryCodes.contains(t2._1().getCountry().getClassid()))
set.add(t2._1().getCountry());
});
a.setCountrySet(countryList);
return a;
})
.map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2()))
.saveAsTextFile(outputPath, GzipCodec.class);
rcs.setCountrySet(new ArrayList<>(set));
return rcs;
}, Encoders.bean(ResultCountrySet.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
}
}
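Inside the mapGroups closure each result keeps at most one CountrySbs per classid; the same invariant can be expressed with a map keyed on the classid. A sketch, not the production code: candidateCountries is an invented input, and the CountrySbs import package is assumed.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import eu.dnetlib.dhp.countrypropagation.CountrySbs; // package assumed

public class DistinctCountriesSketch {

	// Keeps the first CountrySbs seen for each classid, mirroring the two-set logic above.
	static List<CountrySbs> distinctByClassid(List<CountrySbs> candidateCountries) {
		Map<String, CountrySbs> byClassid = new LinkedHashMap<>();
		for (CountrySbs c : candidateCountries) {
			byClassid.putIfAbsent(c.getClassid(), c);
		}
		return new ArrayList<>(byClassid.values());
	}

	public static void main(String[] args) {
		List<CountrySbs> input = new ArrayList<>();
		input.add(CountrySbs.newInstance("NL", "Netherlands"));
		input.add(CountrySbs.newInstance("NL", "Netherlands"));
		input.add(CountrySbs.newInstance("IT", "Italy"));
		System.out.println(distinctByClassid(input).size()); // 2
	}
}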

View File

@ -56,12 +56,6 @@ public class SparkCountryPropagationJob {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final Boolean saveGraph = Optional
.ofNullable(parser.get("saveGraph"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("saveGraph: {}", saveGraph);
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf();
@ -75,8 +69,7 @@ public class SparkCountryPropagationJob {
sourcePath,
preparedInfoPath,
outputPath,
resultClazz,
saveGraph);
resultClazz);
});
}
@ -85,27 +78,25 @@ public class SparkCountryPropagationJob {
String sourcePath,
String preparedInfoPath,
String outputPath,
Class<R> resultClazz,
boolean saveGraph) {
Class<R> resultClazz) {
if (saveGraph) {
log.info("Reading Graph table from: {}", sourcePath);
Dataset<R> res = readPath(spark, sourcePath, resultClazz);
log.info("Reading Graph table from: {}", sourcePath);
Dataset<R> res = readPath(spark, sourcePath, resultClazz);
log.info("Reading prepared info: {}", preparedInfoPath);
Dataset<ResultCountrySet> prepared = spark
.read()
.json(preparedInfoPath)
.as(Encoders.bean(ResultCountrySet.class));
log.info("Reading prepared info: {}", preparedInfoPath);
Dataset<ResultCountrySet> prepared = spark
.read()
.json(preparedInfoPath)
.as(Encoders.bean(ResultCountrySet.class));
res
.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
.map(getCountryMergeFn(), Encoders.bean(resultClazz))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath);
res
.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
.map(getCountryMergeFn(), Encoders.bean(resultClazz))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath);
}
}
private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {

View File

@ -5,18 +5,6 @@
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": false
},
{
"paramName":"sg",
"paramLongName":"saveGraph",
"paramDescription": "true if the new version of the graph must be saved",
"paramRequired": false
},
{
"paramName":"tn",
"paramLongName":"resultTableName",

View File

@ -5,12 +5,6 @@
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",

View File

@ -12,9 +12,9 @@
"paramRequired": true
},
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramName":"w",
"paramLongName":"workingPath",
"paramDescription": "the working path",
"paramRequired": true
},
{

View File

@ -110,7 +110,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--whitelist</arg><arg>${whitelist}</arg>
<arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_join_prepare_result_country"/>
@ -146,7 +145,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--workingPath</arg><arg>${workingDir}/workingP</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
@ -176,7 +175,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--workingPath</arg><arg>${workingDir}/workingD</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
@ -206,7 +205,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--workingPath</arg><arg>${workingDir}/workingO</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
@ -236,7 +235,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--workingPath</arg><arg>${workingDir}/workingS</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
@ -275,7 +274,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/publication</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
</spark>
@ -305,7 +303,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/dataset</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
</spark>
@ -335,7 +332,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
</spark>
@ -365,7 +361,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/software</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
</spark>

Some files were not shown because too many files have changed in this diff.