Merge branch 'master' into provision_indexing

2021-02-09 15:46:26 +01:00 · 2021-02-09 15:46:26 +01:00 · 62bd3c53ee
parent ff72fcd91a f1a852f278
commit 62bd3c53ee
136 changed files with 10191 additions and 764 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
@ -1,10 +1,10 @@

 package eu.dnetlib.dhp.common;

-import com.google.common.collect.Maps;
-
 import java.util.Map;

+import com.google.common.collect.Maps;
+
 public class Constants {

 	public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java
@ -1,6 +1,10 @@

 package eu.dnetlib.dhp.common;

+import java.io.Serializable;
+import java.util.*;
+import java.util.stream.Collectors;
+
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.dump.oaf.*;
 import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance;
@ -10,10 +14,6 @@ import eu.dnetlib.dhp.schema.oaf.Field;
 import eu.dnetlib.dhp.schema.oaf.Journal;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

-import java.io.Serializable;
-import java.util.*;
-import java.util.stream.Collectors;
-
 public class GraphResultMapper implements Serializable {

 	public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
@ -291,12 +291,10 @@ public class GraphResultMapper implements Serializable {
 					.map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
 					.collect(Collectors.toList()));

-
 		return out;

 	}

-
 	private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
 		CommunityInstance instance = new CommunityInstance();

@ -347,7 +345,6 @@ public class GraphResultMapper implements Serializable {

 	}

-
 	private static Subject getSubject(StructuredProperty s) {
 		Subject subject = new Subject();
 		subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.merge;
 import java.text.Normalizer;
 import java.util.*;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;

 import org.apache.commons.lang3.StringUtils;

@ -32,27 +33,33 @@ public class AuthorMerger {

 	}

-	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
+	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
 		int pa = countAuthorsPids(a);
 		int pb = countAuthorsPids(b);
 		List<Author> base, enrich;
 		int sa = authorsSize(a);
 		int sb = authorsSize(b);

-		if (pa == pb) {
-			base = sa > sb ? a : b;
-			enrich = sa > sb ? b : a;
-		} else {
+		if (sa == sb) {
 			base = pa > pb ? a : b;
 			enrich = pa > pb ? b : a;
+		} else {
+			base = sa > sb ? a : b;
+			enrich = sa > sb ? b : a;
 		}
-		enrichPidFromList(base, enrich);
+		enrichPidFromList(base, enrich, threshold);
 		return base;
 	}

-	private static void enrichPidFromList(List<Author> base, List<Author> enrich) {
+	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
+		return mergeAuthor(a, b, THRESHOLD);
+	}
+
+	private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
 		if (base == null || enrich == null)
 			return;
+
+		// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
 		final Map<String, Author> basePidAuthorMap = base
 			.stream()
 			.filter(a -> a.getPid() != null && a.getPid().size() > 0)
@ -63,6 +70,7 @@ public class AuthorMerger {
 					.map(p -> new Tuple2<>(pidToComparableString(p), a)))
 			.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));

+		// <pid, Author> (list of pid that are missing in the other list)
 		final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
 			.stream()
 			.filter(a -> a.getPid() != null && a.getPid().size() > 0)
@ -83,10 +91,10 @@ public class AuthorMerger {
 						.max(Comparator.comparing(Tuple2::_1));

 					if (simAuthor.isPresent()) {
-						double th = THRESHOLD;
+						double th = threshold;
 						// increase the threshold if the surname is too short
 						if (simAuthor.get()._2().getSurname() != null
-							&& simAuthor.get()._2().getSurname().length() <= 3)
+							&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
 							th = 0.99;

 						if (simAuthor.get()._1() > th) {
@ -156,7 +164,7 @@ public class AuthorMerger {
 	}

 	private static String normalize(final String s) {
-		return nfd(s)
+		String[] normalized = nfd(s)
 			.toLowerCase()
 			// do not compact the regexes in a single expression, would cause StackOverflowError
 			// in case
@ -166,7 +174,12 @@ public class AuthorMerger {
 			.replaceAll("(\\p{Punct})+", " ")
 			.replaceAll("(\\d)+", " ")
 			.replaceAll("(\\n)+", " ")
-			.trim();
+			.trim()
+			.split(" ");
+
+		Arrays.sort(normalized);
+
+		return String.join(" ", normalized);
 	}

 	private static String nfd(final String s) {
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java
@ -0,0 +1,100 @@
+
+package eu.dnetlib.dhp.oa.merge;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import eu.dnetlib.pace.util.MapDocumentUtil;
+import scala.Tuple2;
+
+/**
+ * Tests {@link AuthorMerger} against the author lists of the publications stored in the
+ * {@code publications_with_authors.json} test resource (one JSON publication per line).
+ */
+public class AuthorMergerTest {
+
+	// a single shared mapper: building a new ObjectMapper for every parsed line is wasteful
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	private String publicationsBasePath;
+
+	private List<List<Author>> authors;
+
+	/**
+	 * Resolves the folder holding the test resources and loads the author list of every sample publication.
+	 */
+	@BeforeEach
+	public void setUp() throws Exception {
+
+		publicationsBasePath = Paths
+			.get(AuthorMergerTest.class.getResource("/eu/dnetlib/dhp/oa/merge").toURI())
+			.toFile()
+			.getAbsolutePath();
+
+		authors = readSample(publicationsBasePath + "/publications_with_authors.json", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+	}
+
+	@Test
+	public void mergeTest() { // used in the dedup: threshold set to 0.95
+
+		// number the lists by position: indexOf would report the wrong number when two lists are equal
+		for (int i = 0; i < authors.size(); i++) {
+			System.out.println("List " + (i + 1));
+			for (Author author : authors.get(i)) {
+				System.out.println(authorToString(author));
+			}
+		}
+
+		List<Author> merge = AuthorMerger.merge(authors);
+
+		System.out.println("Merge ");
+		for (Author author : merge) {
+			System.out.println(authorToString(author));
+		}
+
+		Assertions.assertEquals(7, merge.size());
+
+	}
+
+	/**
+	 * Reads a file containing one JSON document per line.
+	 *
+	 * @param path  the path of the file to read
+	 * @param clazz the class each line is deserialized to
+	 * @return one tuple per line: the value found at the {@code $.id} json path and the deserialized object
+	 * @throws java.io.UncheckedIOException if the file cannot be read or a line cannot be parsed
+	 */
+	public <T> List<Tuple2<String, T>> readSample(String path, Class<T> clazz) {
+		List<Tuple2<String, T>> res = new ArrayList<>();
+		// try-with-resources: the reader is closed even when reading or parsing fails
+		try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
+			String line;
+			while ((line = reader.readLine()) != null) {
+				res
+					.add(
+						new Tuple2<>(
+							MapDocumentUtil.getJPathString("$.id", line),
+							OBJECT_MAPPER.readValue(line, clazz)));
+			}
+		} catch (IOException e) {
+			// fail right here instead of returning a partial sample that would only break a later assertion
+			throw new java.io.UncheckedIOException("unable to read sample file " + path, e);
+		}
+
+		return res;
+	}
+
+	/**
+	 * Builds a printable representation of an author: the full name followed by the comparable
+	 * string of each of its pids (if any).
+	 */
+	public String authorToString(Author a) {
+
+		StringBuilder print = new StringBuilder("Fullname = ");
+		print.append(a.getFullname()).append(" pid = [");
+		if (a.getPid() != null)
+			for (StructuredProperty sp : a.getPid()) {
+				print.append(sp.toComparableString()).append(" ");
+			}
+		print.append("]");
+		return print.toString();
+	}
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/publications_with_authors.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/publications_with_authors.json
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
@ -105,6 +105,8 @@ public class ModelConstants {
 	public static final KeyValue UNKNOWN_REPOSITORY = keyValue(
 		"10|openaire____::55045bd2a65019fd8e6741a755395c8c", "Unknown Repository");

+	public static final Qualifier UNKNOWN_COUNTRY = qualifier(UNKNOWN, "Unknown", DNET_COUNTRY_TYPE, DNET_COUNTRY_TYPE);
+
 	private static Qualifier qualifier(
 		final String classid,
 		final String classname,
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java
@ -62,10 +62,9 @@ public abstract class Oaf implements Serializable {
 				.distinct() // relies on KeyValue.equals
 				.collect(Collectors.toList()));

-		mergeOAFDataInfo(o);
-
 		setLastupdatetimestamp(
-				Math.max(
+			Math
+				.max(
 					Optional.ofNullable(getLastupdatetimestamp()).orElse(0L),
 					Optional.ofNullable(o.getLastupdatetimestamp()).orElse(0L)));
 	}
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java
@ -351,8 +351,6 @@ public class Project extends OafEntity implements Serializable {
 			? p.getFundedamount()
 			: fundedamount;

-		// programme = mergeLists(programme, p.getProgramme());
-
 		h2020classification = mergeLists(h2020classification, p.getH2020classification());

 		mergeOAFDataInfo(e);
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java
@ -243,7 +243,7 @@ public class Result extends OafEntity implements Serializable {

 		Result r = (Result) e;

-		// TODO consider merging also Measures
+		measures = mergeLists(measures, r.getMeasures());

 		instance = mergeLists(instance, r.getInstance());

@ -323,13 +323,13 @@ public class Result extends OafEntity implements Serializable {
 		if (a.size() == b.size()) {
 			int msa = a
 				.stream()
-				.filter(i -> i.getValue() != null)
+				.filter(i -> i != null && i.getValue() != null)
 				.map(i -> i.getValue().length())
 				.max(Comparator.naturalOrder())
 				.orElse(0);
 			int msb = b
 				.stream()
-				.filter(i -> i.getValue() != null)
+				.filter(i -> i != null && i.getValue() != null)
 				.map(i -> i.getValue().length())
 				.max(Comparator.naturalOrder())
 				.orElse(0);
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java
@ -0,0 +1,28 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Maps the model of the bipFinder! input data: each entry associates a key with its list of
+ * {@link Score}s. Only needed for deserialization purposes.
+ */
+public class BipDeserialize extends HashMap<String, List<Score>> implements Serializable {
+
+	public BipDeserialize() {
+		super();
+	}
+
+	/**
+	 * Null-safe lookup of the scores associated to a key.
+	 *
+	 * @param key the key to look up
+	 * @return the list stored for the key, or a new empty list (not stored in the map) when the key
+	 *         is missing or mapped to null
+	 */
+	public List<Score> get(String key) {
+		final List<Score> scores = super.get(key);
+		return scores != null ? scores : new ArrayList<>();
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java
@ -0,0 +1,30 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Rewriting of the bipFinder input data by extracting the identifier of the result (doi).
+ * One instance holds the scores of a single entry of {@link BipDeserialize}.
+ * SparkAtomicActionScoreJob handles it through Encoders.bean(BipScore.class), hence the plain
+ * getter/setter pairs.
+ */
+
+public class BipScore implements Serializable {
+	private String id; // doi (SparkAtomicActionScoreJob overwrites it with the id of the matching result after the join)
+	private List<Score> scoreList; // unit as given in the inputfile
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public List<Score> getScoreList() {
+		return scoreList;
+	}
+
+	public void setScoreList(List<Score> scoreList) {
+		this.scoreList = scoreList;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java
@ -0,0 +1,85 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Collects all the atomic actions produced for the different result types (publication, dataset,
+ * otherresearchproduct, software) and saves them together in the output path of the ActionSet.
+ */
+public class CollectAndSave implements Serializable {
+
+	private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	/**
+	 * Entry point: parses the arguments declared in input_actionset_parameter.json, removes the
+	 * output path and writes there the union of the per-type sequence files.
+	 *
+	 * @param args the command line arguments (isSparkSessionManaged, inputPath, outputPath)
+	 */
+	public static <I extends Result> void main(String[] args) throws Exception {
+
+		String jsonConfiguration = IOUtils
+			.toString(
+				CollectAndSave.class
+					.getResourceAsStream(
+						"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"));
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+
+		parser.parseArgument(args);
+
+		// the session is considered managed unless the argument explicitly says otherwise
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("inputPath");
+		log.info("inputPath: {}", inputPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath: {}", outputPath);
+
+		SparkConf conf = new SparkConf();
+
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> {
+				removeOutputDir(spark, outputPath);
+				collectAndSave(spark, inputPath, outputPath);
+			});
+	}
+
+	/**
+	 * Reads the sequence files found in the publication, dataset, otherresearchproduct and software
+	 * sub-folders of inputPath and saves their union as a single sequence file under outputPath.
+	 */
+	private static void collectAndSave(SparkSession spark, String inputPath, String outputPath) {
+		JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		sc
+			.sequenceFile(inputPath + "/publication", Text.class, Text.class)
+			.union(sc.sequenceFile(inputPath + "/dataset", Text.class, Text.class))
+			.union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class))
+			.union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class))
+			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
+	}
+
+	/** Removes the given path using the hadoop configuration of the running spark context. */
+	private static void removeOutputDir(SparkSession spark, String path) {
+		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java
@ -0,0 +1,26 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+
+/**
+ * Key/value pair used as element of the unit list of a {@link Score}.
+ * Not to be confused with eu.dnetlib.dhp.schema.oaf.KeyValue: SparkAtomicActionScoreJob copies
+ * key and value of this bean into an instance of that class.
+ */
+public class KeyValue implements Serializable {
+
+	private String key;
+	private String value;
+
+	public String getKey() {
+		return key;
+	}
+
+	public void setKey(String key) {
+		this.key = key;
+	}
+
+	public String getValue() {
+		return value;
+	}
+
+	public void setValue(String value) {
+		this.value = value;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java
@ -0,0 +1,28 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+
+/**
+ * Subset of the information of the generic results that are needed to create the atomic action.
+ * SparkAtomicActionScoreJob fills it from the "select pIde.value value, id" query through
+ * Encoders.bean(PreparedResult.class), so the property names must match the selected columns.
+ */
+public class PreparedResult implements Serializable {
+	private String id; // openaire id
+	private String value; // doi
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public String getValue() {
+		return value;
+	}
+
+	public void setValue(String value) {
+		this.value = value;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java
@ -0,0 +1,30 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * represents the score in the input file.
+ * SparkAtomicActionScoreJob turns each Score into a Measure: the id becomes the measure id and
+ * every element of unit becomes one unit of the measure.
+ */
+public class Score implements Serializable {
+
+	private String id; // copied to the id of the generated Measure
+	private List<KeyValue> unit; // key/value pairs copied to the unit of the generated Measure
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public List<KeyValue> getUnit() {
+		return unit;
+	}
+
+	public void setUnit(List<KeyValue> unit) {
+		this.unit = unit;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
@ -0,0 +1,200 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.MapGroupsFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import scala.Tuple2;
+
+/**
+ * Creates the Atomic Actions for one type of result: the bipFinder scores are joined by doi with
+ * the results read from the input path, and every matching result gets an atomic action carrying
+ * its scores as measures.
+ */
+public class SparkAtomicActionScoreJob implements Serializable {
+
+	// classid of the pid qualifier identifying a doi
+	private static final String DOI = "doi";
+	private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class);
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	/**
+	 * Entry point: parses the arguments declared in input_parameters.json, removes the output path
+	 * and produces the atomic actions for the result class named by resultTableName.
+	 *
+	 * @param args the command line arguments (isSparkSessionManaged, inputPath, outputPath,
+	 *             bipScorePath, resultTableName)
+	 */
+	public static <I extends Result> void main(String[] args) throws Exception {
+
+		String jsonConfiguration = IOUtils
+			.toString(
+				SparkAtomicActionScoreJob.class
+					.getResourceAsStream(
+						"/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json"));
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+
+		parser.parseArgument(args);
+
+		// the session is considered managed unless the argument explicitly says otherwise
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("inputPath");
+		log.info("inputPath: {}", inputPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath: {}", outputPath);
+
+		final String bipScorePath = parser.get("bipScorePath");
+		log.info("bipScorePath: {}", bipScorePath);
+
+		final String resultClassName = parser.get("resultTableName");
+		log.info("resultTableName: {}", resultClassName);
+
+		// the class is loaded by name from the resultTableName argument, hence the unchecked cast
+		@SuppressWarnings("unchecked")
+		Class<I> inputClazz = (Class<I>) Class.forName(resultClassName);
+
+		SparkConf conf = new SparkConf();
+
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> {
+				removeOutputDir(spark, outputPath);
+				prepareResults(spark, inputPath, outputPath, bipScorePath, inputClazz);
+			});
+	}
+
+	/**
+	 * Builds the atomic actions and saves them as a sequence file.
+	 *
+	 * @param spark        the spark session
+	 * @param inputPath    the path of the results, one JSON document per line
+	 * @param outputPath   the path where the sequence file of atomic actions is written
+	 * @param bipScorePath the path of the bipFinder scores, one JSON document per line
+	 * @param inputClazz   the class of the results to read
+	 */
+	private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
+		String bipScorePath, Class<I> inputClazz) {
+
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
+			.textFile(bipScorePath)
+			.map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class));
+
+		// one BipScore for each key (doi) found in each input line
+		Dataset<BipScore> bipScores = spark
+			.createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> {
+				BipScore bs = new BipScore();
+				bs.setId(key);
+				bs.setScoreList(entry.get(key));
+				return bs;
+			}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
+
+		Dataset<I> results = readPath(spark, inputPath, inputClazz);
+
+		results.createOrReplaceTempView("result");
+
+		// (doi, result id) pairs of the results not deleted by inference
+		Dataset<PreparedResult> preparedResult = spark
+			.sql(
+				"select pIde.value value, id " +
+					"from result " +
+					"lateral view explode (pid) p as pIde " +
+					"where dataInfo.deletedbyinference = false and pIde.qualifier.classid = '" + DOI + "'")
+			.as(Encoders.bean(PreparedResult.class));
+
+		bipScores
+			.joinWith(
+				preparedResult, bipScores.col("id").equalTo(preparedResult.col("value")),
+				"inner")
+			// from here on the id of the score is the id of the result, no longer the doi
+			.map((MapFunction<Tuple2<BipScore, PreparedResult>, BipScore>) value -> {
+				BipScore ret = value._1();
+				ret.setId(value._2().getId());
+				return ret;
+			}, Encoders.bean(BipScore.class))
+			.groupByKey((MapFunction<BipScore, String>) BipScore::getId, Encoders.STRING())
+			// a result matched by more than one score entry gets the measures of all of them
+			.mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> {
+				Result ret = new Result();
+				ret.setDataInfo(getDataInfo());
+				BipScore first = it.next();
+				ret.setId(first.getId());
+
+				ret.setMeasures(getMeasure(first));
+				it.forEachRemaining(value -> ret.getMeasures().addAll(getMeasure(value)));
+
+				return ret;
+			}, Encoders.bean(Result.class))
+			.toJavaRDD()
+			.map(p -> new AtomicAction(inputClazz, p))
+			.mapToPair(
+				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
+					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
+			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
+
+	}
+
+	/**
+	 * Converts the scores of a BipScore into measures: one Measure per Score, with one unit for
+	 * each key/value pair of the score.
+	 */
+	private static List<Measure> getMeasure(BipScore value) {
+		return value
+			.getScoreList()
+			.stream()
+			.map(score -> {
+				Measure m = new Measure();
+				m.setId(score.getId());
+				m
+					.setUnit(
+						score
+							.getUnit()
+							.stream()
+							.map(unit -> {
+								KeyValue kv = new KeyValue();
+								kv.setValue(unit.getValue());
+								kv.setKey(unit.getKey());
+								kv.setDataInfo(getDataInfo());
+								return kv;
+							})
+							.collect(Collectors.toList()));
+				return m;
+			})
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Creates the DataInfo attached to the generated results and units: not inferred, not
+	 * invisible, not deleted by inference, empty trust, provenance action sysimport:actionset.
+	 */
+	private static DataInfo getDataInfo() {
+		DataInfo di = new DataInfo();
+		di.setInferred(false);
+		di.setInvisible(false);
+		di.setDeletedbyinference(false);
+		di.setTrust("");
+		Qualifier qualifier = new Qualifier();
+		qualifier.setClassid("sysimport:actionset");
+		qualifier.setClassname("Harvested");
+		qualifier.setSchemename("dnet:provenanceActions");
+		qualifier.setSchemeid("dnet:provenanceActions");
+		di.setProvenanceaction(qualifier);
+		return di;
+	}
+
+	/** Removes the given path using the hadoop configuration of the running spark context. */
+	private static void removeOutputDir(SparkSession spark, String path) {
+		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+	}
+
+	/**
+	 * Reads a text file containing one JSON document per line into a typed Dataset.
+	 *
+	 * @param spark     the spark session
+	 * @param inputPath the path of the text file
+	 * @param clazz     the bean class each line is deserialized to
+	 * @return the Dataset of the deserialized objects
+	 */
+	public static <R> Dataset<R> readPath(
+		SparkSession spark, String inputPath, Class<R> clazz) {
+		return spark
+			.read()
+			.textFile(inputPath)
+			.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json
@ -0,0 +1,20 @@
+[
+  {
+    "paramName": "issm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "when true will stop SparkSession after job execution",
+    "paramRequired": false
+  },
+  {
+    "paramName": "ip",
+    "paramLongName": "inputPath",
+    "paramDescription": "the path of the folder containing the atomic actions produced for each type of result",
+    "paramRequired": true
+  },
+  {
+    "paramName": "o",
+    "paramLongName": "outputPath",
+    "paramDescription": "the path of the new ActionSet",
+    "paramRequired": true
+  }
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json
@ -0,0 +1,32 @@
+[
+  {
+    "paramName": "issm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "when true will stop SparkSession after job execution",
+    "paramRequired": false
+  },
+  {
+    "paramName": "ip",
+    "paramLongName": "inputPath",
+    "paramDescription": "the path of the results to be extended with the bipFinder scores",
+    "paramRequired": true
+  },
+  {
+    "paramName": "o",
+    "paramLongName": "outputPath",
+    "paramDescription": "the path of the new ActionSet",
+    "paramRequired": true
+  },
+  {
+    "paramName": "rtn",
+    "paramLongName": "resultTableName",
+    "paramDescription": "the fully qualified name of the class of the results to process",
+    "paramRequired": true
+  },
+  {
+    "paramName": "bsp",
+    "paramLongName": "bipScorePath",
+    "paramDescription": "the path where to find the bipFinder scores",
+    "paramRequired": true
+  }
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml
@ -0,0 +1,171 @@
+<workflow-app name="BipFinderScore" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>inputPath</name>
+            <description>the input path of the resources to be extended</description>
+        </property>
+
+        <property>
+            <name>bipScorePath</name>
+            <description>the path where to find the bipFinder scores</description>
+        </property>
+        <property>
+            <name>outputPath</name>
+            <description>the path where to store the actionset</description>
+        </property>
+    </parameters>
+
+    <start to="deleteoutputpath"/>
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <action name="deleteoutputpath">
+        <fs>
+            <delete path="${outputPath}"/>
+            <mkdir path="${outputPath}"/>
+            <delete path="${workingDir}"/>
+            <mkdir path="${workingDir}"/>
+        </fs>
+        <ok to="atomicactions"/>
+        <error to="Kill"/>
+    </action>
+
+    <fork name="atomicactions">
+        <path start="atomicactions_publication"/>
+        <path start="atomicactions_dataset"/>
+        <path start="atomicactions_orp"/>
+        <path start="atomicactions_software"/>
+    </fork>
+
+    <action name="atomicactions_publication">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the atomic action with the bip finder scores for publications</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/publication</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/publication</arg>
+            <arg>--bipScorePath</arg><arg>${bipScorePath}</arg>
+        </spark>
+        <ok to="join_aa"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="atomicactions_dataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the atomic action with the bip finder scores for datasets</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/dataset</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
+            <arg>--bipScorePath</arg><arg>${bipScorePath}</arg>
+        </spark>
+        <ok to="join_aa"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="atomicactions_orp">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the atomic action with the bip finder scores for orp</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/otherresearchproduct</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
+            <arg>--bipScorePath</arg><arg>${bipScorePath}</arg>
+        </spark>
+        <ok to="join_aa"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="atomicactions_software">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the atomic action with the bip finder scores for software</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/software</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/software</arg>
+            <arg>--bipScorePath</arg><arg>${bipScorePath}</arg>
+        </spark>
+        <ok to="join_aa"/>
+        <error to="Kill"/>
+    </action>
+
+    <join name="join_aa" to="collectandsave"/>
+
+    <action name="collectandsave">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>saves all the aa produced for the several types of results in the as output path</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.CollectAndSave</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${workingDir}</arg>
+            <arg>--outputPath</arg><arg>${outputPath}</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java
@ -0,0 +1,323 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+
+public class SparkAtomicActionScoreJobTest {
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	private static SparkSession spark;
+
+	private static Path workingDir;
+	private static final Logger log = LoggerFactory
+		.getLogger(SparkAtomicActionScoreJobTest.class);
+
+	@BeforeAll
+	public static void beforeAll() throws IOException {
+		workingDir = Files
+			.createTempDirectory(SparkAtomicActionScoreJobTest.class.getSimpleName());
+		log.info("using work dir {}", workingDir);
+
+		SparkConf conf = new SparkConf();
+		conf.setAppName(SparkAtomicActionScoreJobTest.class.getSimpleName());
+
+		conf.setMaster("local[*]");
+		conf.set("spark.driver.host", "localhost");
+		conf.set("hive.metastore.local", "true");
+		conf.set("spark.ui.enabled", "false");
+		conf.set("spark.sql.warehouse.dir", workingDir.toString());
+		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+		spark = SparkSession
+			.builder()
+			.appName(SparkAtomicActionScoreJobTest.class.getSimpleName())
+			.config(conf)
+			.getOrCreate();
+	}
+
+	@AfterAll
+	public static void afterAll() throws IOException {
+		FileUtils.deleteDirectory(workingDir.toFile());
+		spark.stop();
+	}
+
+	@Test
+	public void matchOne() throws Exception {
+		String bipScoresPath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
+			.getPath();
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json")
+			.getPath();
+
+		SparkAtomicActionScoreJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-bipScorePath",
+					bipScoresPath,
+					"-resultTableName",
+					"eu.dnetlib.dhp.schema.oaf.Publication",
+					"-outputPath",
+					workingDir.toString() + "/actionSet"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Publication> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Publication) aa.getPayload()));
+
+		Assertions.assertTrue(tmp.count() == 1);
+
+		Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
+		verificationDataset.createOrReplaceTempView("publication");
+
+		Dataset<Row> execVerification = spark
+			.sql(
+				"Select p.id oaid, mes.id, mUnit.value from publication p " +
+					"lateral view explode(measures) m as mes " +
+					"lateral view explode(mes.unit) u as mUnit ");
+
+		Assertions.assertEquals(2, execVerification.count());
+
+		Assertions
+			.assertEquals(
+				"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb",
+				execVerification.select("oaid").collectAsList().get(0).getString(0));
+
+		Assertions
+			.assertEquals(
+				"1.47565045883e-08",
+				execVerification.filter("id = 'influence'").select("value").collectAsList().get(0).getString(0));
+
+		Assertions
+			.assertEquals(
+				"0.227515392",
+				execVerification.filter("id = 'popularity'").select("value").collectAsList().get(0).getString(0));
+
+	}
+
+	@Test
+	public void matchOneWithTwo() throws Exception {
+		String bipScoresPath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
+			.getPath();
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json")
+			.getPath();
+
+		SparkAtomicActionScoreJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-bipScorePath",
+					bipScoresPath,
+					"-resultTableName",
+					"eu.dnetlib.dhp.schema.oaf.Publication",
+					"-outputPath",
+					workingDir.toString() + "/actionSet"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Publication> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Publication) aa.getPayload()));
+
+		Assertions.assertTrue(tmp.count() == 1);
+
+		Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
+		verificationDataset.createOrReplaceTempView("publication");
+
+		Dataset<Row> execVerification = spark
+			.sql(
+				"Select p.id oaid, mes.id, mUnit.value from publication p " +
+					"lateral view explode(measures) m as mes " +
+					"lateral view explode(mes.unit) u as mUnit ");
+
+		Assertions.assertEquals(4, execVerification.count());
+
+		Assertions
+			.assertEquals(
+				"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb",
+				execVerification.select("oaid").collectAsList().get(0).getString(0));
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("id = 'influence'").count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("id = 'popularity'").count());
+
+		List<Row> tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList();
+		String tmp_influence = tmp_ds.get(0).getString(0);
+		Assertions
+			.assertTrue(
+				"1.47565045883e-08".equals(tmp_influence) ||
+					"1.98956540239e-08".equals(tmp_influence));
+
+		tmp_influence = tmp_ds.get(1).getString(0);
+		Assertions
+			.assertTrue(
+				"1.47565045883e-08".equals(tmp_influence) ||
+					"1.98956540239e-08".equals(tmp_influence));
+
+		Assertions.assertTrue(!tmp_ds.get(0).getString(0).equals(tmp_ds.get(1).getString(0)));
+
+	}
+
+	@Test
+	public void matchTwo() throws Exception {
+		String bipScoresPath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
+			.getPath();
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json")
+			.getPath();
+
+		SparkAtomicActionScoreJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-bipScorePath",
+					bipScoresPath,
+					"-resultTableName",
+					"eu.dnetlib.dhp.schema.oaf.Publication",
+					"-outputPath",
+					workingDir.toString() + "/actionSet"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Publication> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Publication) aa.getPayload()));
+
+		Assertions.assertTrue(tmp.count() == 2);
+
+		Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
+		verificationDataset.createOrReplaceTempView("publication");
+
+		Dataset<Row> execVerification = spark
+			.sql(
+				"Select p.id oaid, mes.id, mUnit.value from publication p " +
+					"lateral view explode(measures) m as mes " +
+					"lateral view explode(mes.unit) u as mUnit ");
+
+		Assertions.assertEquals(4, execVerification.count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb'").count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09'").count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("id = 'influence'").count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("id = 'popularity'").count());
+
+		Assertions
+			.assertEquals(
+				"1.47565045883e-08",
+				execVerification
+					.filter(
+						"oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " +
+							"and id = 'influence'")
+					.select("value")
+					.collectAsList()
+					.get(0)
+					.getString(0));
+
+		Assertions
+			.assertEquals(
+				"1.98956540239e-08",
+				execVerification
+					.filter(
+						"oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " +
+							"and id = 'influence'")
+					.select("value")
+					.collectAsList()
+					.get(0)
+					.getString(0));
+
+		Assertions
+			.assertEquals(
+				"0.282046161584",
+				execVerification
+					.filter(
+						"oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " +
+							"and id = 'popularity'")
+					.select("value")
+					.collectAsList()
+					.get(0)
+					.getString(0));
+
+		Assertions
+			.assertEquals(
+				"0.227515392",
+				execVerification
+					.filter(
+						"oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " +
+							"and id = 'popularity'")
+					.select("value")
+					.collectAsList()
+					.get(0)
+					.getString(0));
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java
@ -32,15 +32,15 @@ public class CheckDuplictedIdsJob {
 			IOUtils
 				.toString(
 					CheckDuplictedIdsJob.class
-						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/check_duplicates.json")));
 		parser.parseArgument(args);

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

-		final String countPath = parser.get("workingPath") + "/counts";
+		final String countPath = parser.get("outputDir") + "/counts";
 		log.info("countPath: {}", countPath);

 		final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
@ -59,6 +59,7 @@ public class CheckDuplictedIdsJob {
 			.map(o -> ClusterUtils.incrementAccumulator(o, total), Encoders.tuple(Encoders.STRING(), Encoders.LONG()))
 			.write()
 			.mode(SaveMode.Overwrite)
+			.option("compression", "gzip")
 			.json(countPath);
 		;

--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java
@ -44,10 +44,10 @@ public class GenerateEventsJob {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String eventsPath = workingPath + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final Set<String> dsIdWhitelist = ClusterUtils.parseParamAsList(parser, "datasourceIdWhitelist");
@ -59,6 +59,9 @@ public class GenerateEventsJob {
 		final Set<String> dsIdBlacklist = ClusterUtils.parseParamAsList(parser, "datasourceIdBlacklist");
 		log.info("datasourceIdBlacklist: {}", StringUtils.join(dsIdBlacklist, ","));

+		final Set<String> topicWhitelist = ClusterUtils.parseParamAsList(parser, "topicWhitelist");
+		log.info("topicWhitelist: {}", StringUtils.join(topicWhitelist, ","));
+
 		final SparkConf conf = new SparkConf();

 		runWithSparkSession(conf, isSparkSessionManaged, spark -> {
@ -70,12 +73,12 @@ public class GenerateEventsJob {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_events");

 			final Dataset<ResultGroup> groups = ClusterUtils
-				.readPath(spark, workingPath + "/duplicates", ResultGroup.class);
+				.readPath(spark, workingDir + "/duplicates", ResultGroup.class);

 			final Dataset<Event> dataset = groups
 				.map(
 					g -> EventFinder
-						.generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, accumulators),
+						.generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, topicWhitelist, accumulators),
 					Encoders
 						.bean(EventGroup.class))
 				.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java
@ -46,7 +46,7 @@ public class GenerateStatsJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final String dbUrl = parser.get("dbUrl");
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java
@ -46,7 +46,7 @@ public class IndexEventSubsetJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final String index = parser.get("index");
@ -55,6 +55,18 @@ public class IndexEventSubsetJob {
 		final String indexHost = parser.get("esHost");
 		log.info("indexHost: {}", indexHost);

+		final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
+		log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
+
+		final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
+		log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
+
+		final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
+		log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
+
+		final String esNodesWanOnly = parser.get("esNodesWanOnly");
+		log.info("esNodesWanOnly: {}", esNodesWanOnly);
+
 		final int maxEventsForTopic = NumberUtils.toInt(parser.get("maxEventsForTopic"));
 		log.info("maxEventsForTopic: {}", maxEventsForTopic);

@ -86,10 +98,10 @@ public class IndexEventSubsetJob {
 		esCfg.put("es.index.auto.create", "false");
 		esCfg.put("es.nodes", indexHost);
 		esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
-		esCfg.put("es.batch.write.retry.count", "8");
-		esCfg.put("es.batch.write.retry.wait", "60s");
-		esCfg.put("es.batch.size.entries", "200");
-		esCfg.put("es.nodes.wan.only", "true");
+		esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
+		esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
+		esCfg.put("es.batch.size.entries", esBatchSizeEntries);
+		esCfg.put("es.nodes.wan.only", esNodesWanOnly);

 		log.info("*** Start indexing");
 		JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java
@ -54,7 +54,7 @@ public class IndexNotificationsJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final String index = parser.get("index");
@ -63,6 +63,18 @@ public class IndexNotificationsJob {
 		final String indexHost = parser.get("esHost");
 		log.info("indexHost: {}", indexHost);

+		final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
+		log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
+
+		final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
+		log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
+
+		final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
+		log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
+
+		final String esNodesWanOnly = parser.get("esNodesWanOnly");
+		log.info("esNodesWanOnly: {}", esNodesWanOnly);
+
 		final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
 		log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);

@ -92,10 +104,10 @@ public class IndexNotificationsJob {
 			esCfg.put("es.index.auto.create", "false");
 			esCfg.put("es.nodes", indexHost);
 			esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
-			esCfg.put("es.batch.write.retry.count", "8");
-			esCfg.put("es.batch.write.retry.wait", "60s");
-			esCfg.put("es.batch.size.entries", "200");
-			esCfg.put("es.nodes.wan.only", "true");
+			esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
+			esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
+			esCfg.put("es.batch.size.entries", esBatchSizeEntries);
+			esCfg.put("es.nodes.wan.only", esNodesWanOnly);

 			log.info("*** Start indexing");
 			JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java
@ -36,7 +36,7 @@ public class IndexOnESJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final String index = parser.get("index");
@ -45,6 +45,18 @@ public class IndexOnESJob {
 		final String indexHost = parser.get("esHost");
 		log.info("indexHost: {}", indexHost);

+		final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
+		log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
+
+		final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
+		log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
+
+		final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
+		log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
+
+		final String esNodesWanOnly = parser.get("esNodesWanOnly");
+		log.info("esNodesWanOnly: {}", esNodesWanOnly);
+
 		final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();

 		final JavaRDD<String> inputRdd = ClusterUtils
@ -53,15 +65,13 @@ public class IndexOnESJob {
 			.javaRDD();

 		final Map<String, String> esCfg = new HashMap<>();
-		// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
-
 		esCfg.put("es.index.auto.create", "false");
 		esCfg.put("es.nodes", indexHost);
 		esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
-		esCfg.put("es.batch.write.retry.count", "8");
-		esCfg.put("es.batch.write.retry.wait", "60s");
-		esCfg.put("es.batch.size.entries", "200");
-		esCfg.put("es.nodes.wan.only", "true");
+		esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
+		esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
+		esCfg.put("es.batch.size.entries", esBatchSizeEntries);
+		esCfg.put("es.nodes.wan.only", esNodesWanOnly);

 		JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep0Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep0Job.java
@ -42,10 +42,10 @@ public class JoinStep0Job {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step0";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step0";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -57,10 +57,10 @@ public class JoinStep0Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/simpleEntities", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/simpleEntities", OaBrokerMainEntity.class);

 			final Dataset<RelatedDatasource> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedDatasources", RelatedDatasource.class);
+				.readPath(spark, workingDir + "/relatedDatasources", RelatedDatasource.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedDatasource>, OaBrokerMainEntity> aggr = new RelatedDatasourceAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep1Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep1Job.java
@ -40,10 +40,10 @@ public class JoinStep1Job {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step1";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step1";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -55,10 +55,10 @@ public class JoinStep1Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step0", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step0", OaBrokerMainEntity.class);

 			final Dataset<RelatedProject> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedProjects", RelatedProject.class);
+				.readPath(spark, workingDir + "/relatedProjects", RelatedProject.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedProject>, OaBrokerMainEntity> aggr = new RelatedProjectAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep2Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep2Job.java
@ -39,10 +39,10 @@ public class JoinStep2Job {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step2";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step2";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -54,10 +54,10 @@ public class JoinStep2Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step1", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step1", OaBrokerMainEntity.class);

 			final Dataset<RelatedSoftware> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedSoftwares", RelatedSoftware.class);
+				.readPath(spark, workingDir + "/relatedSoftwares", RelatedSoftware.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedSoftware>, OaBrokerMainEntity> aggr = new RelatedSoftwareAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep3Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep3Job.java
@ -40,10 +40,10 @@ public class JoinStep3Job {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step3";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step3";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -55,10 +55,10 @@ public class JoinStep3Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step2", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step2", OaBrokerMainEntity.class);

 			final Dataset<RelatedDataset> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedDatasets", RelatedDataset.class);
+				.readPath(spark, workingDir + "/relatedDatasets", RelatedDataset.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedDataset>, OaBrokerMainEntity> aggr = new RelatedDatasetAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep4Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep4Job.java
@ -40,10 +40,10 @@ public class JoinStep4Job {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step4";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step4";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -55,10 +55,10 @@ public class JoinStep4Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step3", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step3", OaBrokerMainEntity.class);

 			final Dataset<RelatedPublication> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedPublications", RelatedPublication.class);
+				.readPath(spark, workingDir + "/relatedPublications", RelatedPublication.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedPublication>, OaBrokerMainEntity> aggr = new RelatedPublicationAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java
@ -36,7 +36,7 @@ import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
 public class PartitionEventsByDsIdJob {

 	private static final Logger log = LoggerFactory.getLogger(PartitionEventsByDsIdJob.class);
-	private static final String OPENDOAR_NSPREFIX = "10|opendoar____::";
+	private static final String OPENDOAR_NSPREFIX = "opendoar____::";

 	public static void main(final String[] args) throws Exception {

@ -55,10 +55,10 @@ public class PartitionEventsByDsIdJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

-		final String partitionPath = parser.get("workingPath") + "/eventsByOpendoarId";
+		final String partitionPath = parser.get("outputDir") + "/eventsByOpendoarId";
 		log.info("partitionPath: {}", partitionPath);

 		final String opendoarIds = parser.get("opendoarIds");
@ -91,6 +91,7 @@ public class PartitionEventsByDsIdJob {
 				.write()
 				.partitionBy("group")
 				.mode(SaveMode.Overwrite)
+				.option("compression", "gzip")
 				.json(partitionPath);

 		});
@ -122,6 +123,7 @@ public class PartitionEventsByDsIdJob {

 		final ShortEventMessageWithGroupId res = new ShortEventMessageWithGroupId();

+		res.setEventId(e.getEventId());
 		res.setOriginalId(payload.getResult().getOriginalId());
 		res.setTitle(payload.getResult().getTitles().stream().filter(StringUtils::isNotBlank).findFirst().orElse(null));
 		res.setTopic(e.getTopic());
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java
@ -45,10 +45,10 @@ public class PrepareGroupsJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String groupsPath = workingPath + "/duplicates";
+		final String groupsPath = workingDir + "/duplicates";
 		log.info("groupsPath: {}", groupsPath);

 		final SparkConf conf = new SparkConf();
@ -60,10 +60,10 @@ public class PrepareGroupsJob {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_groups");

 			final Dataset<OaBrokerMainEntity> results = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step4", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step4", OaBrokerMainEntity.class);

 			final Dataset<Relation> mergedRels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));

 			final TypedColumn<Tuple2<OaBrokerMainEntity, Relation>, ResultGroup> aggr = new ResultAggregator()
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasetsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasetsJob.java
@ -42,10 +42,10 @@ public class PrepareRelatedDatasetsJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedDatasets";
+		final String relsPath = workingDir + "/relatedDatasets";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
@ -62,7 +62,7 @@ public class PrepareRelatedDatasetsJob {
 				.map(ConversionUtils::oafDatasetToBrokerDataset, Encoders.bean(OaBrokerRelatedDataset.class));

 			final Dataset<Relation> rels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getDataInfo().getDeletedbyinference())
 				.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
 				.filter(r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
@ -72,7 +72,8 @@ public class PrepareRelatedDatasetsJob {
 			final Dataset<RelatedDataset> dataset = rels
 				.joinWith(datasets, datasets.col("openaireId").equalTo(rels.col("target")), "inner")
 				.map(t -> {
-					final RelatedDataset rel = new RelatedDataset(t._1.getSource(), t._2);
+					final RelatedDataset rel = new RelatedDataset(t._1.getSource(),
+						t._2);
 					rel.getRelDataset().setRelType(t._1.getRelClass());
 					return rel;
 				}, Encoders.bean(RelatedDataset.class));
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java
@ -48,10 +48,10 @@ public class PrepareRelatedDatasourcesJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedDatasources";
+		final String relsPath = workingDir + "/relatedDatasources";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedProjectsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedProjectsJob.java
@ -44,10 +44,10 @@ public class PrepareRelatedProjectsJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedProjects";
+		final String relsPath = workingDir + "/relatedProjects";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
@ -64,7 +64,7 @@ public class PrepareRelatedProjectsJob {
 				.map(ConversionUtils::oafProjectToBrokerProject, Encoders.bean(OaBrokerProject.class));

 			final Dataset<Relation> rels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getDataInfo().getDeletedbyinference())
 				.filter(r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
 				.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedPublicationsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedPublicationsJob.java
@ -43,10 +43,10 @@ public class PrepareRelatedPublicationsJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedPublications";
+		final String relsPath = workingDir + "/relatedPublications";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
@ -65,7 +65,7 @@ public class PrepareRelatedPublicationsJob {
 					Encoders.bean(OaBrokerRelatedPublication.class));

 			final Dataset<Relation> rels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getDataInfo().getDeletedbyinference())
 				.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
 				.filter(r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
@ -75,7 +75,8 @@ public class PrepareRelatedPublicationsJob {
 			final Dataset<RelatedPublication> dataset = rels
 				.joinWith(pubs, pubs.col("openaireId").equalTo(rels.col("target")), "inner")
 				.map(t -> {
-					final RelatedPublication rel = new RelatedPublication(t._1.getSource(), t._2);
+					final RelatedPublication rel = new RelatedPublication(
+						t._1.getSource(), t._2);
 					rel.getRelPublication().setRelType(t._1.getRelClass());
 					return rel;
 				}, Encoders.bean(RelatedPublication.class));
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedSoftwaresJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedSoftwaresJob.java
@ -44,10 +44,10 @@ public class PrepareRelatedSoftwaresJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedSoftwares";
+		final String relsPath = workingDir + "/relatedSoftwares";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
@ -64,7 +64,7 @@ public class PrepareRelatedSoftwaresJob {
 				.map(ConversionUtils::oafSoftwareToBrokerSoftware, Encoders.bean(OaBrokerRelatedSoftware.class));

 			final Dataset<Relation> rels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getDataInfo().getDeletedbyinference())
 				.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
 				.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareSimpleEntititiesJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareSimpleEntititiesJob.java
@ -44,10 +44,10 @@ public class PrepareSimpleEntititiesJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String simpleEntitiesPath = workingPath + "/simpleEntities";
+		final String simpleEntitiesPath = workingDir + "/simpleEntities";
 		log.info("simpleEntitiesPath: {}", simpleEntitiesPath);

 		final SparkConf conf = new SparkConf();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java
@ -16,7 +16,24 @@ public class EnrichMissingSubject extends UpdateMatcher<OaBrokerTypedValue> {

 	public EnrichMissingSubject() {
 		super(20,
-			s -> Topic.fromPath("ENRICH/MISSING/SUBJECT/" + s.getType()),
+			s -> {
+				switch (s.getType().toLowerCase()) {
+					case "acm":
+						return Topic.ENRICH_MISSING_SUBJECT_ACM;
+					case "arxiv":
+						return Topic.ENRICH_MISSING_SUBJECT_ARXIV;
+					case "ddc":
+						return Topic.ENRICH_MISSING_SUBJECT_DDC;
+					case "jel":
+						return Topic.ENRICH_MISSING_SUBJECT_JEL;
+					case "mesh":
+						return Topic.ENRICH_MISSING_SUBJECT_MESHEUROPMC;
+					case "rvk":
+						return Topic.ENRICH_MISSING_SUBJECT_RVK;
+					default:
+						return null;
+				}
+			},
 			(p, s) -> p.getSubjects().add(s),
 			s -> subjectAsString(s));
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java
@ -16,7 +16,24 @@ public class EnrichMoreSubject extends UpdateMatcher<OaBrokerTypedValue> {

 	public EnrichMoreSubject() {
 		super(20,
-			s -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + s.getType()),
+			s -> {
+				switch (s.getType().toLowerCase()) {
+					case "acm":
+						return Topic.ENRICH_MORE_SUBJECT_ACM;
+					case "arxiv":
+						return Topic.ENRICH_MORE_SUBJECT_ARXIV;
+					case "ddc":
+						return Topic.ENRICH_MORE_SUBJECT_DDC;
+					case "jel":
+						return Topic.ENRICH_MORE_SUBJECT_JEL;
+					case "mesh":
+						return Topic.ENRICH_MORE_SUBJECT_MESHEUROPMC;
+					case "rvk":
+						return Topic.ENRICH_MORE_SUBJECT_RVK;
+					default:
+						return null;
+				}
+			},
 			(p, s) -> p.getSubjects().add(s),
 			s -> subjectAsString(s));
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java
@ -17,6 +17,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.oaf.Relation;

 public class ClusterUtils {

@ -30,6 +31,16 @@ public class ClusterUtils {
 		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
 	}

+	/**
+	 * Reads the relations stored under {@code graphPath + "/relation"} and passes the source and
+	 * target id of each one through {@link ConversionUtils#cleanOpenaireId(String)}.
+	 *
+	 * @param graphPath the base path of the graph
+	 * @param spark the session used to read the data
+	 * @return the relations, with cleaned source and target ids
+	 */
+	public static Dataset<Relation> loadRelations(final String graphPath, final SparkSession spark) {
+		final Dataset<Relation> rawRelations = readPath(spark, graphPath + "/relation", Relation.class);
+
+		return rawRelations.map(rel -> {
+			final String cleanSource = ConversionUtils.cleanOpenaireId(rel.getSource());
+			rel.setSource(cleanSource);
+			final String cleanTarget = ConversionUtils.cleanOpenaireId(rel.getTarget());
+			rel.setTarget(cleanTarget);
+			return rel;
+		}, Encoders.bean(Relation.class));
+	}
+
 	public static <R> Dataset<R> readPath(
 		final SparkSession spark,
 		final String inputPath,
@ -67,6 +78,7 @@ public class ClusterUtils {
 			.map(o -> ClusterUtils.incrementAccumulator(o, acc), Encoders.bean(clazz))
 			.write()
 			.mode(SaveMode.Overwrite)
+			.option("compression", "gzip")
 			.json(path);
 	}

--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
@ -74,7 +74,7 @@ public class ConversionUtils {
 		}

 		final OaBrokerRelatedDataset res = new OaBrokerRelatedDataset();
-		res.setOpenaireId(d.getId());
+		res.setOpenaireId(cleanOpenaireId(d.getId()));
 		res.setOriginalId(first(d.getOriginalId()));
 		res.setTitle(structPropValue(d.getTitle()));
 		res.setPids(mappedList(d.getPid(), ConversionUtils::oafPidToBrokerPid));
@ -89,7 +89,7 @@ public class ConversionUtils {
 		}

 		final OaBrokerRelatedPublication res = new OaBrokerRelatedPublication();
-		res.setOpenaireId(p.getId());
+		res.setOpenaireId(cleanOpenaireId(p.getId()));
 		res.setOriginalId(first(p.getOriginalId()));
 		res.setTitle(structPropValue(p.getTitle()));
 		res.setPids(mappedList(p.getPid(), ConversionUtils::oafPidToBrokerPid));
@ -106,7 +106,7 @@ public class ConversionUtils {

 		final OaBrokerMainEntity res = new OaBrokerMainEntity();

-		res.setOpenaireId(result.getId());
+		res.setOpenaireId(cleanOpenaireId(result.getId()));
 		res.setOriginalId(first(result.getOriginalId()));
 		res.setTypology(classId(result.getResulttype()));
 		res.setTitles(structPropList(result.getTitle()));
@ -129,6 +129,10 @@ public class ConversionUtils {
 		return res;
 	}

+	/**
+	 * Removes the prefix that precedes the first '|' of an identifier.
+	 *
+	 * @param id the identifier to clean, may be null
+	 * @return the text after the first '|'; the identifier unchanged when it contains no '|';
+	 *         null when the identifier is null
+	 */
+	public static String cleanOpenaireId(final String id) {
+		if (id == null) {
+			// pass a missing id through instead of failing with an NPE on id.contains(...)
+			return null;
+		}
+		// substringAfter returns "" when the separator is absent, hence the explicit contains check
+		return id.contains("|") ? StringUtils.substringAfter(id, "|") : id;
+	}
+
 	private static OaBrokerAuthor oafAuthorToBrokerAuthor(final Author author) {
 		if (author == null) {
 			return null;
@ -188,7 +192,7 @@ public class ConversionUtils {
 		}

 		final OaBrokerProject res = new OaBrokerProject();
-		res.setOpenaireId(p.getId());
+		res.setOpenaireId(cleanOpenaireId(p.getId()));
 		res.setTitle(fieldValue(p.getTitle()));
 		res.setAcronym(fieldValue(p.getAcronym()));
 		res.setCode(fieldValue(p.getCode()));
@ -214,7 +218,7 @@ public class ConversionUtils {
 		}

 		final OaBrokerRelatedSoftware res = new OaBrokerRelatedSoftware();
-		res.setOpenaireId(sw.getId());
+		res.setOpenaireId(cleanOpenaireId(sw.getId()));
 		res.setName(structPropValue(sw.getTitle()));
 		res.setDescription(fieldValue(sw.getDescription()));
 		res.setRepository(fieldValue(sw.getCodeRepositoryUrl()));
@ -230,7 +234,7 @@ public class ConversionUtils {

 		final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource();
 		res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname())));
-		res.setOpenaireId(ds.getId());
+		res.setOpenaireId(cleanOpenaireId(ds.getId()));
 		res.setType(classId(ds.getDatasourcetype()));
 		return res;
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java
@ -59,9 +59,18 @@ public class DatasourceRelationsAccumulator implements Serializable {
 		final DatasourceRelationsAccumulator res = new DatasourceRelationsAccumulator();
 		collectedFromSet
 			.stream()
-			.map(s -> new Tuple3<>(r.getId(), s, BrokerConstants.COLLECTED_FROM_REL))
+			.map(
+				s -> new Tuple3<>(ConversionUtils.cleanOpenaireId(r.getId()), ConversionUtils.cleanOpenaireId(s),
+					BrokerConstants.COLLECTED_FROM_REL))
 			.forEach(res::addTuple);
-		hostedBySet.stream().map(s -> new Tuple3<>(r.getId(), s, BrokerConstants.HOSTED_BY_REL)).forEach(res::addTuple);
+
+		hostedBySet
+			.stream()
+			.map(
+				s -> new Tuple3<>(ConversionUtils.cleanOpenaireId(r.getId()), ConversionUtils.cleanOpenaireId(s),
+					BrokerConstants.HOSTED_BY_REL))
+			.forEach(res::addTuple);
+
 		return res;
 	}

--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java
@ -76,6 +76,7 @@ public class EventFinder {
 		final Set<String> dsIdWhitelist,
 		final Set<String> dsIdBlacklist,
 		final Set<String> dsTypeWhitelist,
+		final Set<String> topicWhitelist,
 		final Map<String, LongAccumulator> accumulators) {

 		final List<UpdateInfo<?>> list = new ArrayList<>();
@ -84,7 +85,13 @@ public class EventFinder {
 			for (final OaBrokerRelatedDatasource targetDs : target.getDatasources()) {
 				if (verifyTarget(targetDs, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist)) {
 					for (final UpdateMatcher<?> matcher : matchers) {
-						list.addAll(matcher.searchUpdatesForRecord(target, targetDs, results.getData(), accumulators));
+						for (final UpdateInfo<?> info : matcher
+							.searchUpdatesForRecord(target, targetDs, results.getData(), accumulators)) {
+							if (topicWhitelist == null || topicWhitelist.isEmpty()
+								|| topicWhitelist.contains(info.getTopic().getPath())) {
+								list.add(info);
+							}
+						}
 					}
 				}
 			}
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/check_duplicates.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/check_duplicates.json
@ -0,0 +1,9 @@
+[
+
+	{
+		"paramName": "o",
+		"paramLongName": "outputDir",
+		"paramDescription": "the path where the data are stored",
+		"paramRequired": true
+	}
+]
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/common_params.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/common_params.json
@ -7,7 +7,7 @@
 	},
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
+		"paramLongName": "workingDir",
 		"paramDescription": "the path where the temporary data will be stored",
 		"paramRequired": true
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml
@ -6,7 +6,7 @@
            <description>the path where the graph is stored</description>
        </property>
        <property>
-            <name>workingPath</name>
+            <name>outputDir</name>
            <description>the path where the the generated data will be stored</description>
        </property>
 		<property>
@ -24,6 +24,11 @@
            <value>-</value>
            <description>a black list (comma separeted, - for empty list) of datasource ids</description>
        </property>
+        <property>
+            <name>topicWhitelist</name>
+            <value>*</value>
+            <description>a white list (comma separated, * for all) of topics</description>
+        </property>
        <property>
            <name>esEventIndexName</name>
            <description>the elasticsearch index name for events</description>
@ -36,6 +41,26 @@
            <name>esIndexHost</name>
            <description>the elasticsearch host</description>
        </property>
+        <property>
+            <name>esBatchWriteRetryCount</name>
+            <value>8</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchWriteRetryWait</name>
+            <value>60s</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchSizeEntries</name>
+            <value>200</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esNodesWanOnly</name>
+            <value>true</value>
+            <description>an ES configuration property</description>
+        </property>
        <property>
        	<name>maxIndexedEventsForDsAndTopic</name>
        	<description>the max number of events for each couple (ds/topic)</description>
@ -111,15 +136,15 @@
        </configuration>
    </global>

-    <start to="ensure_working_path"/>
+    <start to="ensure_output_dir"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    
-    <action name="ensure_working_path">
+    <action name="ensure_output_dir">
        <fs>
-            <mkdir path='${workingPath}'/>
+            <mkdir path='${outputDir}'/>
        </fs>
        <ok to="start_entities_and_rels"/>
        <error to="Kill"/>
@ -152,7 +177,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -176,7 +201,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -201,7 +226,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -225,7 +250,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -249,7 +274,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -273,7 +298,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -299,7 +324,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="join_entities_step1"/>
        <error to="Kill"/>
@ -323,7 +348,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="join_entities_step2"/>
        <error to="Kill"/>
@ -347,7 +372,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="join_entities_step3"/>
        <error to="Kill"/>
@ -371,7 +396,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="join_entities_step4"/>
        <error to="Kill"/>
@ -395,7 +420,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="prepare_groups"/>
        <error to="Kill"/>
@ -419,7 +444,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="generate_events"/>
        <error to="Kill"/>
@ -442,10 +467,12 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
 			<arg>--datasourceIdWhitelist</arg><arg>${datasourceIdWhitelist}</arg>
 			<arg>--datasourceTypeWhitelist</arg><arg>${datasourceTypeWhitelist}</arg>
 			<arg>--datasourceIdBlacklist</arg><arg>${datasourceIdBlacklist}</arg>
+			<arg>--topicWhitelist</arg><arg>${topicWhitelist}</arg>
        </spark>
        <ok to="index_event_subset"/>
        <error to="Kill"/>
@ -468,9 +495,13 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
            <arg>--index</arg><arg>${esEventIndexName}</arg>
            <arg>--esHost</arg><arg>${esIndexHost}</arg>
+            <arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
+            <arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
+            <arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
+            <arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
            <arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
            <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
        </spark>
@ -495,9 +526,13 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
            <arg>--index</arg><arg>${esNotificationsIndexName}</arg>
            <arg>--esHost</arg><arg>${esIndexHost}</arg>
+            <arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
+            <arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
+            <arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
+            <arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
            <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
        </spark>
        <ok to="stats"/>
@ -521,7 +556,7 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
            <arg>--dbUrl</arg><arg>${brokerDbUrl}</arg>
            <arg>--dbUser</arg><arg>${brokerDbUser}</arg>
            <arg>--dbPassword</arg><arg>${brokerDbPassword}</arg>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json
@ -1,7 +1,13 @@
 [
+	{
+		"paramName": "wp",
+		"paramLongName": "workingDir",
+		"paramDescription": "the path where the temporary data are stored",
+		"paramRequired": true
+	},
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
+		"paramLongName": "outputDir",
 		"paramDescription": "the path where the generated events will be stored",
 		"paramRequired": true
 	},
@ -22,5 +28,11 @@
 		"paramLongName": "datasourceIdBlacklist",
 		"paramDescription": "a black list (comma separeted, - for empty list) of datasource ids",
 		"paramRequired": true
+	},
+	{
+		"paramName": "topicWhitelist",
+		"paramLongName": "topicWhitelist",
+		"paramDescription": "a white list (comma separated, * for all) of topics",
+		"paramRequired": true
 	}
 ]
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_es.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_es.json
@ -1,8 +1,8 @@
 [
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
-		"paramDescription": "the workinh path",
+		"paramLongName": "outputDir",
+		"paramDescription": "the data path",
 		"paramRequired": true
 	},
 	{
@ -16,5 +16,29 @@
 		"paramLongName": "esHost",
 		"paramDescription": "the ES host",
 		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchWriteRetryCount",
+		"paramLongName": "esBatchWriteRetryCount",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchWriteRetryWait",
+		"paramLongName": "esBatchWriteRetryWait",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchSizeEntries",
+		"paramLongName": "esBatchSizeEntries",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esNodesWanOnly",
+		"paramLongName": "esNodesWanOnly",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
 	}
 ]
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_event_subset.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_event_subset.json
@ -1,8 +1,8 @@
 [
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
-		"paramDescription": "the workinh path",
+		"paramLongName": "outputDir",
+		"paramDescription": "the path where the generated data are stored",
 		"paramRequired": true
 	},
 	{
@ -17,6 +17,30 @@
 		"paramDescription": "the ES host",
 		"paramRequired": true
 	},	
+	{
+		"paramName": "esBatchWriteRetryCount",
+		"paramLongName": "esBatchWriteRetryCount",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchWriteRetryWait",
+		"paramLongName": "esBatchWriteRetryWait",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchSizeEntries",
+		"paramLongName": "esBatchSizeEntries",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esNodesWanOnly",
+		"paramLongName": "esNodesWanOnly",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},	
 	{
 		"paramName": "n",
 		"paramLongName": "maxEventsForTopic",
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_notifications.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_notifications.json
@ -1,8 +1,8 @@
 [
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
-		"paramDescription": "the workinh path",
+		"paramLongName": "outputDir",
+		"paramDescription": "the dir that contains the events folder",
 		"paramRequired": true
 	},
 	{
@ -17,6 +17,30 @@
 		"paramDescription": "the ES host",
 		"paramRequired": true
 	},
+	{
+		"paramName": "esBatchWriteRetryCount",
+		"paramLongName": "esBatchWriteRetryCount",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchWriteRetryWait",
+		"paramLongName": "esBatchWriteRetryWait",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchSizeEntries",
+		"paramLongName": "esBatchSizeEntries",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esNodesWanOnly",
+		"paramLongName": "esNodesWanOnly",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
 	{
 		"paramName": "broker",
 		"paramLongName": "brokerApiBaseUrl",
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml
@ -6,8 +6,8 @@
            <description>the path where the graph is stored</description>
        </property>
        <property>
-            <name>workingPath</name>
-            <description>the path where the the generated data will be stored</description>
+            <name>outputDir</name>
+            <description>the path where the generated data are stored</description>
        </property>
 		<property>
            <name>datasourceIdWhitelist</name>
@ -36,6 +36,26 @@
            <name>esIndexHost</name>
            <description>the elasticsearch host</description>
        </property>
+        <property>
+            <name>esBatchWriteRetryCount</name>
+            <value>8</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchWriteRetryWait</name>
+            <value>60s</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchSizeEntries</name>
+            <value>200</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esNodesWanOnly</name>
+            <value>true</value>
+            <description>an ES configuration property</description>
+        </property>
        <property>
        	<name>maxIndexedEventsForDsAndTopic</name>
        	<description>the max number of events for each couple (ds/topic)</description>
@ -122,9 +142,13 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
            <arg>--index</arg><arg>${esNotificationsIndexName}</arg>
            <arg>--esHost</arg><arg>${esIndexHost}</arg>
+            <arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
+            <arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
+            <arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
+            <arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
            <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json
@ -1,8 +1,8 @@
 [
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
-		"paramDescription": "the path where the temporary data will be stored",
+		"paramLongName": "outputDir",
+		"paramDescription": "the path where the data will be stored",
 		"paramRequired": true
 	},
 	{
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/workflow.xml
@ -6,7 +6,7 @@
            <description>the opendoar IDs whitelist (comma separated)</description>
        </property>
        <property>
-            <name>workingPath</name>
+            <name>outputDir</name>
            <description>the path where the the generated data will be stored</description>
        </property>
        <property>
@ -87,7 +87,7 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
            <arg>--opendoarIds</arg><arg>${opendoarIds}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/config-default.xml
@ -0,0 +1,18 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/workflow.xml
@ -0,0 +1,140 @@
+<workflow-app name="reindex_events" xmlns="uri:oozie:workflow:0.5">
+
+    <parameters>
+        <property>
+            <name>outputDir</name>
+            <description>the path where the generated data will be stored</description>
+        </property>
+        <property>
+            <name>esEventIndexName</name>
+            <description>the elasticsearch index name for events</description>
+        </property>
+        <property>
+            <name>esIndexHost</name>
+            <description>the elasticsearch host</description>
+        </property>
+        <property>
+            <name>esBatchWriteRetryCount</name>
+            <value>8</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchWriteRetryWait</name>
+            <value>60s</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchSizeEntries</name>
+            <value>200</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esNodesWanOnly</name>
+            <value>true</value>
+            <description>an ES configuration property</description>
+        </property>
+        <property>
+        	<name>maxIndexedEventsForDsAndTopic</name>
+        	<description>the max number of events for each couple (ds/topic)</description>
+        </property>
+        <property>
+        	<name>brokerApiBaseUrl</name>
+        	<description>the url of the broker service api</description>
+        </property>
+        <property>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
+        </property>
+        <property>
+            <name>oozieActionShareLibForSpark2</name>
+            <description>oozie action sharelib for spark 2.*</description>
+        </property>
+        <property>
+            <name>spark2ExtraListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+            <description>spark 2.* extra listeners classname</description>
+        </property>
+        <property>
+            <name>spark2SqlQueryExecutionListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+            <description>spark 2.* sql query execution listeners classname</description>
+        </property>
+        <property>
+            <name>spark2YarnHistoryServerAddress</name>
+            <description>spark 2.* yarn history server address</description>
+        </property>
+        <property>
+            <name>spark2EventLogDir</name>
+            <description>spark 2.* event log dir location</description>
+        </property>
+    </parameters>
+
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.job.queuename</name>
+                <value>${queueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapred.job.queue.name</name>
+                <value>${oozieLauncherQueueName}</value>
+            </property>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
+    <start to="index_event_subset"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+        
+     <action name="index_event_subset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>IndexEventSubsetOnESJob</name>
+            <class>eu.dnetlib.dhp.broker.oa.IndexEventSubsetJob</class>
+            <jar>dhp-broker-events-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.dynamicAllocation.maxExecutors="8" 
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.shuffle.partitions=3840
+            </spark-opts>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
+            <arg>--index</arg><arg>${esEventIndexName}</arg>
+            <arg>--esHost</arg><arg>${esIndexHost}</arg>
+            <arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
+            <arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
+            <arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
+            <arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
+            <arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
+            <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    
+   
+
+    <end name="End"/>
+
+</workflow-app>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats_params.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats_params.json
@ -1,8 +1,8 @@
 [
 	{
-		"paramName": "wp",
-		"paramLongName": "workingPath",
-		"paramDescription": "the working path",
+		"paramName": "o",
+		"paramLongName": "outputDir",
+		"paramDescription": "the path where generated data are stored",
 		"paramRequired": true
 	},
 	{
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java
@ -10,10 +10,11 @@ import java.io.Serializable;
 import java.nio.file.Paths;
 import java.util.*;

-import org.codehaus.jackson.map.ObjectMapper;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;

+import com.fasterxml.jackson.databind.ObjectMapper;
+
 import eu.dnetlib.dhp.oa.merge.AuthorMerger;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.pace.util.MapDocumentUtil;
@ -100,8 +101,8 @@ public class EntityMergerTest implements Serializable {
 		assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30");

 		// verify authors
-		assertEquals(pub_merged.getAuthor().size(), 9);
-		assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4);
+		assertEquals(13, pub_merged.getAuthor().size());
+		assertEquals(4, AuthorMerger.countAuthorsPids(pub_merged.getAuthor()));

 		// verify title
 		int count = 0;
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateConnectedComponent.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateConnectedComponent.java
@ -7,7 +7,6 @@ import java.util.List;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.api.java.function.PairFunction;
@ -16,8 +15,8 @@ import org.apache.spark.rdd.RDD;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
-import org.codehaus.jackson.map.ObjectMapper;

+import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.hash.Hashing;

 import eu.dnetlib.dedup.graph.ConnectedComponent;
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateSimRels.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/SparkCreateSimRels.java
@ -10,7 +10,8 @@ import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
-import org.codehaus.jackson.map.ObjectMapper;
+
+import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Oaf;
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
@ -4,14 +4,13 @@ import eu.dnetlib.dhp.schema.action.AtomicAction
 import eu.dnetlib.dhp.schema.oaf.{DataInfo, Dataset, Field, Instance, KeyValue, Oaf, Organization, Publication, Qualifier, Relation, Result, StructuredProperty}
 import eu.dnetlib.dhp.utils.DHPUtils
 import org.apache.commons.lang3.StringUtils
-import org.codehaus.jackson.map.ObjectMapper
+import com.fasterxml.jackson.databind.ObjectMapper
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.jackson.JsonMethods.parse
 import org.slf4j.{Logger, LoggerFactory}

 import scala.collection.JavaConverters._
-import scala.io.Source


 case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {}
@ -19,23 +18,18 @@ case class HostedByItemType(id: String, officialname: String, issn: String, eiss
 case class DoiBoostAffiliation(PaperId:Long, AffiliationId:Long, GridId:Option[String], OfficialPage:Option[String], DisplayName:Option[String]){}

 object DoiBoostMappingUtil {
-  def getUnknownCountry(): Qualifier = {
-    createQualifier("UNKNOWN","UNKNOWN","dnet:countries","dnet:countries")
-  }
-
-

  def generateMAGAffiliationId(affId: String): String = {
    s"20|microsoft___$SEPARATOR${DHPUtils.md5(affId)}"
  }

-
  val logger: Logger = LoggerFactory.getLogger(getClass)

  //STATIC STRING
  val MAG = "microsoft"
  val MAG_NAME = "Microsoft Academic Graph"
-  val ORCID = "ORCID"
+  val ORCID = "orcid"
+  val ORCID_PENDING = "orcid_pending"
  val CROSSREF = "Crossref"
  val UNPAYWALL = "UnpayWall"
  val GRID_AC = "grid.ac"
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala
@ -39,33 +39,38 @@ object SparkGenerateDOIBoostActionSet {
    val dbaffiliationRelationPath   = parser.get("dbaffiliationRelationPath")
    val dbOrganizationPath          = parser.get("dbOrganizationPath")
    val workingDirPath              = parser.get("targetPath")
+    val sequenceFilePath            = parser.get("sFilePath")

-    spark.read.load(dbDatasetPath).as[OafDataset]
+    val asDataset = spark.read.load(dbDatasetPath).as[OafDataset]
      .map(d =>DoiBoostMappingUtil.fixResult(d))
      .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
-      .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/actionSet")
+//      .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/actionSet")

-    spark.read.load(dbPublicationPath).as[Publication]
+    val asPublication =spark.read.load(dbPublicationPath).as[Publication]
      .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
-      .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
+//      .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")

-    spark.read.load(dbOrganizationPath).as[Organization]
+    val asOrganization = spark.read.load(dbOrganizationPath).as[Organization]
      .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
-      .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
+//      .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")


-    spark.read.load(crossRefRelation).as[Relation]
+    val asCRelation = spark.read.load(crossRefRelation).as[Relation]
      .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
-      .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
+//      .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")

-    spark.read.load(dbaffiliationRelationPath).as[Relation]
+    val asRelAffiliation = spark.read.load(dbaffiliationRelationPath).as[Relation]
      .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
-      .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
+//      .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")


-    val d: Dataset[(String, String)] =spark.read.load(s"$workingDirPath/actionSet").as[(String,String)]

-    d.rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$workingDirPath/rawset", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec])
+
+    val d: Dataset[(String, String)] = asDataset.union(asPublication).union(asOrganization).union(asCRelation).union(asRelAffiliation)
+
+//      spark.read.load(s"$workingDirPath/actionSet").as[(String,String)]
+
+    d.rdd.repartition(6000).map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$sequenceFilePath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec])



--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
@ -2,6 +2,7 @@ package eu.dnetlib.doiboost

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.oa.merge.AuthorMerger
+import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset}
 import eu.dnetlib.doiboost.mag.ConversionUtil
 import org.apache.commons.io.IOUtils
@ -30,7 +31,7 @@ object SparkGenerateDoiBoost {
    import spark.implicits._

    val hostedByMapPath = parser.get("hostedByMapPath")
-    val workingDirPath = parser.get("workingDirPath")
+    val workingDirPath = parser.get("workingPath")


    implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
@ -132,7 +133,7 @@ object SparkGenerateDoiBoost {
          o.setLegalname(DoiBoostMappingUtil.asField(affiliation.DisplayName.get))
        if (affiliation.OfficialPage.isDefined)
          o.setWebsiteurl(DoiBoostMappingUtil.asField(affiliation.OfficialPage.get))
-        o.setCountry(DoiBoostMappingUtil.getUnknownCountry())
+        o.setCountry(ModelConstants.UNKNOWN_COUNTRY)
        o
      }
      else
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -200,7 +200,7 @@ case object Crossref2Oaf {
    a.setSurname(family)
    a.setFullname(s"$given $family")
    if (StringUtils.isNotBlank(orcid))
-      a.setPid(List(createSP(orcid, ORCID, PID_TYPES, generateDataInfo())).asJava)
+      a.setPid(List(createSP(orcid, ORCID_PENDING, PID_TYPES, generateDataInfo())).asJava)

    a
  }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java
@ -2,18 +2,16 @@
 package eu.dnetlib.doiboost.crossref;

 import java.io.ByteArrayOutputStream;
+import java.util.Optional;
 import java.util.zip.Inflater;

 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;

@ -30,34 +28,45 @@ public class CrossrefImporter {

 		parser.parseArgument(args);

-		final String hdfsuri = parser.get("namenode");
-		System.out.println("HDFS URI" + hdfsuri);
-		Path hdfswritepath = new Path(parser.get("targetPath"));
-		System.out.println("TargetPath: " + hdfsuri);
+		final String namenode = parser.get("namenode");
+		System.out.println("namenode: " + namenode);

-		final Long timestamp = StringUtils.isNotBlank(parser.get("timestamp"))
-			? Long.parseLong(parser.get("timestamp"))
-			: -1;
+		Path targetPath = new Path(parser.get("targetPath"));
+		System.out.println("targetPath: " + targetPath);

-		if (timestamp > 0)
-			System.out.println("Timestamp added " + timestamp);
+		final Long timestamp = Optional
+			.ofNullable(parser.get("timestamp"))
+			.map(s -> {
+				try {
+					return Long.parseLong(s);
+				} catch (NumberFormatException e) {
+					return -1L;
+				}
+			})
+			.orElse(-1L);
+		System.out.println("timestamp: " + timestamp);
+
+		final String esServer = parser.get("esServer");
+		System.out.println("esServer: " + esServer);
+
+		final String esIndex = parser.get("esIndex");
+		System.out.println("esIndex: " + esIndex);

 		// ====== Init HDFS File System Object
 		Configuration conf = new Configuration();
 		// Set FileSystem URI
-		conf.set("fs.defaultFS", hdfsuri);
+		conf.set("fs.defaultFS", namenode);
 		// Because of Maven
 		conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
 		conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

-		ESClient client = timestamp > 0
-			? new ESClient("ip-90-147-167-25.ct1.garrservices.it", "crossref", timestamp)
-			: new ESClient("ip-90-147-167-25.ct1.garrservices.it", "crossref");
+		// esServer/esIndex replace the formerly hard-coded "ip-90-147-167-25.ct1.garrservices.it", "crossref"
+		final ESClient client = new ESClient(esServer, esIndex, timestamp);

 		try (SequenceFile.Writer writer = SequenceFile
 			.createWriter(
 				conf,
-				SequenceFile.Writer.file(hdfswritepath),
+				SequenceFile.Writer.file(targetPath),
 				SequenceFile.Writer.keyClass(IntWritable.class),
 				SequenceFile.Writer.valueClass(Text.class))) {

@ -74,8 +83,7 @@ public class CrossrefImporter {
 					end = System.currentTimeMillis();
 					final float time = (end - start) / 1000.0F;
 					System.out
-						.println(
-							String.format("Imported %d records last 100000 imported in %f seconds", i, time));
+						.println(String.format("Imported %s records last 100000 imported in %s seconds", i, time));
 					start = System.currentTimeMillis();
 				}
 			}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java
@ -1,11 +1,11 @@

 package eu.dnetlib.doiboost.crossref;

-import java.io.IOException;
 import java.util.Iterator;
 import java.util.List;

 import org.apache.commons.io.IOUtils;
+import org.apache.http.HttpHeaders;
 import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpPost;
 import org.apache.http.entity.StringEntity;
@ -17,13 +17,17 @@ import org.slf4j.LoggerFactory;
 import com.jayway.jsonpath.JsonPath;

 public class ESClient implements Iterator<String> {
-	private static final Logger logger = LoggerFactory.getLogger(ESClient.class);

-	static final String blobPath = "$.hits[*].hits[*]._source.blob";
-	static final String scrollIdPath = "$._scroll_id";
-	static final String JSON_NO_TS = "{\"size\":1000}";
-	static final String JSON_WITH_TS = "{\"size\":1000, \"query\":{\"range\":{\"timestamp\":{\"gte\":%d}}}}";
-	static final String JSON_SCROLL = "{\"scroll_id\":\"%s\",\"scroll\" : \"1m\"}";
+	private static final String BLOB_PATH = "$.hits.hits[*]._source.blob";
+	private static final String SCROLL_ID_PATH = "$._scroll_id";
+	private static final String JSON_NO_TS = "{\"size\":1000}";
+	private static final String JSON_WITH_TS = "{\"size\":1000, \"query\":{\"range\":{\"timestamp\":{\"gte\":%d}}}}";
+	private static final String JSON_SCROLL = "{\"scroll_id\":\"%s\",\"scroll\" : \"1m\"}";
+
+	public static final String APPLICATION_JSON = "application/json";
+
+	public static final String ES_SEARCH_URL = "http://%s:9200/%s/_search?scroll=1m";
+	public static final String ES_SCROLL_URL = "http://%s:9200/_search/scroll";

 	private final String scrollId;

@ -31,47 +35,30 @@ public class ESClient implements Iterator<String> {

 	private final String esHost;

-	public ESClient(final String esHost, final String esIndex) throws IOException {
-
+	public ESClient(final String esHost, final String esIndex, final long timestamp) {
 		this.esHost = esHost;
-		final String body = getResponse(
-			String.format("http://%s:9200/%s/_search?scroll=1m", esHost, esIndex), JSON_NO_TS);
-		scrollId = getJPathString(scrollIdPath, body);
-		buffer = getBlobs(body);
-	}

-	public ESClient(final String esHost, final String esIndex, final long timestamp)
-		throws IOException {
-		this.esHost = esHost;
-		final String body = getResponse(
-			String.format("http://%s:9200/%s/_search?scroll=1m", esHost, esIndex),
-			String.format(JSON_WITH_TS, timestamp));
-		scrollId = getJPathString(scrollIdPath, body);
+		final String body = timestamp > 0
+			? getResponse(String.format(ES_SEARCH_URL, esHost, esIndex), String.format(JSON_WITH_TS, timestamp))
+			: getResponse(String.format(ES_SEARCH_URL, esHost, esIndex), JSON_NO_TS);
+		scrollId = getJPathString(SCROLL_ID_PATH, body);
 		buffer = getBlobs(body);
 	}

 	private String getResponse(final String url, final String json) {
-		CloseableHttpClient client = HttpClients.createDefault();
-		try {
-
+		try (CloseableHttpClient client = HttpClients.createDefault()) {
 			HttpPost httpPost = new HttpPost(url);
 			if (json != null) {
 				StringEntity entity = new StringEntity(json);
 				httpPost.setEntity(entity);
-				httpPost.setHeader("Accept", "application/json");
-				httpPost.setHeader("Content-type", "application/json");
+				httpPost.setHeader(HttpHeaders.ACCEPT, APPLICATION_JSON);
+				httpPost.setHeader(HttpHeaders.CONTENT_TYPE, APPLICATION_JSON);
 			}
-			CloseableHttpResponse response = client.execute(httpPost);
-
+			try (CloseableHttpResponse response = client.execute(httpPost)) {
 				return IOUtils.toString(response.getEntity().getContent());
+			}
 		} catch (Throwable e) {
 			throw new RuntimeException("Error on executing request ", e);
-		} finally {
-			try {
-				client.close();
-			} catch (IOException e) {
-				throw new RuntimeException("Unable to close client ", e);
-			}
 		}
 	}

@ -87,7 +74,7 @@ public class ESClient implements Iterator<String> {
 	}

 	private List<String> getBlobs(final String body) {
-		final List<String> res = JsonPath.read(body, "$.hits.hits[*]._source.blob");
+		final List<String> res = JsonPath.read(body, BLOB_PATH);
 		return res;
 	}

@ -102,11 +89,11 @@ public class ESClient implements Iterator<String> {
 		if (buffer.isEmpty()) {

 			final String json_param = String.format(JSON_SCROLL, scrollId);
-			final String body = getResponse(String.format("http://%s:9200/_search/scroll", esHost), json_param);
+			final String body = getResponse(String.format(ES_SCROLL_URL, esHost), json_param);
 			try {
 				buffer = getBlobs(body);
 			} catch (Throwable e) {
-				logger.error("Error on  get next page: body:" + body);
+				System.out.println("Error on  get next page: body:" + body);
 			}
 		}
 		return nextItem;
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala
@ -11,7 +11,7 @@ import org.slf4j.{Logger, LoggerFactory}

 import scala.collection.JavaConverters._

-object SparkPreProcessMAG {
+object SparkProcessMAG {
  def main(args: Array[String]): Unit = {

    val logger: Logger = LoggerFactory.getLogger(getClass)
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
@ -1,11 +1,11 @@
 package eu.dnetlib.doiboost.orcid

+import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
 import eu.dnetlib.dhp.schema.orcid.OrcidDOI
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import eu.dnetlib.doiboost.DoiBoostMappingUtil.{ORCID, PID_TYPES, createSP, generateDataInfo, generateIdentifier}
 import org.apache.commons.lang.StringUtils
-import org.codehaus.jackson.map.ObjectMapper
 import org.slf4j.{Logger, LoggerFactory}

 import scala.collection.JavaConverters._
@ -18,7 +18,7 @@ case class ORCIDItem(oid:String,name:String,surname:String,creditName:String,err
 case class ORCIDElement(doi:String, authors:List[ORCIDItem]) {}
 object ORCIDToOAF {
  val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass)
-  val mapper = new ObjectMapper
+  val mapper = new ObjectMapper()

  def isJsonValid(inputStr: String): Boolean = {
    import java.io.IOException
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java
@ -3,10 +3,8 @@ package eu.dnetlib.doiboost.orcid;

 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

-import java.io.IOException;
 import java.text.SimpleDateFormat;
 import java.util.Date;
-import java.util.List;
 import java.util.Optional;

 import org.apache.commons.io.IOUtils;
@ -18,11 +16,9 @@ import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.Function;
 import org.apache.spark.util.LongAccumulator;
-import org.mortbay.log.Log;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@ -36,7 +32,7 @@ public class SparkDownloadOrcidAuthors {
 	static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
 	static final String lastUpdate = "2020-09-29 00:00:00";

-	public static void main(String[] args) throws IOException, Exception {
+	public static void main(String[] args) throws Exception {

 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
@ -51,12 +47,12 @@ public class SparkDownloadOrcidAuthors {
 			.orElse(Boolean.TRUE);
 		logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		final String workingPath = parser.get("workingPath");
-		logger.info("workingPath: ", workingPath);
+		logger.info("workingPath: {}", workingPath);
 		final String outputPath = parser.get("outputPath");
-		logger.info("outputPath: ", outputPath);
+		logger.info("outputPath: {}", outputPath);
 		final String token = parser.get("token");
 		final String lambdaFileName = parser.get("lambdaFileName");
-		logger.info("lambdaFileName: ", lambdaFileName);
+		logger.info("lambdaFileName: {}", lambdaFileName);

 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
@ -171,8 +167,8 @@ public class SparkDownloadOrcidAuthors {
 	}

 	private static boolean isModified(String orcidId, String modifiedDate) {
-		Date modifiedDateDt = null;
-		Date lastUpdateDt = null;
+		Date modifiedDateDt;
+		Date lastUpdateDt;
 		try {
 			if (modifiedDate.length() != 19) {
 				modifiedDate = modifiedDate.substring(0, 19);
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_as_params.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_as_params.json
@ -5,5 +5,6 @@
  {"paramName": "cr",   "paramLongName":"crossRefRelation",                 "paramDescription": "the UnpayWall Publication Path",  "paramRequired": true},
  {"paramName": "da",   "paramLongName":"dbaffiliationRelationPath",        "paramDescription": "the MAG Publication Path",        "paramRequired": true},
  {"paramName": "do",   "paramLongName":"dbOrganizationPath",               "paramDescription": "the MAG Publication Path",        "paramRequired": true},
-  {"paramName": "w",    "paramLongName":"targetPath",                       "paramDescription": "the Working Path",                "paramRequired": true}
+  {"paramName": "w",    "paramLongName":"targetPath",                       "paramDescription": "the Working Path",                "paramRequired": true},
+  {"paramName": "sp",    "paramLongName":"sFilePath",                       "paramDescription": "the Sequence file Path",          "paramRequired": true}
 ]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json
@ -3,5 +3,5 @@
  {"paramName": "hb",   "paramLongName":"hostedByMapPath",            "paramDescription": "the hosted By Map Path",         "paramRequired": true},
  {"paramName": "ap",   "paramLongName":"affiliationPath",            "paramDescription": "the Affliation Path",            "paramRequired": true},
  {"paramName": "pa",   "paramLongName":"paperAffiliationPath",      "paramDescription": "the paperAffiliation Path",       "paramRequired": true},
-  {"paramName": "w",    "paramLongName":"workingDirPath",            "paramDescription": "the Working Path",                "paramRequired": true}
+  {"paramName": "w",    "paramLongName":"workingPath",                "paramDescription": "the Working Path",                "paramRequired": true}
 ]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_from_es.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_from_es.json
@ -1,5 +1,7 @@
 [
  {"paramName":"t",   "paramLongName":"targetPath",    "paramDescription": "the path of the sequencial file to write",   "paramRequired": true},
  {"paramName":"n",   "paramLongName":"namenode",      "paramDescription": "the hive metastore uris",                    "paramRequired": true},
-  {"paramName":"ts",   "paramLongName":"timestamp",         "paramDescription": "timestamp",                                  "paramRequired": false}
+  {"paramName":"ts",  "paramLongName":"timestamp",     "paramDescription": "timestamp",                                  "paramRequired": false},
+  {"paramName":"ess", "paramLongName":"esServer",     "paramDescription": "elasticsearch server url",                   "paramRequired": true},
+  {"paramName":"esi", "paramLongName":"esIndex",      "paramDescription": "elasticsearch index name",                   "paramRequired": true}
 ]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml
@ -0,0 +1,42 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>hive_metastore_uris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>spark2YarnHistoryServerAddress</name>
+        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
+    </property>
+    <property>
+        <name>spark2EventLogDir</name>
+        <value>/user/spark/spark2ApplicationHistory</value>
+    </property>
+    <property>
+        <name>spark2ExtraListeners</name>
+        <value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
+    </property>
+    <property>
+        <name>spark2SqlQueryExecutionListeners</name>
+        <value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml
@ -0,0 +1,335 @@
+<workflow-app name="Generate DOIBoost ActionSet" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorIntersectionMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
+        </property>
+
+
+        <!-- Intersection Parameters -->
+        <property>
+            <name>workingPath</name>
+            <description>the working Path</description>
+        </property>
+
+        <property>
+            <name>hostedByMapPath</name>
+            <description>the hostedByMap Path</description>
+        </property>
+        <property>
+            <name>outputPath</name>
+            <description>the Path of the sequence file action set</description>
+        </property>
+
+
+        <!-- Crossref Parameters -->
+        <property>
+            <name>inputPathCrossref</name>
+            <description>the Crossref input path</description>
+        </property>
+        <property>
+            <name>crossrefTimestamp</name>
+            <description>Timestamp for the Crossref incremental Harvesting</description>
+        </property>
+        <property>
+            <name>esServer</name>
+            <description>elasticsearch server url for the Crossref Harvesting</description>
+        </property>
+        <property>
+            <name>esIndex</name>
+            <description>elasticsearch index name for the Crossref Harvesting</description>
+        </property>
+
+        <!--    MAG Parameters    -->
+        <property>
+            <name>inputPathMAG</name>
+            <description>the MAG working path</description>
+        </property>
+
+
+        <!--    UnpayWall Parameters    -->
+        <property>
+            <name>inputPathUnpayWall</name>
+            <description>the UnpayWall working path</description>
+        </property>
+
+        <!--    ORCID Parameters    -->
+        <property>
+            <name>inputPathOrcid</name>
+            <description>the ORCID working path</description>
+        </property>
+
+    </parameters>
+
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
+    <start to="resume_from"/>
+
+    <!-- Entry point selection: 'resumeFrom' names the action to restart from; anything else runs the full pipeline.
+         The historical 'Preprocess*' spellings are kept, and the real action names are accepted as well. -->
+    <decision name="resume_from">
+        <switch>
+            <case to="ConvertCrossrefToOAF">${wf:conf('resumeFrom') eq 'ConvertCrossrefToOAF'}</case>
+            <case to="ResetMagWorkingPath">${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}</case>
+            <case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag' or wf:conf('resumeFrom') eq 'ProcessMAG'}</case>
+            <case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW' or wf:conf('resumeFrom') eq 'ProcessUW'}</case>
+            <case to="ProcessORCID">${wf:conf('resumeFrom') eq 'PreprocessORCID' or wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
+            <case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
+            <case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>
+            <default to="ImportCrossRef"/>
+        </switch>
+    </decision>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="ImportCrossRef">
+        <java>
+            <main-class>eu.dnetlib.doiboost.crossref.CrossrefImporter</main-class>
+            <arg>--targetPath</arg><arg>${inputPathCrossref}/index_update</arg>
+            <arg>--namenode</arg><arg>${nameNode}</arg>
+            <arg>--esServer</arg><arg>${esServer}</arg>
+            <arg>--esIndex</arg><arg>${esIndex}</arg>
+            <arg>--timestamp</arg><arg>${crossrefTimestamp}</arg>
+        </java>
+        <ok to="GenerateCrossrefDataset"/>
+        <error to="Kill"/>
+    </action>
+
+
+    <!-- CROSSREF SECTION -->
+
+    <action name="GenerateCrossrefDataset">
+            <spark xmlns="uri:oozie:spark-action:0.2">
+                <master>yarn-cluster</master>
+                <mode>cluster</mode>
+                <name>GenerateCrossrefDataset</name>
+                <class>eu.dnetlib.doiboost.crossref.CrossrefDataset</class>
+                <jar>dhp-doiboost-${projectVersion}.jar</jar>
+                <spark-opts>
+                    --executor-memory=${sparkExecutorMemory}
+                    --executor-cores=${sparkExecutorCores}
+                    --driver-memory=${sparkDriverMemory}
+                    --conf spark.sql.shuffle.partitions=3840
+                    ${sparkExtraOPT}
+                </spark-opts>
+                <arg>--workingPath</arg><arg>${inputPathCrossref}</arg>
+                <arg>--master</arg><arg>yarn-cluster</arg>
+            </spark>
+            <ok to="RenameDataset"/>
+            <error to="Kill"/>
+    </action>
+
+    <action name="RenameDataset">
+        <fs>
+            <delete path="${inputPathCrossref}/crossref_ds"/>
+            <move source="${inputPathCrossref}/crossref_ds_updated"
+                  target="${inputPathCrossref}/crossref_ds"/>
+        </fs>
+        <ok to="ConvertCrossrefToOAF"/>
+        <error to="Kill"/>
+    </action>
+
+
+    <action name="ConvertCrossrefToOAF">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>ConvertCrossrefToOAF</name>
+            <class>eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
+                ${sparkExtraOPT}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${inputPathCrossref}/crossref_ds</arg>
+            <arg>--targetPath</arg><arg>${workingPath}</arg>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+        </spark>
+        <ok to="ResetMagWorkingPath"/>
+        <error to="Kill"/>
+    </action>
+
+
+
+    <!-- MAG SECTION -->
+    <action name="ResetMagWorkingPath">
+        <!-- Drop the paths that ConvertMagToDataset (dataset) and ProcessMAG (process) write to -->
+        <fs>
+            <delete path="${inputPathMAG}/dataset"/>
+            <delete path="${inputPathMAG}/process"/>
+        </fs>
+        <ok to="ConvertMagToDataset"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ConvertMagToDataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>Convert Mag to Dataset</name>
+            <class>eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                ${sparkExtraOPT}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${inputPathMAG}/input</arg>
+            <arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+        </spark>
+        <ok to="ProcessMAG"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ProcessMAG">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>Convert Mag to OAF Dataset</name>
+            <class>eu.dnetlib.doiboost.mag.SparkProcessMAG</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
+                ${sparkExtraOPT}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${inputPathMAG}/dataset</arg>
+            <arg>--workingPath</arg><arg>${inputPathMAG}/process</arg>
+            <arg>--targetPath</arg><arg>${workingPath}</arg>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+        </spark>
+        <ok to="ProcessUW"/>
+        <error to="Kill"/>
+    </action>
+
+    <!--  UnpayWall  SECTION -->
+
+    <action name="ProcessUW">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>Convert UnpayWall to Dataset</name>
+            <class>eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
+                ${sparkExtraOPT}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${inputPathUnpayWall}/uw_extracted</arg>
+            <arg>--targetPath</arg><arg>${workingPath}</arg>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+        </spark>
+        <ok to="ProcessORCID"/>
+        <error to="Kill"/>
+    </action>
+
+    <!--  ORCID  SECTION -->
+    <action name="ProcessORCID">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>Convert ORCID to Dataset</name>
+            <class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
+                ${sparkExtraOPT}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
+            <arg>--targetPath</arg><arg>${workingPath}</arg>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+        </spark>
+        <ok to="CreateDOIBoost"/>
+        <error to="Kill"/>
+    </action>
+
+    <!-- INTERSECTION SECTION-->
+    <action name="CreateDOIBoost">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>Create DOIBoost Infospace</name>
+            <class>eu.dnetlib.doiboost.SparkGenerateDoiBoost</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorIntersectionMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
+                ${sparkExtraOPT}
+            </spark-opts>
+            <arg>--hostedByMapPath</arg><arg>${hostedByMapPath}</arg>
+            <arg>--affiliationPath</arg><arg>${inputPathMAG}/process/Affiliations</arg>
+            <arg>--paperAffiliationPath</arg><arg>${inputPathMAG}/process/PaperAuthorAffiliations</arg>
+            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+        </spark>
+        <ok to="GenerateActionSet"/>
+        <error to="Kill"/>
+    </action>
+
+
+    <action name="GenerateActionSet">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>Generate DOIBoost ActionSet</name>
+            <class>eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
+                ${sparkExtraOPT}
+            </spark-opts>
+            <arg>--dbPublicationPath</arg><arg>${workingPath}/doiBoostPublicationFiltered</arg>
+            <arg>--dbDatasetPath</arg><arg>${workingPath}/crossrefDataset</arg>
+            <arg>--crossRefRelation</arg><arg>${workingPath}/crossrefRelation</arg>
+            <arg>--dbaffiliationRelationPath</arg><arg>${workingPath}/doiBoostPublicationAffiliation</arg>
+            <arg>--dbOrganizationPath</arg><arg>${workingPath}/doiBoostOrganization</arg>
+            <arg>--targetPath</arg><arg>${workingPath}/actionDataSet</arg>
+            <arg>--sFilePath</arg><arg>${outputPath}</arg>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
@ -1,9 +1,9 @@
 package eu.dnetlib.doiboost.orcid

+import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.schema.oaf.Publication
 import eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF.getClass
 import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
-import org.codehaus.jackson.map.ObjectMapper
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 import org.slf4j.{Logger, LoggerFactory}
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java
@ -104,7 +104,7 @@ public class PrepareResultOrcidAssociationStep1 {
 			+ "               LATERAL VIEW EXPLODE (author) a AS MyT "
 			+ "               LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
 			+ "               WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or "
-			+ "                       lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp "
+			+ "                       lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp "
 			+ "               GROUP BY id) r_t "
 			+ " JOIN ("
 			+ "        SELECT source, target "
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
@ -108,7 +108,7 @@ public class SparkResultToCommunityFromOrganizationJob {
 					.stream()
 					.map(con -> con.getId())
 					.collect(Collectors.toList());
-				Result res = new Result();
+				R res = (R) ret.getClass().newInstance();
 				res.setId(ret.getId());
 				List<Context> propagatedContexts = new ArrayList<>();
 				for (String cId : communitySet) {
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java
@ -130,7 +130,7 @@ public class SparkResultToCommunityThroughSemRelJob {
 						})
 					.filter(Objects::nonNull)
 					.collect(Collectors.toList());
-				Result r = new Result();
+				R r = (R) ret.getClass().newInstance();
 				r.setId(ret.getId());
 				r.setContext(contextList);
 				ret.mergeFrom(r);
--- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java
+++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java
@ -24,7 +24,6 @@ import org.slf4j.LoggerFactory;

 import com.fasterxml.jackson.databind.ObjectMapper;

-import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest;
 import eu.dnetlib.dhp.schema.oaf.Dataset;

 public class ResultToCommunityJobTest {
@ -66,7 +65,7 @@ public class ResultToCommunityJobTest {
 	}

 	@Test
-	public void test1() throws Exception {
+	public void testSparkResultToCommunityThroughSemRelJob() throws Exception {
 		SparkResultToCommunityThroughSemRelJob
 			.main(
 				new String[] {
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java
@ -15,8 +15,11 @@ import eu.dnetlib.dhp.schema.oaf.*;

 public class CleaningFunctions {

-	public static final String DOI_URL_PREFIX_REGEX = "(^http(s?):\\/\\/)(((dx\\.)?doi\\.org)|(handle\\.test\\.datacite\\.org))\\/";
-	public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
+	public static final String DOI_PREFIX_REGEX = "^10\\.";
+
+	public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
+	public static final int ORCID_LEN = 19;
+
 	public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";

 	public static final Set<String> PID_BLACKLIST = new HashSet<>();
@ -56,11 +59,17 @@ public class CleaningFunctions {
 				}
 			}
 			if (Objects.nonNull(r.getAuthor())) {
-				r.getAuthor().forEach(a -> {
+				r
+					.getAuthor()
+					.stream()
+					.filter(Objects::nonNull)
+					.forEach(a -> {
 						if (Objects.nonNull(a.getPid())) {
-						a.getPid().forEach(p -> {
-							fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES);
-						});
+							a
+								.getPid()
+								.stream()
+								.filter(Objects::nonNull)
+								.forEach(p -> fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES));
 						}
 					});
 			}
@ -86,7 +95,7 @@ public class CleaningFunctions {
 		} else if (value instanceof Organization) {
 			Organization o = (Organization) value;
 			if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) {
-				o.setCountry(qualifier("UNKNOWN", "Unknown", ModelConstants.DNET_COUNTRY_TYPE));
+				o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
 			}
 		} else if (value instanceof Relation) {
 			// nothing to clean here
@ -101,6 +110,16 @@ public class CleaningFunctions {
 					.setLanguage(
 						qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
 			}
+			if (Objects.nonNull(r.getCountry())) {
+				r
+					.setCountry(
+						r
+							.getCountry()
+							.stream()
+							.filter(Objects::nonNull)
+							.filter(c -> StringUtils.isNotBlank(c.getClassid()))
+							.collect(Collectors.toList()));
+			}
 			if (Objects.nonNull(r.getSubject())) {
 				r
 					.setSubject(
@ -153,12 +172,14 @@ public class CleaningFunctions {
 			if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
 				r
 					.setResourcetype(
-						qualifier("UNKNOWN", "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
+						qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
 			}
 			if (Objects.nonNull(r.getInstance())) {
 				for (Instance i : r.getInstance()) {
 					if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
-						i.setAccessright(qualifier("UNKNOWN", "not available", ModelConstants.DNET_ACCESS_MODES));
+						i
+							.setAccessright(
+								qualifier(ModelConstants.UNKNOWN, "not available", ModelConstants.DNET_ACCESS_MODES));
 					}
 					if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
 						i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
@ -173,12 +194,22 @@ public class CleaningFunctions {
 				if (Objects.isNull(bestaccessrights)) {
 					r
 						.setBestaccessright(
-							qualifier("UNKNOWN", "not available", ModelConstants.DNET_ACCESS_MODES));
+							qualifier(ModelConstants.UNKNOWN, "not available", ModelConstants.DNET_ACCESS_MODES));
 				} else {
 					r.setBestaccessright(bestaccessrights);
 				}
 			}
 			if (Objects.nonNull(r.getAuthor())) {
+				r
+					.setAuthor(
+						r
+							.getAuthor()
+							.stream()
+							.filter(a -> Objects.nonNull(a))
+							.filter(a -> StringUtils.isNotBlank(a.getFullname()))
+							.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
+							.collect(Collectors.toList()));
+
 				boolean nullRank = r
 					.getAuthor()
 					.stream()
@ -199,6 +230,7 @@ public class CleaningFunctions {
 								a
 									.getPid()
 									.stream()
+									.filter(Objects::nonNull)
 									.filter(p -> Objects.nonNull(p.getQualifier()))
 									.filter(p -> StringUtils.isNotBlank(p.getValue()))
 									.map(p -> {
@ -211,14 +243,31 @@ public class CleaningFunctions {
 													.map(Qualifier::getClassid)
 													.orElse(""))
 											.orElse("");
-										if (pidProvenance.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
+										if (p
+											.getQualifier()
+											.getClassid()
+											.toLowerCase()
+											.contains(ModelConstants.ORCID)) {
+											if (pidProvenance
+												.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
 												p.getQualifier().setClassid(ModelConstants.ORCID);
 											} else {
 												p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
 											}
-										p.setValue(p.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, ""));
+											final String orcid = p
+												.getValue()
+												.trim()
+												.toLowerCase()
+												.replaceAll(ORCID_CLEANING_REGEX, "$1-$2-$3-$4");
+											if (orcid.length() == ORCID_LEN) {
+												p.setValue(orcid);
+											} else {
+												p.setValue("");
+											}
+										}
 										return p;
 									})
+									.filter(p -> StringUtils.isNotBlank(p.getValue()))
 									.collect(
 										Collectors
 											.toMap(
@ -286,7 +335,7 @@ public class CleaningFunctions {

 			// TODO add cleaning for more PID types as needed
 			case "doi":
-				pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX_REGEX, ""));
+				pid.setValue(value.toLowerCase().replaceAll(DOI_PREFIX_REGEX, "10."));
 				break;
 		}
 		return pid;
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/GroupEntitiesAndRelationsSparkJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/GroupEntitiesAndRelationsSparkJob.java
@ -21,6 +21,7 @@ import org.apache.spark.sql.expressions.Aggregator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import com.fasterxml.jackson.databind.DeserializationFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.jayway.jsonpath.Configuration;
 import com.jayway.jsonpath.DocumentContext;
@ -44,7 +45,8 @@ public class GroupEntitiesAndRelationsSparkJob {

 	private final static String SOURCE_JPATH = "$.source";

-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

 	public static void main(String[] args) throws Exception {

--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
@ -75,6 +75,8 @@ public abstract class AbstractMdRecordToOafMapper {
 	protected static final Qualifier MAG_PID_TYPE = qualifier(
 		"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);

+	protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
+
 	protected static final Map<String, String> nsContext = new HashMap<>();

 	static {
@ -244,25 +246,54 @@ public abstract class AbstractMdRecordToOafMapper {

 			final String originalId = ((Node) o).getText();

+			final String validationdDate = ((Node) o).valueOf("@validationDate");
+
 			if (StringUtils.isNotBlank(originalId)) {
 				final String projectId = createOpenaireId(40, originalId, true);

 				res
 					.add(
-						getRelation(
+						getRelationWithValidationDate(
 							docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info,
-							lastUpdateTimestamp));
+							lastUpdateTimestamp, validationdDate));
 				res
 					.add(
-						getRelation(
+						getRelationWithValidationDate(
 							projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info,
-							lastUpdateTimestamp));
+							lastUpdateTimestamp, validationdDate));
 			}
 		}

 		return res;
 	}

+	/**
+	 * Builds a relation through {@code getRelation} and records its validation state.
+	 * The relation is flagged as validated only when {@code validationDate} is not blank; in that case the
+	 * date is stored and the trust of its DataInfo is set to {@code DEFAULT_TRUST_FOR_VALIDATED_RELS}.
+	 * Otherwise the relation is flagged as not validated and its validation date is set to null.
+	 *
+	 * @param validationDate the validation date read from the record, may be null or blank
+	 * @return the relation produced by {@code getRelation}, enriched with the validation information
+	 */
+	protected Relation getRelationWithValidationDate(final String source,
+		final String target,
+		final String relType,
+		final String subRelType,
+		final String relClass,
+		final KeyValue collectedFrom,
+		final DataInfo info,
+		final long lastUpdateTimestamp,
+		final String validationDate) {
+
+		final Relation r = getRelation(
+			source, target, relType, subRelType, relClass, collectedFrom, info, lastUpdateTimestamp);
+
+		final boolean validated = StringUtils.isNotBlank(validationDate);
+		r.setValidated(validated);
+		r.setValidationDate(validated ? validationDate : null);
+
+		if (validated) {
+			// NOTE(review): if getRelation stores the given 'info' instance rather than a copy, this also
+			// changes the trust seen by every other object sharing that DataInfo - confirm.
+			r.getDataInfo().setTrust(DEFAULT_TRUST_FOR_VALIDATED_RELS);
+		}
+
+		return r;
+	}
+
 	protected Relation getRelation(final String source,
 		final String target,
 		final String relType,
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
@ -23,7 +23,15 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.asString;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;

 import java.io.Closeable;
 import java.io.IOException;
@ -462,44 +470,48 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i

 				return Arrays.asList(r);
 			} else {
+				final String validationDate = rs.getString("curation_date");
+
 				final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false);
 				final String targetId = createOpenaireId(rs.getString(TARGET_TYPE), rs.getString("target_id"), false);

 				final Relation r1 = new Relation();
 				final Relation r2 = new Relation();

-				if (rs.getString(SOURCE_TYPE).equals("project")) {
+				r1.setValidated(true);
+				r1.setValidationDate(validationDate);
 				r1.setCollectedfrom(collectedFrom);
-					r1.setRelType(RESULT_PROJECT);
-					r1.setSubRelType(OUTCOME);
-					r1.setRelClass(PRODUCES);
-
-					r2.setCollectedfrom(collectedFrom);
-					r2.setRelType(RESULT_PROJECT);
-					r2.setSubRelType(OUTCOME);
-					r2.setRelClass(IS_PRODUCED_BY);
-				} else {
-					r1.setCollectedfrom(collectedFrom);
-					r1.setRelType(RESULT_RESULT);
-					r1.setSubRelType(RELATIONSHIP);
-					r1.setRelClass(IS_RELATED_TO);
-
-					r2.setCollectedfrom(collectedFrom);
-					r2.setRelType(RESULT_RESULT);
-					r2.setSubRelType(RELATIONSHIP);
-					r2.setRelClass(IS_RELATED_TO);
-				}
-
 				r1.setSource(sourceId);
 				r1.setTarget(targetId);
 				r1.setDataInfo(info);
 				r1.setLastupdatetimestamp(lastUpdateTimestamp);

+				r2.setValidationDate(validationDate);
+				r2.setValidated(true);
+				r2.setCollectedfrom(collectedFrom);
 				r2.setSource(targetId);
 				r2.setTarget(sourceId);
 				r2.setDataInfo(info);
 				r2.setLastupdatetimestamp(lastUpdateTimestamp);

+				if (rs.getString(SOURCE_TYPE).equals("project")) {
+					r1.setRelType(RESULT_PROJECT);
+					r1.setSubRelType(OUTCOME);
+					r1.setRelClass(PRODUCES);
+
+					r2.setRelType(RESULT_PROJECT);
+					r2.setSubRelType(OUTCOME);
+					r2.setRelClass(IS_PRODUCED_BY);
+				} else {
+					r1.setRelType(RESULT_RESULT);
+					r1.setSubRelType(RELATIONSHIP);
+					r1.setRelClass(IS_RELATED_TO);
+
+					r2.setRelType(RESULT_RESULT);
+					r2.setSubRelType(RELATIONSHIP);
+					r2.setRelClass(IS_RELATED_TO);
+				}
+
 				return Arrays.asList(r1, r2);
 			}

--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
@ -51,17 +51,19 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 			final Node n = (Node) o;
 			final Author author = new Author();
 			final String fullname = n.valueOf("./datacite:creatorName");
+			final String name = n.valueOf("./datacite:givenName");
+			final String surname = n.valueOf("./datacite:familyName");
+			if (StringUtils.isNotBlank(fullname) || StringUtils.isNotBlank(name) || StringUtils.isNotBlank(surname)) {
 				author.setFullname(fullname);

 				final PacePerson pp = new PacePerson(fullname, false);
-			final String name = n.valueOf("./datacite:givenName");
+
 				if (StringUtils.isBlank(name) & pp.isAccurate()) {
 					author.setName(pp.getNormalisedFirstName());
 				} else {
 					author.setName(name);
 				}

-			final String surname = n.valueOf("./datacite:familyName");
 				if (StringUtils.isBlank(surname) & pp.isAccurate()) {
 					author.setSurname(pp.getNormalisedSurname());
 				} else {
@ -77,6 +79,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 				author.setRank(pos++);
 				res.add(author);
 			}
+		}
 		return res;
 	}

--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java
@ -11,7 +11,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.codehaus.jackson.map.ObjectMapper;
+
+import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.schema.oaf.Oaf;

--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryClaims.sql
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryClaims.sql
@ -1 +1 @@
-SELECT source_type, source_id, target_type, target_id, semantics FROM claim WHERE approved=TRUE;
+SELECT source_type, source_id, target_type, target_id, semantics, curation_date::text FROM claim WHERE approved=TRUE;
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
@ -141,7 +141,10 @@ public class MappersTest {
 		assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
 		assertTrue(StringUtils.isNotBlank(r1.getRelType()));
 		assertTrue(StringUtils.isNotBlank(r2.getRelType()));
-
+		assertTrue(r1.getValidated());
+		assertTrue(r2.getValidated());
+		assertEquals(r1.getValidationDate(), "2020-01-01");
+		assertEquals(r2.getValidationDate(), "2020-01-01");
 		// System.out.println(new ObjectMapper().writeValueAsString(p));
 		// System.out.println(new ObjectMapper().writeValueAsString(r1));
 		// System.out.println(new ObjectMapper().writeValueAsString(r2));
@ -246,6 +249,10 @@ public class MappersTest {
 		assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
 		assertTrue(StringUtils.isNotBlank(r1.getRelType()));
 		assertTrue(StringUtils.isNotBlank(r2.getRelType()));
+		assertTrue(r1.getValidated());
+		assertTrue(r2.getValidated());
+		assertEquals(r1.getValidationDate(), "2020-01-01");
+		assertEquals(r2.getValidationDate(), "2020-01-01");
 	}

 	@Test
@ -355,7 +362,31 @@ public class MappersTest {
 		assertValidId(p.getId());
 		assertValidId(p.getCollectedfrom().get(0).getKey());
 		assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
-		System.out.println(p.getTitle().get(0).getValue());
+		assertEquals(1, p.getAuthor().size());
+		assertEquals("OPEN", p.getBestaccessright().getClassid());
+		assertTrue(StringUtils.isNotBlank(p.getPid().get(0).getValue()));
+		assertTrue(StringUtils.isNotBlank(p.getPid().get(0).getQualifier().getClassid()));
+		assertEquals("dataset", p.getResulttype().getClassname());
+		assertEquals(1, p.getInstance().size());
+		assertEquals("OPEN", p.getInstance().get(0).getAccessright().getClassid());
+		assertValidId(p.getInstance().get(0).getCollectedfrom().getKey());
+		assertValidId(p.getInstance().get(0).getHostedby().getKey());
+		assertEquals(
+			"http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getValue());
+		assertEquals(1, p.getInstance().get(0).getUrl().size());
+//		System.out.println(p.getInstance().get(0).getUrl().get(0));
+//		System.out.println(p.getInstance().get(0).getHostedby().getValue());
+		System.out.println(p.getPid().get(0).getValue());
+	}
+
+	@Test
+	void testTextGridNoAuthor() throws IOException { // maps textgrid-noauthor.xml, whose only creator has an empty creatorName
+		final String xml = IOUtils.toString(getClass().getResourceAsStream("textgrid-noauthor.xml"));
+		final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
+
+		System.out.println("***************"); // NOTE(review): the result is only printed; nothing here asserts on the mapped output
+		System.out.println(new ObjectMapper().writeValueAsString(list));
+		System.out.println("***************");
 	}

 	@Test
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java
@ -35,7 +35,7 @@ public class MigrateDbEntitiesApplicationTest {

 	private MigrateDbEntitiesApplication app;

-	@Mock
+	@Mock(lenient = true)
 	private ResultSet rs;

 	@Mock
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml
@ -51,7 +51,7 @@
    <!--<dr:CobjCategory type="publication">0001</dr:CobjCategory>-->
    <dr:CobjCategory>0001</dr:CobjCategory>
    <oaf:dateAccepted>2017-01-01</oaf:dateAccepted>
-    <oaf:projectid>corda_______::226852</oaf:projectid>
+    <oaf:projectid validationDate="2020-01-01">corda_______::226852</oaf:projectid>
    <oaf:accessrights>OPEN</oaf:accessrights>
    <oaf:hostedBy id="openaire____::issn226852" name="One Ecosystem"/>
    <oaf:collectedFrom
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml
@ -89,7 +89,7 @@
    <oaf:language>und</oaf:language>
    <oaf:concept id="https://zenodo.org/communities/epfl"/>
    <oaf:hostedBy id="re3data_____::r3d100010468" name="Zenodo"/>
-    <oaf:projectid>corda_______::226852</oaf:projectid>
+    <oaf:projectid validationDate="2020-01-01">corda_______::226852</oaf:projectid>
    <oaf:collectedFrom id="re3data_____::r3d100010468" name="Zenodo"/>
    <oaf:refereed>0001</oaf:refereed>s
  </metadata>
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid-noauthor.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid-noauthor.xml
@ -0,0 +1,117 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<record xmlns:datacite="http://datacite.org/schema/kernel-3"
+        xmlns:dr="http://www.driver-repository.eu/namespace/dr"
+        xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:oai="http://www.openarchives.org/OAI/2.0/">
+    <oai:header xmlns="http://namespace.openaire.eu/"
+                xmlns:dc="http://purl.org/dc/elements/1.1/"
+                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
+                xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+        <dri:objIdentifier>r3f52792889d::00002412cb25f2f3047712d00ab2c8eb</dri:objIdentifier>
+        <dri:recordIdentifier>hdl:11858/00-1734-0000-0003-EE73-2</dri:recordIdentifier>
+        <dri:dateOfCollection>2020-12-16T10:04:03.148Z</dri:dateOfCollection>
+        <oaf:datasourceprefix>r3f52792889d</oaf:datasourceprefix>
+        <identifier xmlns="http://www.openarchives.org/OAI/2.0/">textgrid:rn8z.0</identifier>
+        <datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2012-01-29T20:54:12Z</datestamp>
+        <dr:dateOfTransformation>2020-12-16T16:02:37.562Z</dr:dateOfTransformation>
+    </oai:header>
+    <metadata>
+        <datacite:resource xmlns="http://www.openarchives.org/OAI/2.0/"
+                           xmlns:dc="http://purl.org/dc/elements/1.1/"
+                           xmlns:dri="http://www.driver-repository.eu/namespace/dri"
+                           xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <datacite:identifier identifierType="Handle">hdl:11858/00-1734-0000-0003-EE73-2</datacite:identifier>
+            <datacite:creators>
+                <datacite:creator>
+                    <datacite:creatorName></datacite:creatorName>
+                    <datacite:nameIdentifier nameIdentifierScheme="pnd" schemeURI="https://ref.de.dariah.eu/pndsearch/pndquery.xql?id="></datacite:nameIdentifier>
+                </datacite:creator>
+            </datacite:creators>
+            <datacite:titles>
+                <datacite:title titleType="Other">Auf dem Trocknen</datacite:title>
+                <datacite:title titleType="Other">Detlev von Liliencron: Gute Nacht. Hinterlassene Gedichte, Berlin: Schuster &amp; Loeffler, 1909.</datacite:title>
+            </datacite:titles>
+            <datacite:publisher>TextGrid</datacite:publisher>
+            <datacite:publicationYear>2012</datacite:publicationYear>
+            <datacite:contributors>
+                <datacite:contributor contributorType="DataManager">
+                    <datacite:contributorName>tvitt@textgrid.de</datacite:contributorName>
+                </datacite:contributor>
+                <datacite:contributor contributorType="Other">
+                    <datacite:contributorName>Digitale Bibliothek</datacite:contributorName>
+                    <datacite:nameIdentifier nameIdentifierScheme="textgrid" schemeURI="http://www.textgridlab.org/schema/textgrid-metadata_2010.xsd">TGPR-372fe6dc-57f2-6cd4-01b5-2c4bbefcfd3c</datacite:nameIdentifier>
+                </datacite:contributor>
+            </datacite:contributors>
+            <datacite:dates>
+                <datacite:date dateType="Created">2012-01-29T20:54:12Z</datacite:date>
+                <datacite:date dateType="Issued">2012-01-29T20:54:12Z</datacite:date>
+                <datacite:date dateType="Updated">2012-01-29T20:54:12Z</datacite:date>
+            </datacite:dates>
+            <datacite:resourceType resourceTypeGeneral="Dataset"/>
+            <alternateIdentifiers xmlns="http://datacite.org/schema/kernel-3">
+                <datacite:alternateIdentifier alternateIdentifierType="URI" xmlns="http://www.openarchives.org/OAI/2.0/">textgrid:rn8z.0</datacite:alternateIdentifier>
+                <alternateIdentifier alternateIdentifierType="URL">http://hdl.handle.net/hdl:11858/00-1734-0000-0003-EE73-2</alternateIdentifier>
+            </alternateIdentifiers>
+            <datacite:relatedIdentifiers>
+                <datacite:relatedIdentifier relatedIdentifierType="Handle" relationType="IsPartOf">hdl:11858/00-1734-0000-0003-EE72-4</datacite:relatedIdentifier>
+            </datacite:relatedIdentifiers>
+            <datacite:sizes>
+                <datacite:size>527 Bytes</datacite:size>
+            </datacite:sizes>
+            <datacite:formats>
+                <datacite:format>text/tg.edition+tg.aggregation+xml</datacite:format>
+            </datacite:formats>
+            <datacite:version>0</datacite:version>
+            <datacite:rightsList>
+                <datacite:rights rightsURI="http://creativecommons.org/licenses/by/3.0/de/legalcode"> Der annotierte Datenbestand der Digitalen Bibliothek inklusive
+                    Metadaten sowie davon einzeln zugängliche Teile sind eine Abwandlung
+                    des Datenbestandes von www.editura.de durch TextGrid und werden
+                    unter der Lizenz Creative Commons Namensnennung 3.0 Deutschland
+                    Lizenz (by-Nennung TextGrid) veröffentlicht. Die Lizenz bezieht sich
+                    nicht auf die der Annotation zu Grunde liegenden allgemeinfreien
+                    Texte (Siehe auch Punkt 2 der Lizenzbestimmungen).</datacite:rights>
+                <datacite:rights rightsURI="info:eu-repo/semantics/openAccess"/>
+            </datacite:rightsList>
+            <datacite:descriptions>
+                <datacite:description descriptionType="Abstract"/>
+            </datacite:descriptions>
+            <datacite:geoLocations>
+                <datacite:geoLocation>
+                    <datacite:geoLocationPlace
+                            xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">Berlin</datacite:geoLocationPlace>
+                </datacite:geoLocation>
+            </datacite:geoLocations>
+        </datacite:resource>
+        <oaf:identifier identifierType="handle">hdl:11858/00-1734-0000-0003-EE73-2</oaf:identifier>
+        <dr:CobjCategory type="dataset">0021</dr:CobjCategory>
+        <oaf:refereed>0002</oaf:refereed>
+        <oaf:dateAccepted>2012-01-29</oaf:dateAccepted>
+        <oaf:accessrights>OPEN</oaf:accessrights>
+        <oaf:license>http://creativecommons.org/licenses/by/3.0/de/legalcode</oaf:license>
+        <oaf:language>und</oaf:language>
+        <oaf:country>DE</oaf:country>
+        <oaf:hostedBy id="re3data_____::r3d100011365" name="TextGrid Repository"/>
+        <oaf:collectedFrom id="re3data_____::r3d100011365" name="TextGrid Repository"/>
+    </metadata>
+    <about xmlns:dc="http://purl.org/dc/elements/1.1/"
+           xmlns:dri="http://www.driver-repository.eu/namespace/dri"
+           xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+        <provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
+            <originDescription altered="true" harvestDate="2020-12-16T10:04:03.148Z">
+                <baseURL>https%3A%2F%2Fdev.textgridlab.org%2F1.0%2Ftgoaipmh%2Foai</baseURL>
+                <identifier>textgrid:rn8z.0</identifier>
+                <datestamp>2012-01-29T20:54:12Z</datestamp>
+                <metadataNamespace/>
+            </originDescription>
+        </provenance>
+        <oaf:datainfo>
+            <oaf:inferred>false</oaf:inferred>
+            <oaf:deletedbyinference>false</oaf:deletedbyinference>
+            <oaf:trust>0.9</oaf:trust>
+            <oaf:inferenceprovenance/>
+            <oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive"
+                                  classname="sysimport:crosswalk:datasetarchive"
+                                  schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
+        </oaf:datainfo>
+    </about>
+</record>
+
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml
@ -6,29 +6,29 @@
                xmlns:dc="http://purl.org/dc/elements/1.1/"
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
                xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-        <dri:objIdentifier>r3f52792889d::000051aa1f61d77d2c0b340091f8024e</dri:objIdentifier>
-        <dri:recordIdentifier>textgrid:q9cv.0</dri:recordIdentifier>
-        <dri:dateOfCollection>2020-11-17T09:34:11.128+01:00</dri:dateOfCollection>
+        <dri:objIdentifier>r3f52792889d::00002412cb25f2f3047712d00ab2c8eb</dri:objIdentifier>
+        <dri:recordIdentifier>hdl:11858/00-1734-0000-0003-EE73-2</dri:recordIdentifier>
+        <dri:dateOfCollection>2020-12-16T10:04:03.148Z</dri:dateOfCollection>
        <oaf:datasourceprefix>r3f52792889d</oaf:datasourceprefix>
-        <identifier xmlns="http://www.openarchives.org/OAI/2.0/">textgrid:q9cv.0</identifier>
-        <datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2012-01-21T13:35:20Z</datestamp>
-        <dr:dateOfTransformation>2020-11-17T19:08:56.703+01:00</dr:dateOfTransformation>
+        <identifier xmlns="http://www.openarchives.org/OAI/2.0/">textgrid:rn8z.0</identifier>
+        <datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2012-01-29T20:54:12Z</datestamp>
+        <dr:dateOfTransformation>2020-12-16T16:02:37.562Z</dr:dateOfTransformation>
    </oai:header>
    <metadata>
        <datacite:resource xmlns="http://www.openarchives.org/OAI/2.0/"
                           xmlns:dc="http://purl.org/dc/elements/1.1/"
                           xmlns:dri="http://www.driver-repository.eu/namespace/dri"
                           xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-            <datacite:identifier identifierType="Handle">hdl:11858/00-1734-0000-0003-7664-F</datacite:identifier>
+            <datacite:identifier identifierType="Handle">hdl:11858/00-1734-0000-0003-EE73-2</datacite:identifier>
            <datacite:creators>
                <datacite:creator>
-                    <datacite:creatorName>Hoffmann von Fallersleben, August Heinrich</datacite:creatorName>
-                    <datacite:nameIdentifier nameIdentifierScheme="pnd" schemeURI="https://de.dariah.eu/pnd-service">118552589</datacite:nameIdentifier>
+                    <datacite:creatorName>Liliencron, Detlev von</datacite:creatorName>
+                    <datacite:nameIdentifier nameIdentifierScheme="pnd" schemeURI="https://ref.de.dariah.eu/pndsearch/pndquery.xql?id=">118572954</datacite:nameIdentifier>
                </datacite:creator>
            </datacite:creators>
            <datacite:titles>
-                <datacite:title titleType="Other">Mailied</datacite:title>
-                <datacite:title titleType="Other">August Heinrich Hoffmann von Fallersleben: Unpolitische Lieder von Hoffmann von Fallersleben, 1. + 2. Theil, 1. Theil, Hamburg: Hoffmann und Campe, 1841.</datacite:title>
+                <datacite:title titleType="Other">Auf dem Trocknen</datacite:title>
+                <datacite:title titleType="Other">Detlev von Liliencron: Gute Nacht. Hinterlassene Gedichte, Berlin: Schuster &amp; Loeffler, 1909.</datacite:title>
            </datacite:titles>
            <datacite:publisher>TextGrid</datacite:publisher>
            <datacite:publicationYear>2012</datacite:publicationYear>
@ -38,21 +38,21 @@
                </datacite:contributor>
                <datacite:contributor contributorType="Other">
                    <datacite:contributorName>Digitale Bibliothek</datacite:contributorName>
-                    <datacite:nameIdentifier nameIdentifierScheme="textgrid">TGPR-372fe6dc-57f2-6cd4-01b5-2c4bbefcfd3c</datacite:nameIdentifier>
+                    <datacite:nameIdentifier nameIdentifierScheme="textgrid" schemeURI="http://www.textgridlab.org/schema/textgrid-metadata_2010.xsd">TGPR-372fe6dc-57f2-6cd4-01b5-2c4bbefcfd3c</datacite:nameIdentifier>
                </datacite:contributor>
            </datacite:contributors>
            <datacite:dates>
-                <datacite:date dateType="Created">2012-01-21T13:35:20Z</datacite:date>
-                <datacite:date dateType="Issued">2012-01-21T13:35:20Z</datacite:date>
-                <datacite:date dateType="Updated">2012-01-21T13:35:20Z</datacite:date>
+                <datacite:date dateType="Created">2012-01-29T20:54:12Z</datacite:date>
+                <datacite:date dateType="Issued">2012-01-29T20:54:12Z</datacite:date>
+                <datacite:date dateType="Updated">2012-01-29T20:54:12Z</datacite:date>
            </datacite:dates>
            <datacite:resourceType resourceTypeGeneral="Dataset"/>
            <alternateIdentifiers xmlns="http://datacite.org/schema/kernel-3">
-                <datacite:alternateIdentifier alternateIdentifierType="URI" xmlns="http://www.openarchives.org/OAI/2.0/">textgrid:q9cv.0</datacite:alternateIdentifier>
-                <alternateIdentifier alternateIdentifierType="URL">http://hdl.handle.net/hdl:11858/00-1734-0000-0003-7664-F</alternateIdentifier>
+                <datacite:alternateIdentifier alternateIdentifierType="URI" xmlns="http://www.openarchives.org/OAI/2.0/">textgrid:rn8z.0</datacite:alternateIdentifier>
+                <alternateIdentifier alternateIdentifierType="URL">http://hdl.handle.net/hdl:11858/00-1734-0000-0003-EE73-2</alternateIdentifier>
            </alternateIdentifiers>
            <datacite:relatedIdentifiers>
-                <datacite:relatedIdentifier relatedIdentifierType="Handle" relationType="IsPartOf">hdl:11858/00-1734-0000-0003-7666-B</datacite:relatedIdentifier>
+                <datacite:relatedIdentifier relatedIdentifierType="Handle" relationType="IsPartOf">hdl:11858/00-1734-0000-0003-EE72-4</datacite:relatedIdentifier>
            </datacite:relatedIdentifiers>
            <datacite:sizes>
                <datacite:size>527 Bytes</datacite:size>
@ -77,17 +77,18 @@
            <datacite:geoLocations>
                <datacite:geoLocation>
                    <datacite:geoLocationPlace
-                            xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">Hamburg</datacite:geoLocationPlace>
+                            xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">Berlin</datacite:geoLocationPlace>
                </datacite:geoLocation>
            </datacite:geoLocations>
        </datacite:resource>
-        <oaf:identifier identifierType="handle">hdl:11858/00-1734-0000-0003-7664-F</oaf:identifier>
+        <oaf:identifier identifierType="handle">hdl:11858/00-1734-0000-0003-EE73-2</oaf:identifier>
        <dr:CobjCategory type="dataset">0021</dr:CobjCategory>
        <oaf:refereed>0002</oaf:refereed>
-        <oaf:dateAccepted>2012-01-21</oaf:dateAccepted>
+        <oaf:dateAccepted>2012-01-29</oaf:dateAccepted>
        <oaf:accessrights>OPEN</oaf:accessrights>
        <oaf:license>http://creativecommons.org/licenses/by/3.0/de/legalcode</oaf:license>
        <oaf:language>und</oaf:language>
+        <oaf:country>DE</oaf:country>
        <oaf:hostedBy id="re3data_____::r3d100011365" name="TextGrid Repository"/>
        <oaf:collectedFrom id="re3data_____::r3d100011365" name="TextGrid Repository"/>
    </metadata>
@ -95,11 +96,11 @@
           xmlns:dri="http://www.driver-repository.eu/namespace/dri"
           xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
        <provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
-            <originDescription altered="true" harvestDate="2020-11-17T09:34:11.128+01:00">
+            <originDescription altered="true" harvestDate="2020-12-16T10:04:03.148Z">
                <baseURL>https%3A%2F%2Fdev.textgridlab.org%2F1.0%2Ftgoaipmh%2Foai</baseURL>
-                <identifier>textgrid:q9cv.0</identifier>
-                <datestamp>2012-01-21T13:35:20Z</datestamp>
-                <metadataNamespace>http://schema.datacite.org/oai/oai-1.0/</metadataNamespace>
+                <identifier>textgrid:rn8z.0</identifier>
+                <datestamp>2012-01-29T20:54:12Z</datestamp>
+                <metadataNamespace/>
            </originDescription>
        </provenance>
        <oaf:datainfo>
@ -107,9 +108,10 @@
            <oaf:deletedbyinference>false</oaf:deletedbyinference>
            <oaf:trust>0.9</oaf:trust>
            <oaf:inferenceprovenance/>
-            <oaf:provenanceaction classid="sysimport:crosswalk"
-                                  classname="sysimport:crosswalk"
+            <oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive"
+                                  classname="sysimport:crosswalk:datasetarchive"
                                  schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
        </oaf:datainfo>
    </about>
 </record>
+
--- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala
@ -1,15 +1,15 @@
 package eu.dnetlib.dhp.export

+import com.fasterxml.jackson.databind.ObjectMapper
+
 import java.time.LocalDateTime
 import java.time.format.DateTimeFormatter
-
 import eu.dnetlib.dhp.common.PacePerson
 import eu.dnetlib.dhp.schema.action.AtomicAction
 import eu.dnetlib.dhp.schema.oaf.{Author, Dataset, ExternalReference, Field, Instance, KeyValue, Oaf, Publication, Qualifier, Relation, Result, StructuredProperty}
 import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication}
 import eu.dnetlib.dhp.utils.DHPUtils
 import org.apache.commons.lang3.StringUtils
-import org.codehaus.jackson.map.ObjectMapper
 import eu.dnetlib.dhp.schema.scholexplorer.OafUtils._

 import scala.collection.JavaConverters._
--- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/SparkExportContentForOpenAire.scala
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/SparkExportContentForOpenAire.scala
@ -1,27 +1,21 @@
 package eu.dnetlib.dhp.`export`

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
-import eu.dnetlib.dhp.schema.oaf.{Instance, Publication, Relation, Dataset => OafDataset}
+import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Dataset => OafDataset}
 import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication}
 import org.apache.commons.io.IOUtils
 import org.apache.hadoop.io.Text
 import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.hadoop.mapred.SequenceFileOutputFormat
-import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.expressions.Window
-import org.apache.spark.{SparkConf, SparkContext}
-import org.codehaus.jackson.map.ObjectMapper
+import org.apache.spark.SparkConf

 import scala.collection.mutable.ArrayBuffer
-import scala.collection.JavaConverters._

 object SparkExportContentForOpenAire {

-
-
-
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(SparkExportContentForOpenAire.getClass.getResourceAsStream("input_export_content_parameters.json")))
@ -178,11 +172,4 @@ object SparkExportContentForOpenAire {
    fRels.union(fpubs).union(fdats).rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$workingPath/export/rawset", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec])
  }

-
-
-
-
-
-
-
 }
--- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/DropAndCreateESIndex.java
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/DropAndCreateESIndex.java
@ -10,7 +10,8 @@ import org.apache.http.client.methods.HttpPut;
 import org.apache.http.entity.StringEntity;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
-import org.codehaus.jackson.map.ObjectMapper;
+
+import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;

--- a/Show More
+++ b/Show More