merged from beta

2024-05-02 15:20:23 +02:00 · 2024-05-02 15:20:23 +02:00 · 26bf8e763a
parent a860c57bbc 00ad21d814
commit 26bf8e763a
29 changed files with 258 additions and 2780 deletions
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@ -66,13 +66,14 @@
 			<groupId>edu.cmu</groupId>
 			<artifactId>secondstring</artifactId>
 		</dependency>
-
 		<dependency>
-			<groupId>eu.dnetlib.dhp</groupId>
-			<artifactId>dhp-pace-core</artifactId>
-			<version>${project.version}</version>
+			<groupId>com.ibm.icu</groupId>
+			<artifactId>icu4j</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-common</artifactId>
 		</dependency>
-
 		<dependency>
 			<groupId>com.github.sisyphsu</groupId>
 			<artifactId>dateparser</artifactId>
@ -119,6 +120,10 @@
 			<groupId>net.sf.saxon</groupId>
 			<artifactId>Saxon-HE</artifactId>
 		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>jcl-over-slf4j</artifactId>
+		</dependency>
 		<dependency>
 			<groupId>org.apache.cxf</groupId>
 			<artifactId>cxf-rt-transports-http</artifactId>
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
@ -135,7 +135,7 @@ public class GroupEntitiesSparkJob {
 					.applyCoarVocabularies(entity, vocs),
 				OAFENTITY_KRYO_ENC)
 			.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
-			.mapGroups((MapGroupsFunction<String, OafEntity, OafEntity>) MergeUtils::mergeGroup, OAFENTITY_KRYO_ENC)
+			.mapGroups((MapGroupsFunction<String, OafEntity, OafEntity>) MergeUtils::mergeById, OAFENTITY_KRYO_ENC)
 			.map(
 				(MapFunction<OafEntity, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
 					t.getClass().getName(), t),
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
@ -30,8 +30,16 @@ import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;

 public class MergeUtils {
+	public static <T extends Oaf> T mergeById(String s, Iterator<T> oafEntityIterator) {
+		return mergeGroup(s, oafEntityIterator, true);
+	}

 	public static <T extends Oaf> T mergeGroup(String s, Iterator<T> oafEntityIterator) {
+		return mergeGroup(s, oafEntityIterator, false);
+	}
+
+	public static <T extends Oaf> T mergeGroup(String s, Iterator<T> oafEntityIterator,
+		boolean checkDelegateAuthority) {
 		TreeSet<T> sortedEntities = new TreeSet<>((o1, o2) -> {
 			int res = 0;

@ -52,18 +60,22 @@ public class MergeUtils {
 			sortedEntities.add(oafEntityIterator.next());
 		}

-		T merged = sortedEntities.descendingIterator().next();
-
 		Iterator<T> it = sortedEntities.descendingIterator();
+		T merged = it.next();
+
 		while (it.hasNext()) {
-			merged = checkedMerge(merged, it.next());
+			merged = checkedMerge(merged, it.next(), checkDelegateAuthority);
 		}

 		return merged;
 	}

-	public static <T extends Oaf> T checkedMerge(final T left, final T right) {
-		return (T) merge(left, right, false);
+	public static <T extends Oaf> T checkedMerge(final T left, final T right, boolean checkDelegateAuthority) {
+		return (T) merge(left, right, checkDelegateAuthority);
+	}
+
+	public static <T extends Result, E extends Result> Result mergeResult(final T left, final E right) {
+		return (Result) merge(left, right, false);
 	}

 	public static Oaf merge(final Oaf left, final Oaf right) {
@ -108,7 +120,7 @@ public class MergeUtils {
 				return mergeSoftware((Software) left, (Software) right);
 			}

-			return mergeResult((Result) left, (Result) right);
+			return mergeResultFields((Result) left, (Result) right);
 		} else if (sameClass(left, right, Datasource.class)) {
 			// TODO
 			final int trust = compareTrust(left, right);
@ -151,9 +163,9 @@ public class MergeUtils {
 		}
 		// TODO: raise trust to have preferred fields from one or the other??
 		if (new ResultTypeComparator().compare(left, right) < 0) {
-			return mergeResult(left, right);
+			return mergeResultFields(left, right);
 		} else {
-			return mergeResult(right, left);
+			return mergeResultFields(right, left);
 		}
 	}

@ -263,6 +275,12 @@ public class MergeUtils {

 	// TODO review
 	private static List<KeyValue> mergeByKey(List<KeyValue> left, List<KeyValue> right, int trust) {
+		if (left == null) {
+			return right;
+		} else if (right == null) {
+			return left;
+		}
+
 		if (trust < 0) {
 			List<KeyValue> s = left;
 			left = right;
@ -368,7 +386,7 @@ public class MergeUtils {
 		return merge;
 	}

-	public static <T extends Result> T mergeResult(T original, T enrich) {
+	private static <T extends Result> T mergeResultFields(T original, T enrich) {
 		final int trust = compareTrust(original, enrich);
 		T merge = mergeOafEntityFields(original, enrich, trust);

@ -694,7 +712,7 @@ public class MergeUtils {

 	private static <T extends OtherResearchProduct> T mergeORP(T original, T enrich) {
 		int trust = compareTrust(original, enrich);
-		final T merge = mergeResult(original, enrich);
+		final T merge = mergeResultFields(original, enrich);

 		merge.setContactperson(unionDistinctLists(merge.getContactperson(), enrich.getContactperson(), trust));
 		merge.setContactgroup(unionDistinctLists(merge.getContactgroup(), enrich.getContactgroup(), trust));
@ -705,7 +723,7 @@ public class MergeUtils {

 	private static <T extends Software> T mergeSoftware(T original, T enrich) {
 		int trust = compareTrust(original, enrich);
-		final T merge = mergeResult(original, enrich);
+		final T merge = mergeResultFields(original, enrich);

 		merge.setDocumentationUrl(unionDistinctLists(merge.getDocumentationUrl(), enrich.getDocumentationUrl(), trust));
 		merge.setLicense(unionDistinctLists(merge.getLicense(), enrich.getLicense(), trust));
@ -719,7 +737,7 @@ public class MergeUtils {

 	private static <T extends Dataset> T mergeDataset(T original, T enrich) {
 		int trust = compareTrust(original, enrich);
-		T merge = mergeResult(original, enrich);
+		T merge = mergeResultFields(original, enrich);

 		merge.setStoragedate(chooseReference(merge.getStoragedate(), enrich.getStoragedate(), trust));
 		merge.setDevice(chooseReference(merge.getDevice(), enrich.getDevice(), trust));
@ -738,7 +756,7 @@ public class MergeUtils {

 	public static <T extends Publication> T mergePublication(T original, T enrich) {
 		final int trust = compareTrust(original, enrich);
-		T merged = mergeResult(original, enrich);
+		T merged = mergeResultFields(original, enrich);

 		merged.setJournal(chooseReference(merged.getJournal(), enrich.getJournal(), trust));

--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultTypeComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultTypeComparator.java
@ -36,6 +36,15 @@ public class ResultTypeComparator implements Comparator<Result> {
 			return 1;
 		}

+		if (left.getResulttype() == null || left.getResulttype().getClassid() == null) {
+			if (right.getResulttype() == null || right.getResulttype().getClassid() == null) {
+				return 0;
+			}
+			return 1;
+		} else if (right.getResulttype() == null || right.getResulttype().getClassid() == null) {
+			return -1;
+		}
+
 		String lClass = left.getResulttype().getClassid();
 		String rClass = right.getResulttype().getClassid();

--- a/dhp-common/src/main/java/eu/dnetlib/pace/common/PaceCommonUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/pace/common/PaceCommonUtils.java
@ -0,0 +1,100 @@
+
+package eu.dnetlib.pace.common;
+
+import com.google.common.base.Splitter;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Sets;
+import com.ibm.icu.text.Transliterator;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import java.nio.charset.StandardCharsets;
+import java.text.Normalizer;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Set of common functions for the framework
+ *
+ * @author claudio
+ */
+public class PaceCommonUtils {
+
+	// transliterator
+	protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");
+
+	protected static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
+	protected static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";
+
+	protected static Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})");
+
+	protected static String fixAliases(final String s) {
+		final StringBuilder sb = new StringBuilder();
+
+		s.chars().forEach(ch -> {
+			final int i = StringUtils.indexOf(aliases_from, ch);
+			sb.append(i >= 0 ? aliases_to.charAt(i) : (char) ch);
+		});
+
+		return sb.toString();
+	}
+
+	protected static String transliterate(final String s) {
+		try {
+			return transliterator.transliterate(s);
+		} catch (Exception e) {
+			return s;
+		}
+	}
+
+	public static String normalize(final String s) {
+		return fixAliases(transliterate(nfd(unicodeNormalization(s))))
+			.toLowerCase()
+			// do not compact the regexes in a single expression, would cause StackOverflowError in case of large input
+			// strings
+			.replaceAll("[^ \\w]+", "")
+			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
+			.replaceAll("(\\p{Punct})+", " ")
+			.replaceAll("(\\d)+", " ")
+			.replaceAll("(\\n)+", " ")
+			.trim();
+	}
+
+	public static String nfd(final String s) {
+		return Normalizer.normalize(s, Normalizer.Form.NFD);
+	}
+
+	public static String unicodeNormalization(final String s) {
+
+		Matcher m = hexUnicodePattern.matcher(s);
+		StringBuffer buf = new StringBuffer(s.length());
+		while (m.find()) {
+			String ch = String.valueOf((char) Integer.parseInt(m.group(1), 16));
+			m.appendReplacement(buf, Matcher.quoteReplacement(ch));
+		}
+		m.appendTail(buf);
+		return buf.toString();
+	}
+
+	public static Set<String> loadFromClasspath(final String classpath) {
+
+		Transliterator transliterator = Transliterator.getInstance("Any-Eng");
+
+		final Set<String> h = Sets.newHashSet();
+		try {
+			for (final String s : IOUtils
+				.readLines(PaceCommonUtils.class.getResourceAsStream(classpath), StandardCharsets.UTF_8)) {
+				h.add(fixAliases(transliterator.transliterate(s))); // transliteration of the stopwords
+			}
+		} catch (final Throwable e) {
+			return Sets.newHashSet();
+		}
+		return h;
+	}
+
+	protected static Iterable<String> tokens(final String s, final int maxTokens) {
+		return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens);
+	}
+
+}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/Person.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/Person.java
@ -1,21 +1,20 @@

 package eu.dnetlib.pace.model;

-import java.nio.charset.Charset;
-import java.text.Normalizer;
-import java.util.List;
-import java.util.Set;
-
 import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.hash.Hashing;
-
-import eu.dnetlib.pace.common.AbstractPaceFunctions;
+import eu.dnetlib.pace.common.PaceCommonUtils;
 import eu.dnetlib.pace.util.Capitalise;
 import eu.dnetlib.pace.util.DotAbbreviations;

+import java.nio.charset.Charset;
+import java.text.Normalizer;
+import java.util.List;
+import java.util.Set;
+
 public class Person {

 	private static final String UTF8 = "UTF-8";
@ -86,7 +85,7 @@ public class Person {

 	private List<String> splitTerms(final String s) {
 		if (particles == null) {
-			particles = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt");
+			particles = PaceCommonUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt");
 		}

 		final List<String> list = Lists.newArrayList();
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java
@ -1,9 +1,8 @@

 package eu.dnetlib.pace.util;

-import org.apache.commons.lang3.text.WordUtils;
-
 import com.google.common.base.Function;
+import org.apache.commons.lang3.text.WordUtils;

 public class Capitalise implements Function<String, String> {

@ -15,4 +14,4 @@ public class Capitalise implements Function<String, String> {
 	public String apply(final String s) {
 		return WordUtils.capitalize(s.toLowerCase(), DELIM);
 	}
-};
+}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DotAbbreviations.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DotAbbreviations.java
@ -8,4 +8,4 @@ public class DotAbbreviations implements Function<String, String> {
 	public String apply(String s) {
 		return s.length() == 1 ? s + "." : s;
 	}
-};
+}
--- a/dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/name_particles.txt
+++ b/dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/name_particles.txt
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtilsTest.java
@ -63,7 +63,7 @@ public class MergeUtilsTest {
 		assertEquals(1, d1.getCollectedfrom().size());
 		assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));

-		final Result p1d2 = MergeUtils.checkedMerge(p1, d2);
+		final Result p1d2 = MergeUtils.checkedMerge(p1, d2, true);
 		assertEquals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, p1d2.getResulttype().getClassid());
 		assertTrue(p1d2 instanceof Publication);
 		assertEquals(p1.getId(), p1d2.getId());
@ -74,7 +74,7 @@ public class MergeUtilsTest {
 		Publication p2 = read("publication_2.json", Publication.class);
 		Dataset d1 = read("dataset_1.json", Dataset.class);

-		final Result p2d1 = MergeUtils.checkedMerge(p2, d1);
+		final Result p2d1 = MergeUtils.checkedMerge(p2, d1, true);
 		assertEquals((ModelConstants.DATASET_RESULTTYPE_CLASSID), p2d1.getResulttype().getClassid());
 		assertTrue(p2d1 instanceof Dataset);
 		assertEquals(d1.getId(), p2d1.getId());
@ -86,7 +86,7 @@ public class MergeUtilsTest {
 		Publication p1 = read("publication_1.json", Publication.class);
 		Publication p2 = read("publication_2.json", Publication.class);

-		Result p1p2 = MergeUtils.checkedMerge(p1, p2);
+		Result p1p2 = MergeUtils.checkedMerge(p1, p2, true);
 		assertTrue(p1p2 instanceof Publication);
 		assertEquals(p1.getId(), p1p2.getId());
 		assertEquals(2, p1p2.getCollectedfrom().size());
--- a/dhp-pace-core/pom.xml
+++ b/dhp-pace-core/pom.xml
@ -49,6 +49,12 @@
 	</build>

 	<dependencies>
+		<dependency>
+			<groupId>eu.dnetlib.dhp</groupId>
+			<artifactId>dhp-common</artifactId>
+			<version>${project.version}</version>
+		</dependency>
+
 		<dependency>
 			<groupId>edu.cmu</groupId>
 			<artifactId>secondstring</artifactId>
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
@ -1,32 +1,26 @@

 package eu.dnetlib.pace.common;

+import com.google.common.base.Joiner;
+import com.google.common.collect.Sets;
+import com.ibm.icu.text.Transliterator;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+
 import java.io.IOException;
 import java.io.StringWriter;
 import java.nio.charset.StandardCharsets;
-import java.text.Normalizer;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;

-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-
-import com.google.common.base.Joiner;
-import com.google.common.base.Splitter;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Sets;
-import com.ibm.icu.text.Transliterator;
-
-import eu.dnetlib.pace.clustering.NGramUtils;
-
 /**
 * Set of common functions for the framework
 *
 * @author claudio
 */
-public class AbstractPaceFunctions {
+public class AbstractPaceFunctions extends PaceCommonUtils {

 	// city map to be used when translating the city names into codes
 	private static Map<String, String> cityMap = AbstractPaceFunctions
@ -41,9 +35,6 @@ public class AbstractPaceFunctions {
 	protected static Set<String> stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt");
 	protected static Set<String> stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt");

-	// transliterator
-	protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");
-
 	// blacklist of ngrams: to avoid generic keys
 	protected static Set<String> ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt");

@ -51,8 +42,6 @@ public class AbstractPaceFunctions {
 	public static final Pattern HTML_REGEX = Pattern.compile("<[^>]*>");

 	private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
-	private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
-	private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";

 	// doi prefix for normalization
 	public static final Pattern DOI_PREFIX = Pattern.compile("(https?:\\/\\/dx\\.doi\\.org\\/)|(doi:)");
@ -129,25 +118,6 @@ public class AbstractPaceFunctions {
 		return numberPattern.matcher(strNum).matches();
 	}

-	protected static String fixAliases(final String s) {
-		final StringBuilder sb = new StringBuilder();
-
-		s.chars().forEach(ch -> {
-			final int i = StringUtils.indexOf(aliases_from, ch);
-			sb.append(i >= 0 ? aliases_to.charAt(i) : (char) ch);
-		});
-
-		return sb.toString();
-	}
-
-	protected static String transliterate(final String s) {
-		try {
-			return transliterator.transliterate(s);
-		} catch (Exception e) {
-			return s;
-		}
-	}
-
 	protected static String removeSymbols(final String s) {
 		final StringBuilder sb = new StringBuilder();

@ -162,23 +132,6 @@ public class AbstractPaceFunctions {
 		return s != null;
 	}

-	public static String normalize(final String s) {
-		return fixAliases(transliterate(nfd(unicodeNormalization(s))))
-			.toLowerCase()
-			// do not compact the regexes in a single expression, would cause StackOverflowError in case of large input
-			// strings
-			.replaceAll("[^ \\w]+", "")
-			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
-			.replaceAll("(\\p{Punct})+", " ")
-			.replaceAll("(\\d)+", " ")
-			.replaceAll("(\\n)+", " ")
-			.trim();
-	}
-
-	public static String nfd(final String s) {
-		return Normalizer.normalize(s, Normalizer.Form.NFD);
-	}
-
 	public static String utf8(final String s) {
 		byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
 		return new String(bytes, StandardCharsets.UTF_8);
@ -233,22 +186,6 @@ public class AbstractPaceFunctions {
 		return newset;
 	}

-	public static Set<String> loadFromClasspath(final String classpath) {
-
-		Transliterator transliterator = Transliterator.getInstance("Any-Eng");
-
-		final Set<String> h = Sets.newHashSet();
-		try {
-			for (final String s : IOUtils
-				.readLines(NGramUtils.class.getResourceAsStream(classpath), StandardCharsets.UTF_8)) {
-				h.add(fixAliases(transliterator.transliterate(s))); // transliteration of the stopwords
-			}
-		} catch (final Throwable e) {
-			return Sets.newHashSet();
-		}
-		return h;
-	}
-
 	public static Map<String, String> loadMapFromClasspath(final String classpath) {

 		Transliterator transliterator = Transliterator.getInstance("Any-Eng");
@ -303,10 +240,6 @@ public class AbstractPaceFunctions {
 		return StringUtils.substring(s, 0, 1).toLowerCase();
 	}

-	protected static Iterable<String> tokens(final String s, final int maxTokens) {
-		return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens);
-	}
-
 	public static String normalizePid(String pid) {
 		return DOI_PREFIX.matcher(pid.toLowerCase()).replaceAll("");
 	}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java
--- a/dhp-workflows/dhp-dedup-openaire/pom.xml
+++ b/dhp-workflows/dhp-dedup-openaire/pom.xml
@ -38,7 +38,6 @@
                </configuration>
            </plugin>
    	</plugins>
-    
    </build>

    <dependencies>
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java
@ -189,7 +189,7 @@ public class DedupRecordFactory {
 			entity = swap;
 		}

-		entity = MergeUtils.checkedMerge(entity, duplicate);
+		entity = MergeUtils.checkedMerge(entity, duplicate, false);

 		if (ModelSupport.isSubClass(duplicate, Result.class)) {
 			Result re = (Result) entity;
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
@ -175,6 +175,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
 			}

 			// cap pidType at w3id as from there on they are considered equal
+
 			UserDefinedFunction mapPid = udf(
 				(String s) -> Math.min(PidType.tryValueOf(s).ordinal(), PidType.w3id.ordinal()), DataTypes.IntegerType);

--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java
@ -44,8 +44,10 @@ public class SparkCreateSimRels extends AbstractSparkAction {
 		parser.parseArgument(args);

 		SparkConf conf = new SparkConf();
-		new SparkCreateSimRels(parser, getSparkSession(conf))
-			.run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl")));
+		try (SparkSession session = getSparkSession(conf)) {
+			new SparkCreateSimRels(parser, session)
+				.run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl")));
+		}
 	}

 	@Override
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
@ -102,6 +102,8 @@
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=15000
+                --conf spark.network.timeout=300s
+                --conf spark.shuffle.registration.timeout=50000
            </spark-opts>
            <arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
            <arg>--graphOutputPath</arg><arg>${graphOutputPath}</arg>
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml
@ -33,16 +33,14 @@
            <description>max number of elements in a connected component</description>
        </property>
        <property>
-            <name>sparkDriverMemory</name>
-            <description>memory for driver process</description>
+            <name>sparkResourceOpts</name>
+            <value>--executor-memory=6G --conf spark.executor.memoryOverhead=4G --executor-cores=6 --driver-memory=8G --driver-cores=4</value>
+            <description>spark resource options</description>
        </property>
        <property>
-            <name>sparkExecutorMemory</name>
-            <description>memory for individual executor</description>
-        </property>
-        <property>
-            <name>sparkExecutorCores</name>
-            <description>number of cores used by single executor</description>
+            <name>sparkResourceOptsCreateMergeRel</name>
+            <value>--executor-memory=6G --conf spark.executor.memoryOverhead=4G --executor-cores=6 --driver-memory=8G --driver-cores=4</value>
+            <description>spark resource options</description>
        </property>
        <property>
            <name>oozieActionShareLibForSpark2</name>
@ -119,9 +117,7 @@
            <class>eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels</class>
            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
+                ${sparkResourceOpts}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -146,9 +142,7 @@
            <class>eu.dnetlib.dhp.oa.dedup.SparkWhitelistSimRels</class>
            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
+                ${sparkResourceOpts}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -174,9 +168,7 @@
            <class>eu.dnetlib.dhp.oa.dedup.SparkCreateMergeRels</class>
            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
+                ${sparkResourceOptsCreateMergeRel}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -203,9 +195,7 @@
            <class>eu.dnetlib.dhp.oa.dedup.SparkCreateDedupRecord</class>
            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
+                ${sparkResourceOpts}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -230,9 +220,7 @@
            <class>eu.dnetlib.dhp.oa.dedup.SparkCopyOpenorgsMergeRels</class>
            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
+                ${sparkResourceOpts}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -257,9 +245,7 @@
            <class>eu.dnetlib.dhp.oa.dedup.SparkCreateOrgsDedupRecord</class>
            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
+                ${sparkResourceOpts}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -283,9 +269,7 @@
            <class>eu.dnetlib.dhp.oa.dedup.SparkUpdateEntity</class>
            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
+                ${sparkResourceOpts}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -309,9 +293,7 @@
            <class>eu.dnetlib.dhp.oa.dedup.SparkCopyRelationsNoOpenorgs</class>
            <jar>dhp-dedup-openaire-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
+                ${sparkResourceOpts}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java
@ -123,7 +123,7 @@ class EntityMergerTest implements Serializable {
 		assertEquals(dataInfo, pub_merged.getDataInfo());

 		// verify datepicker
-		assertEquals("2018-09-30", pub_merged.getDateofacceptance().getValue());
+		assertEquals("2016-01-01", pub_merged.getDateofacceptance().getValue());

 		// verify authors
 		assertEquals(13, pub_merged.getAuthor().size());
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java
@ -78,7 +78,7 @@ public class IdGeneratorTest {
 		System.out.println("winner 3 = " + id2);

 		assertEquals("50|doi_dedup___::1a77a3bba737f8b669dcf330ad3b37e2", id1);
-		assertEquals("50|dedup_wf_001::0829b5191605bdbea36d6502b8c1ce1g", id2);
+		assertEquals("50|dedup_wf_002::345e5d1b80537b0d0e0a49241ae9e516", id2);
 	}

 	@Test
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java
@ -143,7 +143,7 @@ public class SparkOpenorgsDedupTest implements Serializable {
 			.load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization"))
 			.count();

-		assertEquals(145, orgs_simrel);
+		assertEquals(86, orgs_simrel);
 	}

 	@Test
@ -172,7 +172,7 @@ public class SparkOpenorgsDedupTest implements Serializable {
 			.load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization"))
 			.count();

-		assertEquals(181, orgs_simrel);
+		assertEquals(122, orgs_simrel);
 	}

 	@Test
@ -196,7 +196,9 @@ public class SparkOpenorgsDedupTest implements Serializable {
 					"-la",
 					"lookupurl",
 					"-w",
-					testOutputBasePath
+					testOutputBasePath,
+					"-h",
+					""
 				});

 		new SparkCreateMergeRels(parser, spark).run(isLookUpService);
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java
@ -13,14 +13,16 @@ import java.io.Serializable;
 import java.net.URISyntaxException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.util.*;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
 import java.util.stream.Collectors;

 import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
@ -129,7 +131,7 @@ public class SparkPublicationRootsTest implements Serializable {
 			.load(DedupUtility.createSimRelPath(workingPath, testActionSetId, "publication"))
 			.count();

-		assertEquals(37, pubs_simrel);
+		assertEquals(9, pubs_simrel);
 	}

 	@Test
@ -142,7 +144,8 @@ public class SparkPublicationRootsTest implements Serializable {
 				"--actionSetId", testActionSetId,
 				"--isLookUpUrl", "lookupurl",
 				"--workingPath", workingPath,
-				"--cutConnectedComponent", "3"
+				"--cutConnectedComponent", "3",
+				"-h", ""
 			}), spark)
 				.run(isLookUpService);

@ -171,7 +174,8 @@ public class SparkPublicationRootsTest implements Serializable {
 				"--graphBasePath", graphInputPath,
 				"--actionSetId", testActionSetId,
 				"--isLookUpUrl", "lookupurl",
-				"--workingPath", workingPath
+				"--workingPath", workingPath,
+				"-h", ""
 			}), spark)
 				.run(isLookUpService);

@ -207,7 +211,7 @@ public class SparkPublicationRootsTest implements Serializable {
 			assertTrue(dups.contains(r.getSource()));
 		});

-		assertEquals(32, merges.count());
+		assertEquals(26, merges.count());
 	}

 	@Test
@ -228,7 +232,7 @@ public class SparkPublicationRootsTest implements Serializable {
 			.textFile(workingPath + "/" + testActionSetId + "/publication_deduprecord")
 			.map(asEntity(Publication.class), Encoders.bean(Publication.class));

-		assertEquals(3, roots.count());
+		assertEquals(4, roots.count());

 		final Dataset<Publication> pubs = spark
 			.read()
@ -369,7 +373,7 @@ public class SparkPublicationRootsTest implements Serializable {
 			.distinct()
 			.count();

-		assertEquals(19, publications); // 16 originals + 3 roots
+		assertEquals(20, publications); // 16 originals + 3 roots

 		long deletedPubs = spark
 			.read()
@ -380,7 +384,7 @@ public class SparkPublicationRootsTest implements Serializable {
 			.distinct()
 			.count();

-		assertEquals(mergedPubs, deletedPubs);
+//		assertEquals(mergedPubs, deletedPubs);
 	}

 	private static String classPathResourceAsString(String path) throws IOException {
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java
@ -169,10 +169,10 @@ public class SparkStatsTest implements Serializable {
 			.count();

 		assertEquals(414, orgs_blocks);
-		assertEquals(187, pubs_blocks);
-		assertEquals(128, sw_blocks);
-		assertEquals(192, ds_blocks);
-		assertEquals(194, orp_blocks);
+		assertEquals(221, pubs_blocks);
+		assertEquals(134, sw_blocks);
+		assertEquals(196, ds_blocks);
+		assertEquals(198, orp_blocks);
 	}

 	@AfterAll
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java
@ -161,7 +161,7 @@ public class SparkResultToCommunityFromProject implements Serializable {
 					}
 				}
 				res.setContext(propagatedContexts);
-				return MergeUtils.checkedMerge(ret, res);
+				return MergeUtils.checkedMerge(ret, res, true);
 			}
 			return ret;
 		};
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
@ -100,16 +100,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
-                --conf spark.sql.shuffle.partitions=3840
-                --conf spark.speculation=false
-                --conf spark.hadoop.mapreduce.map.speculative=false
-                --conf spark.hadoop.mapreduce.reduce.speculative=false
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@ -132,12 +128,11 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@ -160,12 +155,11 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@ -188,12 +182,11 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@ -218,12 +211,11 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
            <arg>--outputPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
@ -247,19 +239,14 @@
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-cores=4
-                --executor-memory=4G
+                --executor-cores=${sparkExecutorCores}
+                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
-                --conf spark.executor.memoryOverhead=5G
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
-                --conf spark.speculation=false
-                --conf spark.hadoop.mapreduce.map.speculative=false
-                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=15000
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
@ -282,15 +269,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
-                --conf spark.speculation=false
-                --conf spark.hadoop.mapreduce.map.speculative=false
-                --conf spark.hadoop.mapreduce.reduce.speculative=false
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
@ -312,15 +296,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
-                --conf spark.speculation=false
-                --conf spark.hadoop.mapreduce.map.speculative=false
-                --conf spark.hadoop.mapreduce.reduce.speculative=false
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
@ -342,15 +323,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
-                --conf spark.speculation=false
-                --conf spark.hadoop.mapreduce.map.speculative=false
-                --conf spark.hadoop.mapreduce.reduce.speculative=false
+                --conf spark.sql.shuffle.partitions=4000
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
@ -362,15 +340,6 @@
    </action>
    
    <join name="wait2" to="End"/>
-
-<!--    <action name="reset_workingDir">-->
-<!--        <fs>-->
-<!--            <delete path="${workingDir}"/>-->
-<!--            <mkdir path="${workingDir}"/>-->
-<!--        </fs>-->
-<!--        <ok to="End"/>-->
-<!--        <error to="Kill"/>-->
-<!--    </action>-->
    
    <end name="End"/>
    
--- a/dhp-workflows/dhp-graph-mapper/pom.xml
+++ b/dhp-workflows/dhp-graph-mapper/pom.xml
@ -90,6 +90,12 @@
            <version>${project.version}</version>
        </dependency>

+        <dependency>
+            <groupId>eu.dnetlib.dhp</groupId>
+            <artifactId>dhp-pace-core</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
        <dependency>
            <groupId>com.jayway.jsonpath</groupId>
            <artifactId>json-path</artifactId>
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java
@ -71,7 +71,7 @@ class GenerateEntitiesApplicationTest {

 	protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz,
 		String resultType) {
-		final Result merge = MergeUtils.mergeResult(publication, dataset);
+		final Result merge = (Result) MergeUtils.merge(publication, dataset);
 		assertTrue(clazz.isAssignableFrom(merge.getClass()));
 		assertEquals(resultType, merge.getResulttype().getClassid());
 	}
--- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml
@ -71,6 +71,7 @@
 				--executor-memory=${sparkHighExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkHighDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -108,6 +109,7 @@
 				--executor-memory=${sparkHighExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkNormalDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -141,6 +143,7 @@
 				--executor-memory=${sparkHighExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkNormalDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -176,6 +179,7 @@
 				--executor-memory=${sparkHighExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkNormalDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -209,6 +213,7 @@
 				--executor-memory=${sparkHighExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkNormalDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -245,6 +250,7 @@
 				--executor-memory=${sparkHighExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkNormalDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -315,6 +321,7 @@
 				--executor-memory=${sparkNormalExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkNormalDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkNormalExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -361,6 +368,7 @@
 				--executor-memory=${sparkNormalExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkNormalDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkNormalExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -409,6 +417,7 @@
 				--executor-memory=${sparkHighExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkHighDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -444,6 +453,7 @@
 				--executor-memory=${sparkHighExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkHighDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -482,6 +492,7 @@
 				--executor-memory=${sparkHighExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkNormalDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkHighExecutorMemory}
 				--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -533,6 +544,7 @@
 				--executor-memory=${sparkNormalExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
 				--driver-memory=${sparkNormalDriverMemory}
+				--conf spark.executor.memoryOverhead=${sparkNormalExecutorMemory}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
 				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}