merged from beta

2024-07-17 12:01:40 +02:00 · 2024-07-17 12:01:40 +02:00 · 06e3985b77
parent c465835061 83327239de
commit 06e3985b77
83 changed files with 2230 additions and 1109 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
@ -328,7 +328,7 @@ public class MergeUtils {
 		final T merged = mergeOafFields(original, enrich, trust);

 		merged.setOriginalId(unionDistinctListOfString(merged.getOriginalId(), enrich.getOriginalId()));
-		merged.setPid(unionDistinctLists(merged.getPid(), enrich.getPid(), trust));
+		merged.setPid(mergeLists(merged.getPid(), enrich.getPid(), trust, MergeUtils::spKeyExtractor, (p1, p2) -> p1));
 		merged.setDateofcollection(LocalDateTime.now().toString());
 		merged
 			.setDateoftransformation(
@ -464,6 +464,10 @@ public class MergeUtils {
 		merge.setIsInDiamondJournal(booleanOR(merge.getIsInDiamondJournal(), enrich.getIsInDiamondJournal()));
 		merge.setPubliclyFunded(booleanOR(merge.getPubliclyFunded(), enrich.getPubliclyFunded()));

+		if (StringUtils.isBlank(merge.getTransformativeAgreement())) {
+			merge.setTransformativeAgreement(enrich.getTransformativeAgreement());
+		}
+
 		return merge;
 	}

@ -655,6 +659,13 @@ public class MergeUtils {
 			return d1;
 		}

+		if (StringUtils.contains(d1.getValue(), "null")) {
+			return d2;
+		}
+		if (StringUtils.contains(d2.getValue(), "null")) {
+			return d1;
+		}
+
 		return Stream
 			.of(d1, d2)
 			.min(
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LegalnameClustering.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LegalnameClustering.java
@ -2,31 +2,41 @@
 package eu.dnetlib.pace.clustering;

 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;

 import org.apache.commons.lang3.StringUtils;

 import eu.dnetlib.pace.config.Config;

-@ClusteringClass("keywordsclustering")
-public class KeywordsClustering extends AbstractClusteringFunction {
+@ClusteringClass("legalnameclustering")
+public class LegalnameClustering extends AbstractClusteringFunction {

-	public KeywordsClustering(Map<String, Object> params) {
+	private static final Pattern CITY_CODE_PATTERN = Pattern.compile("city::\\d+");
+	private static final Pattern KEYWORD_CODE_PATTERN = Pattern.compile("key::\\d+");
+
+	public LegalnameClustering(Map<String, Object> params) {
 		super(params);
 	}

+	/**
+	 * Collects every distinct substring of the input that matches the given pattern.
+	 *
+	 * @param input     the string to scan
+	 * @param codeRegex the compiled pattern identifying a code
+	 * @return the set of matched substrings; empty when nothing matches
+	 */
+	public Set<String> getRegexList(String input, Pattern codeRegex) {
+		final Set<String> matches = new HashSet<>();
+		for (Matcher m = codeRegex.matcher(input); m.find();) {
+			matches.add(m.group());
+		}
+		return matches;
+	}
+
 	@Override
 	protected Collection<String> doApply(final Config conf, String s) {

-		// takes city codes and keywords codes without duplicates
-		Set<String> keywords = getKeywords(s, conf.translationMap(), paramOrDefault("windowSize", 4));
-		Set<String> cities = getCities(s, paramOrDefault("windowSize", 4));
-
 		// list of combination to return as result
 		final Collection<String> combinations = new LinkedHashSet<String>();

-		for (String keyword : keywordsToCodes(keywords, conf.translationMap())) {
-			for (String city : citiesToCodes(cities)) {
+		for (String keyword : getRegexList(s, KEYWORD_CODE_PATTERN)) {
+			for (String city : getRegexList(s, CITY_CODE_PATTERN)) {
 				combinations.add(keyword + "-" + city);
 				if (combinations.size() >= paramOrDefault("max", 2)) {
 					return combinations;
@ -42,9 +52,6 @@ public class KeywordsClustering extends AbstractClusteringFunction {
 		return fields
 			.stream()
 			.filter(f -> !f.isEmpty())
-			.map(KeywordsClustering::cleanup)
-			.map(KeywordsClustering::normalize)
-			.map(s -> filterAllStopWords(s))
 			.map(s -> doApply(conf, s))
 			.map(c -> filterBlacklisted(c, ngramBlacklist))
 			.flatMap(c -> c.stream())
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
@ -27,6 +27,14 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 	private static Map<String, String> cityMap = AbstractPaceFunctions
 		.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");

+	// keywords map to be used when translating the keyword names into codes
+	private static Map<String, String> keywordMap = AbstractPaceFunctions
+		.loadMapFromClasspath("/eu/dnetlib/pace/config/translation_map.csv");
+
+	// country map to be used when inferring the country from the city name
+	private static Map<String, String> countryMap = AbstractPaceFunctions
+		.loadCountryMapFromClasspath("/eu/dnetlib/pace/config/country_map.csv");
+
 	// list of stopwords in different languages
 	protected static Set<String> stopwords_gr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_gr.txt");
 	protected static Set<String> stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
@ -74,6 +82,64 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 		return s12;
 	}

+	/**
+	 * Returns the given country unless it equals "unknown" (ignoring case); in that case tries to
+	 * derive a country from the cities detected in {@code inferFrom}.
+	 *
+	 * @param original  the country value already available
+	 * @param inferFrom the text searched for city names when the country is unknown
+	 * @return {@code original} when it is not "unknown"; otherwise the first country obtained from the
+	 *         detected cities, or "UNKNOWN" when none is obtained
+	 */
+	public static String countryInference(final String original, String inferFrom) {
+		final boolean countryKnown = !original.equalsIgnoreCase("unknown");
+		if (countryKnown) {
+			return original;
+		}
+
+		// preprocessing chain: cleanup, then normalize, then stopword removal
+		final String text = filterAllStopWords(normalize(cleanup(inferFrom)));
+
+		// city detection with a window size of 4
+		final Set<String> detectedCities = getCities(text, 4);
+
+		return citiesToCountry(detectedCities)
+			.stream()
+			.findFirst()
+			.orElse("UNKNOWN");
+	}
+
+	/**
+	 * Cleans the input (cleanup, normalize, stopword removal) and replaces every detected city name
+	 * with the code registered for it in {@code cityMap}.
+	 *
+	 * @param original the raw text
+	 * @return the preprocessed text with detected city names replaced by their codes
+	 */
+	public static String cityInference(String original) {
+		original = cleanup(original);
+		original = normalize(original);
+		original = filterAllStopWords(original);
+
+		// the cities are detected once, on the preprocessed text, before any replacement happens
+		final Set<String> cities = getCities(original, 4);
+
+		for (String city : cities) {
+			final String code = cityMap.get(city);
+			if (code != null) {
+				// literal replacement: neither the name nor the code may be read as regex syntax
+				original = original.replace(city, code);
+			}
+		}
+
+		return original;
+	}
+
+	/**
+	 * Cleans the input (cleanup, normalize, stopword removal) and replaces every detected keyword
+	 * with the code registered for it in {@code keywordMap}.
+	 *
+	 * @param original the raw text
+	 * @return the preprocessed text with detected keywords replaced by their codes
+	 */
+	public static String keywordInference(String original) {
+		original = cleanup(original);
+		original = normalize(original);
+		original = filterAllStopWords(original);
+
+		// the keywords are detected once, on the preprocessed text, before any replacement happens
+		final Set<String> keywords = getKeywords(original, keywordMap, 4);
+
+		for (String keyword : keywords) {
+			final String code = keywordMap.get(keyword);
+			if (code != null) {
+				// literal replacement: neither the keyword nor the code may be read as regex syntax
+				original = original.replace(keyword, code);
+			}
+		}
+
+		return original;
+	}
+
+	/**
+	 * Cleans the input (cleanup, normalize, stopword removal) and replaces detected keywords and
+	 * detected city names with their codes, keywords first and cities second.
+	 *
+	 * @param original the raw text
+	 * @return the preprocessed text with keywords and city names replaced by their codes
+	 */
+	public static String cityKeywordInference(String original) {
+		original = cleanup(original);
+		original = normalize(original);
+		original = filterAllStopWords(original);
+
+		// both detections run on the same preprocessed text, before any replacement happens
+		final Set<String> keywords = getKeywords(original, keywordMap, 4);
+		final Set<String> cities = getCities(original, 4);
+
+		for (String keyword : keywords) {
+			final String code = keywordMap.get(keyword);
+			if (code != null) {
+				// literal replacement: neither the keyword nor the code may be read as regex syntax
+				original = original.replace(keyword, code);
+			}
+		}
+
+		for (String city : cities) {
+			final String code = cityMap.get(city);
+			if (code != null) {
+				original = original.replace(city, code);
+			}
+		}
+
+		return original;
+	}
+
 	protected static String fixXML(final String a) {

 		return a
@ -208,6 +274,30 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 		return m;
 	}

+	/**
+	 * Builds a map from city code to country by reading a classpath resource whose lines have the
+	 * form {@code country_code;city1;city2;...}. Each city name is lower-cased, transliterated and
+	 * alias-fixed, then translated to its code through {@code cityMap}; cities without a code are
+	 * skipped.
+	 *
+	 * @param classpath the classpath location of the resource
+	 * @return the city-code to country map, or an empty map when anything fails while loading
+	 */
+	public static Map<String, String> loadCountryMapFromClasspath(final String classpath) {
+
+		Transliterator transliterator = Transliterator.getInstance("Any-Eng");
+
+		final Map<String, String> m = new HashMap<>();
+		// try-with-resources closes the stream; a null stream (missing resource) fails in readLines
+		// and ends up in the catch below, as before
+		try (java.io.InputStream in = AbstractPaceFunctions.class.getResourceAsStream(classpath)) {
+			for (final String s : IOUtils.readLines(in, StandardCharsets.UTF_8)) {
+				// string is like this: country_code;city1;city2;city3
+				final String[] line = s.split(";");
+				final String country = line[0];
+				for (int i = 1; i < line.length; i++) {
+					final String city = fixAliases(transliterator.transliterate(line[i].toLowerCase()));
+					final String code = cityMap.get(city);
+					// a city missing from cityMap has no code: do not register it under a null key
+					if (code != null) {
+						m.put(code, country);
+					}
+				}
+			}
+		} catch (final Throwable e) {
+			// NOTE(review): best-effort loading kept from the original; the failure is not reported
+			return new HashMap<>();
+		}
+		return m;
+	}
+
 	public static String removeKeywords(String s, Set<String> keywords) {

 		s = " " + s + " ";
@ -237,6 +327,10 @@ public class AbstractPaceFunctions extends PaceCommonUtils {
 		return toCodes(keywords, cityMap);
 	}

+	/**
+	 * Translates city names into countries with two chained lookups: names to city codes through
+	 * {@code cityMap}, then city codes to countries through {@code countryMap}.
+	 */
+	public static Set<String> citiesToCountry(Set<String> cities) {
+		final Set<String> cityCodes = toCodes(cities, cityMap);
+		return toCodes(cityCodes, countryMap);
+	}
+
 	protected static String firstLC(final String s) {
 		return StringUtils.substring(s, 0, 1).toLowerCase();
 	}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
@ -47,9 +47,21 @@ public class FieldDef implements Serializable {

 	private String clean;

+	private String infer;
+
+	private String inferenceFrom;
+
 	public FieldDef() {
 	}

+	/**
+	 * @return the configured source for the inference; may be null or blank when not configured
+	 */
+	public String getInferenceFrom() {
+		return this.inferenceFrom;
+	}
+
+	/**
+	 * @param inferenceFrom the source for the inference
+	 */
+	public void setInferenceFrom(final String inferenceFrom) {
+		this.inferenceFrom = inferenceFrom;
+	}
+
 	public String getName() {
 		return name;
 	}
@ -126,6 +138,14 @@ public class FieldDef implements Serializable {
 		this.clean = clean;
 	}

+	/**
+	 * @return the configured inference type; may be null or blank when no inference is requested
+	 */
+	public String getInfer() {
+		return this.infer;
+	}
+
+	/**
+	 * @param infer the inference type to apply to this field
+	 */
+	public void setInfer(final String infer) {
+		this.infer = infer;
+	}
+
 	@Override
 	public String toString() {
 		try {
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala
@ -123,9 +123,19 @@ case class SparkModel(conf: DedupConfig) {
              case _ => res(index)
            }
          }
+
+          if (StringUtils.isNotBlank(fdef.getInfer)) {
+            val inferFrom : String = if (StringUtils.isNotBlank(fdef.getInferenceFrom)) fdef.getInferenceFrom else fdef.getPath
+            res(index) = res(index) match {
+              case x: Seq[String] => x.map(inference(_, MapDocumentUtil.getJPathString(inferFrom, documentContext), fdef.getInfer))
+              case _ => inference(res(index).toString, MapDocumentUtil.getJPathString(inferFrom, documentContext), fdef.getInfer)
+            }
+          }
+
        }

        res
+
    }

    new GenericRowWithSchema(values, schema)
@ -146,5 +156,17 @@ case class SparkModel(conf: DedupConfig) {
    res
  }

+  /** Applies the inference selected by `infertype` to `value`.
+    *
+    * Only the "country" inference reads `inferfrom`; any unrecognised type returns `value` unchanged.
+    */
+  def inference(value: String, inferfrom: String, infertype: String) : String =
+    infertype match {
+      case "country"      => AbstractPaceFunctions.countryInference(value, inferfrom)
+      case "city"         => AbstractPaceFunctions.cityInference(value)
+      case "keyword"      => AbstractPaceFunctions.keywordInference(value)
+      case "city_keyword" => AbstractPaceFunctions.cityKeywordInference(value)
+      case _              => value
+    }
+
 }

--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CityMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CityMatch.java
@ -1,48 +0,0 @@
-
-package eu.dnetlib.pace.tree;
-
-import java.util.Map;
-import java.util.Set;
-
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.tree.support.AbstractStringComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-
-@ComparatorClass("cityMatch")
-public class CityMatch extends AbstractStringComparator {
-
-	private Map<String, String> params;
-
-	public CityMatch(Map<String, String> params) {
-		super(params);
-		this.params = params;
-	}
-
-	@Override
-	public double distance(final String a, final String b, final Config conf) {
-
-		String ca = cleanup(a);
-		String cb = cleanup(b);
-
-		ca = normalize(ca);
-		cb = normalize(cb);
-
-		ca = filterAllStopWords(ca);
-		cb = filterAllStopWords(cb);
-
-		Set<String> cities1 = getCities(ca, Integer.parseInt(params.getOrDefault("windowSize", "4")));
-		Set<String> cities2 = getCities(cb, Integer.parseInt(params.getOrDefault("windowSize", "4")));
-
-		Set<String> codes1 = citiesToCodes(cities1);
-		Set<String> codes2 = citiesToCodes(cities2);
-
-		// if no cities are detected, the comparator gives 1.0
-		if (codes1.isEmpty() && codes2.isEmpty())
-			return 1.0;
-		else {
-			if (codes1.isEmpty() ^ codes2.isEmpty())
-				return -1; // undefined if one of the two has no cities
-			return commonElementsPercentage(codes1, codes2);
-		}
-	}
-}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java
@ -0,0 +1,51 @@
+
+package eu.dnetlib.pace.tree;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.tree.support.AbstractStringComparator;
+import eu.dnetlib.pace.tree.support.ComparatorClass;
+
+@ComparatorClass("codeMatch")
+public class CodeMatch extends AbstractStringComparator {
+
+	private Map<String, String> params;
+
+	private Pattern CODE_REGEX;
+
+	public CodeMatch(Map<String, String> params) {
+		super(params);
+		this.params = params;
+		this.CODE_REGEX = Pattern.compile(params.getOrDefault("codeRegex", "[a-zA-Z]::\\d+"));
+	}
+
+	public Set<String> getRegexList(String input) {
+		Matcher matcher = this.CODE_REGEX.matcher(input);
+		Set<String> cities = new HashSet<>();
+		while (matcher.find()) {
+			cities.add(matcher.group());
+		}
+		return cities;
+	}
+
+	@Override
+	public double distance(final String a, final String b, final Config conf) {
+
+		Set<String> codes1 = getRegexList(a);
+		Set<String> codes2 = getRegexList(b);
+
+		// if no codes are detected, the comparator gives 1.0
+		if (codes1.isEmpty() && codes2.isEmpty())
+			return 1.0;
+		else {
+			if (codes1.isEmpty() ^ codes2.isEmpty())
+				return -1; // undefined if one of the two has no codes
+			return commonElementsPercentage(codes1, codes2);
+		}
+	}
+}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java
@ -0,0 +1,54 @@
+
+package eu.dnetlib.pace.tree;
+
+import java.util.Map;
+import java.util.Set;
+
+import com.wcohen.ss.AbstractStringDistance;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.tree.support.AbstractStringComparator;
+import eu.dnetlib.pace.tree.support.ComparatorClass;
+
+/**
+ * Comparator for country values: exact equality gives 1.0, a difference gives 0, and an empty or
+ * "unknown" value on either side gives -1.
+ */
+@ComparatorClass("countryMatch")
+public class CountryMatch extends AbstractStringComparator {
+
+	private Map<String, String> params;
+
+	public CountryMatch(Map<String, String> params) {
+		super(params, new com.wcohen.ss.JaroWinkler());
+		this.params = params;
+	}
+
+	public CountryMatch(final double weight) {
+		super(weight, new com.wcohen.ss.JaroWinkler());
+	}
+
+	protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
+		super(weight, ssalgo);
+	}
+
+	/**
+	 * @return -1.0 when a value is empty or equals "unknown" (ignoring case); otherwise 1.0 when the
+	 *         two values are equal (case-sensitive) and 0 when they differ
+	 */
+	@Override
+	public double distance(final String a, final String b, final Config conf) {
+
+		// a missing field or an UNKNOWN country makes the comparison undefined
+		if (a.isEmpty() || b.isEmpty() || a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
+			return -1.0;
+		}
+
+		if (a.equals(b)) {
+			return 1.0;
+		}
+		return 0;
+	}
+
+	@Override
+	public double getWeight() {
+		return super.weight;
+	}
+
+	// the score is returned as is
+	@Override
+	protected double normalize(final double d) {
+		return d;
+	}
+
+}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerLegalname.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerLegalname.java
@ -0,0 +1,59 @@
+
+package eu.dnetlib.pace.tree;
+
+import java.util.Map;
+import java.util.Set;
+
+import com.wcohen.ss.AbstractStringDistance;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.tree.support.AbstractStringComparator;
+import eu.dnetlib.pace.tree.support.ComparatorClass;
+
+@ComparatorClass("jaroWinklerLegalname")
+public class JaroWinklerLegalname extends AbstractStringComparator {
+
+	private Map<String, String> params;
+
+	private final String CITY_CODE_REGEX = "city::\\d+";
+	private final String KEYWORD_CODE_REGEX = "key::\\d+";
+
+	public JaroWinklerLegalname(Map<String, String> params) {
+		super(params, new com.wcohen.ss.JaroWinkler());
+		this.params = params;
+	}
+
+	public JaroWinklerLegalname(double weight) {
+		super(weight, new com.wcohen.ss.JaroWinkler());
+	}
+
+	protected JaroWinklerLegalname(double weight, AbstractStringDistance ssalgo) {
+		super(weight, ssalgo);
+	}
+
+	@Override
+	public double distance(String a, String b, final Config conf) {
+
+		String ca = a.replaceAll(CITY_CODE_REGEX, "").replaceAll(KEYWORD_CODE_REGEX, " ");
+		String cb = b.replaceAll(CITY_CODE_REGEX, "").replaceAll(KEYWORD_CODE_REGEX, " ");
+
+		ca = ca.replaceAll("[ ]{2,}", " ");
+		cb = cb.replaceAll("[ ]{2,}", " ");
+
+		if (ca.isEmpty() && cb.isEmpty())
+			return 1.0;
+		else
+			return normalize(ssalgo.score(ca, cb));
+	}
+
+	@Override
+	public double getWeight() {
+		return super.weight;
+	}
+
+	@Override
+	protected double normalize(double d) {
+		return d;
+	}
+
+}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java
@ -1,74 +0,0 @@
-
-package eu.dnetlib.pace.tree;
-
-import java.util.Map;
-import java.util.Set;
-
-import com.wcohen.ss.AbstractStringDistance;
-
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.tree.support.AbstractStringComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-
-@ComparatorClass("jaroWinklerNormalizedName")
-public class JaroWinklerNormalizedName extends AbstractStringComparator {
-
-	private Map<String, String> params;
-
-	public JaroWinklerNormalizedName(Map<String, String> params) {
-		super(params, new com.wcohen.ss.JaroWinkler());
-		this.params = params;
-	}
-
-	public JaroWinklerNormalizedName(double weight) {
-		super(weight, new com.wcohen.ss.JaroWinkler());
-	}
-
-	protected JaroWinklerNormalizedName(double weight, AbstractStringDistance ssalgo) {
-		super(weight, ssalgo);
-	}
-
-	@Override
-	public double distance(String a, String b, final Config conf) {
-		String ca = cleanup(a);
-		String cb = cleanup(b);
-
-		ca = normalize(ca);
-		cb = normalize(cb);
-
-		ca = filterAllStopWords(ca);
-		cb = filterAllStopWords(cb);
-
-		Set<String> keywords1 = getKeywords(
-			ca, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
-		Set<String> keywords2 = getKeywords(
-			cb, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
-
-		Set<String> cities1 = getCities(ca, Integer.parseInt(params.getOrDefault("windowSize", "4")));
-		Set<String> cities2 = getCities(cb, Integer.parseInt(params.getOrDefault("windowSize", "4")));
-
-		ca = removeKeywords(ca, keywords1);
-		ca = removeKeywords(ca, cities1);
-		cb = removeKeywords(cb, keywords2);
-		cb = removeKeywords(cb, cities2);
-
-		ca = ca.replaceAll("[ ]{2,}", " ");
-		cb = cb.replaceAll("[ ]{2,}", " ");
-
-		if (ca.isEmpty() && cb.isEmpty())
-			return 1.0;
-		else
-			return normalize(ssalgo.score(ca, cb));
-	}
-
-	@Override
-	public double getWeight() {
-		return super.weight;
-	}
-
-	@Override
-	protected double normalize(double d) {
-		return d;
-	}
-
-}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/KeywordMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/KeywordMatch.java
@ -1,50 +0,0 @@
-
-package eu.dnetlib.pace.tree;
-
-import java.util.Map;
-import java.util.Set;
-
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.tree.support.AbstractStringComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-
-@ComparatorClass("keywordMatch")
-public class KeywordMatch extends AbstractStringComparator {
-
-	Map<String, String> params;
-
-	public KeywordMatch(Map<String, String> params) {
-		super(params);
-		this.params = params;
-	}
-
-	@Override
-	public double distance(final String a, final String b, final Config conf) {
-
-		String ca = cleanup(a);
-		String cb = cleanup(b);
-
-		ca = normalize(ca);
-		cb = normalize(cb);
-
-		ca = filterAllStopWords(ca);
-		cb = filterAllStopWords(cb);
-
-		Set<String> keywords1 = getKeywords(
-			ca, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
-		Set<String> keywords2 = getKeywords(
-			cb, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
-
-		Set<String> codes1 = toCodes(keywords1, conf.translationMap());
-		Set<String> codes2 = toCodes(keywords2, conf.translationMap());
-
-		// if no cities are detected, the comparator gives 1.0
-		if (codes1.isEmpty() && codes2.isEmpty())
-			return 1.0;
-		else {
-			if (codes1.isEmpty() ^ codes2.isEmpty())
-				return -1.0; // undefined if one of the two has no keywords
-			return commonElementsPercentage(codes1, codes2);
-		}
-	}
-}
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java
@ -48,7 +48,7 @@ public class TreeNodeDef implements Serializable {
 	// function for the evaluation of the node
 	public TreeNodeStats evaluate(Row doc1, Row doc2, Config conf) {

-		TreeNodeStats stats = new TreeNodeStats();
+		TreeNodeStats stats = new TreeNodeStats(ignoreUndefined);

 		// for each field in the node, it computes the
 		for (FieldConf fieldConf : fields) {
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java
@ -9,8 +9,11 @@ public class TreeNodeStats implements Serializable {

 	private Map<String, FieldStats> results; // this is an accumulator for the results of the node

-	public TreeNodeStats() {
+	private final boolean ignoreUndefined;
+
+	public TreeNodeStats(boolean ignoreUndefined) {
 		this.results = new HashMap<>();
+		this.ignoreUndefined = ignoreUndefined;
 	}

 	public Map<String, FieldStats> getResults() {
@ -22,7 +25,10 @@ public class TreeNodeStats implements Serializable {
 	}

 	public int fieldsCount() {
+		if (ignoreUndefined)
 			return this.results.size();
+		else
+			return this.results.size() - undefinedCount(); // do not count undefined
 	}

 	public int undefinedCount() {
@ -78,12 +84,23 @@ public class TreeNodeStats implements Serializable {
 		double min = 100.0; // random high value
 		for (FieldStats fs : this.results.values()) {
 			if (fs.getResult() < min) {
-				if (fs.getResult() >= 0.0 || (fs.getResult() == -1 && fs.isCountIfUndefined()))
+				if (fs.getResult() == -1) {
+					if (fs.isCountIfUndefined()) {
+						min = 0.0;
+					} else {
+						min = -1;
+					}
+				} else {
 					min = fs.getResult();
 				}
 			}
+		}
+		if (ignoreUndefined) {
+			return min == -1.0 ? 0.0 : min;
+		} else {
 			return min;
 		}
+	}

 	// if at least one is true, return 1.0
 	public double or() {
@ -91,8 +108,12 @@ public class TreeNodeStats implements Serializable {
 			if (fieldStats.getResult() >= fieldStats.getThreshold())
 				return 1.0;
 		}
+		if (!ignoreUndefined && undefinedCount() > 0) {
+			return -1.0;
+		} else {
 			return 0.0;
 		}
+	}

 	// if at least one is false, return 0.0
 	public double and() {
@ -100,7 +121,7 @@ public class TreeNodeStats implements Serializable {

 			if (fieldStats.getResult() == -1) {
 				if (fieldStats.isCountIfUndefined())
-					return 0.0;
+					return ignoreUndefined ? 0.0 : -1.0;
 			} else {
 				if (fieldStats.getResult() < fieldStats.getThreshold())
 					return 0.0;
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java
@ -44,12 +44,10 @@ public class TreeProcessor {
 			TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config);
 			treeStats.addNodeStats(nextNodeName, stats);

-			// if ignoreUndefined=false the miss is considered as undefined
-			if (!currentNode.isIgnoreUndefined() && stats.undefinedCount() > 0) {
+			double finalScore = stats.getFinalScore(currentNode.getAggregation());
+			if (finalScore == -1.0)
 				nextNodeName = currentNode.getUndefined();
-			}
-			// if ignoreUndefined=true the miss is ignored and the score computed anyway
-			else if (stats.getFinalScore(currentNode.getAggregation()) >= currentNode.getThreshold()) {
+			else if (finalScore >= currentNode.getThreshold()) {
 				nextNodeName = currentNode.getPositive();
 			} else {
 				nextNodeName = currentNode.getNegative();
--- a/dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/country_map.csv
+++ b/dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/country_map.csv
--- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java
+++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java
@ -8,6 +8,7 @@ import org.junit.jupiter.api.Test;

 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.mongodb.connection.Cluster;

 import eu.dnetlib.pace.AbstractPaceTest;
 import eu.dnetlib.pace.common.AbstractPaceFunctions;
@ -177,41 +178,16 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
 	}

 	@Test
-	public void testKeywordsClustering() {
+	public void legalnameClustering() {

-		final ClusteringFunction cf = new KeywordsClustering(params);
-		final String s = "Polytechnic University of Turin";
+		final ClusteringFunction cf = new LegalnameClustering(params);
+		String s = "key::1 key::2 city::1";
 		System.out.println(s);
 		System.out.println(cf.apply(conf, Lists.newArrayList(s)));

-		final String s1 = "POLITECNICO DI TORINO";
-		System.out.println(s1);
-		System.out.println(cf.apply(conf, Lists.newArrayList(s1)));
-
-		final String s2 = "Universita farmaceutica culturale di milano bergamo";
-		System.out.println("s2 = " + s2);
-		System.out.println(cf.apply(conf, Lists.newArrayList(s2)));
-
-		final String s3 = "universita universita milano milano";
-		System.out.println("s3 = " + s3);
-		System.out.println(cf.apply(conf, Lists.newArrayList(s3)));
-
-		final String s4 = "Politechniki Warszawskiej (Warsaw University of Technology)";
-		System.out.println("s4 = " + s4);
-		System.out.println(cf.apply(conf, Lists.newArrayList(s4)));
-
-		final String s5 = "İstanbul Ticarət Universiteti";
-		System.out.println("s5 = " + s5);
-		System.out.println(cf.apply(conf, Lists.newArrayList(s5)));
-
-		final String s6 = "National and Kapodistrian University of Athens";
-		System.out.println("s6 = " + s6);
-		System.out.println(cf.apply(conf, Lists.newArrayList(s6)));
-
-		final String s7 = "Εθνικό και Καποδιστριακό Πανεπιστήμιο Αθηνών";
-		System.out.println("s7 = " + s7);
-		System.out.println(cf.apply(conf, Lists.newArrayList(s7)));
-
+		s = "key::1 key::2 city::1 city::2";
+		System.out.println(s);
+		System.out.println(cf.apply(conf, Lists.newArrayList(s)));
 	}

 	@Test
--- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/common/PaceFunctionTest.java
+++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/common/PaceFunctionTest.java
@ -54,4 +54,47 @@ public class PaceFunctionTest extends AbstractPaceFunctions {
 		System.out.println("Fixed aliases  : " + fixAliases(TEST_STRING));
 	}

+	// Checks countryInference: a country other than UNKNOWN is returned untouched, while an UNKNOWN
+	// one is expected to be replaced by the country of a city found in the second argument.
+	@Test
+	public void countryInferenceTest() {
+		assertEquals("IT", countryInference("UNKNOWN", "Università di Bologna"));
+		// already known: the text is not used
+		assertEquals("UK", countryInference("UK", "Università di Bologna"));
+		assertEquals("IT", countryInference("UNKNOWN", "Universiteé de Naples"));
+		// nothing can be inferred: the value is expected to stay UNKNOWN
+		assertEquals("UNKNOWN", countryInference("UNKNOWN", "Università del Lavoro"));
+	}
+
+	// Checks cityInference: the expected output is the preprocessed name with each detected city
+	// replaced by its city:: code.
+	@Test
+	public void cityInferenceTest() {
+		assertEquals("universita city::3181928", cityInference("Università di Bologna"));
+		assertEquals("university city::3170647", cityInference("University of Pisa"));
+		// no city code expected in the output
+		assertEquals("universita", cityInference("Università del lavoro"));
+		// two city codes expected in the output
+		assertEquals("universita city::3173331 city::3169522", cityInference("Università di Modena e Reggio Emilia"));
+	}
+
+	// Checks keywordInference: the expected output is the preprocessed name with each detected
+	// keyword replaced by its key:: code, while city names are left as plain text.
+	@Test
+	public void keywordInferenceTest() {
+		assertEquals("key::41 turin", keywordInference("Polytechnic University of Turin"));
+		assertEquals("key::41 torino", keywordInference("POLITECNICO DI TORINO"));
+		assertEquals(
+			"key::1 key::60 key::81 milano bergamo",
+			keywordInference("Universita farmaceutica culturale di milano bergamo"));
+		// repeated keywords are expected to be replaced at every occurrence
+		assertEquals("key::1 key::1 milano milano", keywordInference("universita universita milano milano"));
+		// non-latin input: the expected output is transliterated
+		assertEquals(
+			"key::10 kapodistriako panepistemio athenon",
+			keywordInference("Εθνικό και Καποδιστριακό Πανεπιστήμιο Αθηνών"));
+	}
+
+	// Checks cityKeywordInference: the expected output is the preprocessed name with both keywords
+	// (key:: codes) and city names (city:: codes) replaced.
+	@Test
+	public void cityKeywordInferenceTest() {
+		// the English and Italian forms are expected to produce the same codes
+		assertEquals("key::41 city::3165524", cityKeywordInference("Polytechnic University of Turin"));
+		assertEquals("key::41 city::3165524", cityKeywordInference("POLITECNICO DI TORINO"));
+		assertEquals(
+			"key::1 key::60 key::81 city::3173435 city::3182164",
+			cityKeywordInference("Universita farmaceutica culturale di milano bergamo"));
+		// repeated keywords and cities are expected to be replaced at every occurrence
+		assertEquals(
+			"key::1 key::1 city::3173435 city::3173435", cityKeywordInference("universita universita milano milano"));
+		assertEquals(
+			"key::10 kapodistriako panepistemio city::264371",
+			cityKeywordInference("Εθνικό και Καποδιστριακό Πανεπιστήμιο Αθηνών"));
+	}
+
 }
--- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java
+++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java
@ -35,6 +35,7 @@ public class ComparatorTest extends AbstractPaceTest {
 		params.put("name_th", "0.95");
 		params.put("jpath_value", "$.value");
 		params.put("jpath_classid", "$.qualifier.classid");
+		params.put("codeRegex", "key::\\d+");
 	}

 	@Test
@ -44,52 +45,23 @@ public class ComparatorTest extends AbstractPaceTest {
 	}

 	@Test
-	public void cityMatchTest() {
-		final CityMatch cityMatch = new CityMatch(params);
+	public void codeMatchTest() {
+		CodeMatch codeMatch = new CodeMatch(params);

-		// both names with no cities
-		assertEquals(1.0, cityMatch.distance("Università", "Centro di ricerca", conf));
+		// both names with no codes
+		assertEquals(1.0, codeMatch.distance("testing1", "testing2", conf));

-		// one of the two names with no cities
-		assertEquals(-1.0, cityMatch.distance("Università di Bologna", "Centro di ricerca", conf));
+		// one of the two names with no codes
+		assertEquals(-1.0, codeMatch.distance("testing1 key::1", "testing", conf));

-		// both names with cities (same)
-		assertEquals(1.0, cityMatch.distance("Universita di Bologna", "Biblioteca di Bologna", conf));
+		// both names with codes (same)
+		assertEquals(1.0, codeMatch.distance("testing1 key::1", "testing2 key::1", conf));

-		// both names with cities (different)
-		assertEquals(0.0, cityMatch.distance("Universita di Bologna", "Universita di Torino", conf));
-		assertEquals(0.0, cityMatch.distance("Franklin College", "Concordia College", conf));
+		// both names with codes (different)
+		assertEquals(0.0, codeMatch.distance("testing1 key::1", "testing2 key::2", conf));

-		// particular cases
-		assertEquals(1.0, cityMatch.distance("Free University of Bozen-Bolzano", "Università di Bolzano", conf));
-		assertEquals(
-			1.0,
-			cityMatch
-				.distance(
-					"Politechniki Warszawskiej (Warsaw University of Technology)", "Warsaw University of Technology",
-					conf));
-
-		// failing becasuse 'Allen' is a transliterrated greek stopword
-		// assertEquals(-1.0, cityMatch.distance("Allen (United States)", "United States Military Academy", conf));
-		assertEquals(-1.0, cityMatch.distance("Washington (United States)", "United States Military Academy", conf));
-	}
-
-	@Test
-	public void keywordMatchTest() {
-		params.put("threshold", "0.5");
-
-		final KeywordMatch keywordMatch = new KeywordMatch(params);
-
-		assertEquals(
-			0.5, keywordMatch.distance("Biblioteca dell'Universita di Bologna", "Università di Bologna", conf));
-		assertEquals(1.0, keywordMatch.distance("Universita degli studi di Pisa", "Universita di Pisa", conf));
-		assertEquals(1.0, keywordMatch.distance("Polytechnic University of Turin", "POLITECNICO DI TORINO", conf));
-		assertEquals(1.0, keywordMatch.distance("Istanbul Commerce University", "İstanbul Ticarət Universiteti", conf));
-		assertEquals(1.0, keywordMatch.distance("Franklin College", "Concordia College", conf));
-		assertEquals(2.0 / 3.0, keywordMatch.distance("University of Georgia", "Georgia State University", conf));
-		assertEquals(0.5, keywordMatch.distance("University College London", "University of London", conf));
-		assertEquals(0.5, keywordMatch.distance("Washington State University", "University of Washington", conf));
-		assertEquals(-1.0, keywordMatch.distance("Allen (United States)", "United States Military Academy", conf));
+		// both names with codes (1 same, 1 different)
+		assertEquals(0.5, codeMatch.distance("key::1 key::2 testing1", "key::1 testing", conf));

 	}

@ -155,15 +127,15 @@ public class ComparatorTest extends AbstractPaceTest {
 	}

 	@Test
-	public void jaroWinklerNormalizedNameTest() {
+	public void jaroWinklerLegalnameTest() {

-		final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
+		final JaroWinklerLegalname jaroWinklerLegalname = new JaroWinklerLegalname(params);

-		double result = jaroWinklerNormalizedName
-			.distance("AT&T (United States)", "United States Military Academy", conf);
+		double result = jaroWinklerLegalname
+			.distance("AT&T (United States)", "United States key::2 key::1", conf);
 		System.out.println("result = " + result);

-		result = jaroWinklerNormalizedName.distance("NOAA - Servicio Meteorol\\u00f3gico Nacional", "NOAA - NWS", conf);
+		result = jaroWinklerLegalname.distance("NOAA - Servicio Meteorol\\u00f3gico Nacional", "NOAA - NWS", conf);
 		System.out.println("result = " + result);

 	}
@ -336,4 +308,23 @@ public class ComparatorTest extends AbstractPaceTest {
 		System.out.println("compare = " + compare);
 	}

+	/**
+	 * Checks the scores returned by {@code CountryMatch.distance} for unknown, different and identical
+	 * country values.
+	 */
+	@Test
+	public void countryMatch() {
+
+		final CountryMatch matcher = new CountryMatch(params);
+
+		// both sides unknown
+		assertEquals(-1.0, matcher.distance("UNKNOWN", "UNKNOWN", conf));
+
+		// only one side unknown
+		assertEquals(-1.0, matcher.distance("CL", "UNKNOWN", conf));
+
+		// two different countries
+		assertEquals(0.0, matcher.distance("CL", "IT", conf));
+
+		// same country on both sides
+		assertEquals(1.0, matcher.distance("CL", "CL", conf));
+
+	}
+
 }
--- a/dhp-workflows/dhp-actionmanager/pom.xml
+++ b/dhp-workflows/dhp-actionmanager/pom.xml
@ -51,48 +51,5 @@
            <artifactId>hadoop-distcp</artifactId>
        </dependency>

-        <dependency>
-            <groupId>eu.dnetlib</groupId>
-            <artifactId>dnet-actionmanager-api</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib</groupId>
-            <artifactId>dnet-actionmanager-common</artifactId>
-            <exclusions>
-                <exclusion>
-                    <groupId>eu.dnetlib</groupId>
-                    <artifactId>dnet-openaireplus-mapping-utils</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>saxonica</groupId>
-                    <artifactId>saxon</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>saxonica</groupId>
-                    <artifactId>saxon-dom</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>jgrapht</groupId>
-                    <artifactId>jgrapht</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>net.sf.ehcache</groupId>
-                    <artifactId>ehcache</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.springframework</groupId>
-                    <artifactId>spring-test</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.*</groupId>
-                    <artifactId>*</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>apache</groupId>
-                    <artifactId>*</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
    </dependencies>
 </project>
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager;
 import java.io.Serializable;
 import java.io.StringReader;
 import java.util.List;
-import java.util.NoSuchElementException;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
@ -22,7 +21,6 @@ import com.google.common.base.Splitter;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Sets;

-import eu.dnetlib.actionmanager.rmi.ActionManagerException;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -65,7 +63,7 @@ public class ISClient implements Serializable {
 						.map(t -> buildDirectory(basePath, t))
 						.collect(Collectors.toList()))
 				.orElseThrow(() -> new IllegalStateException("empty set list"));
-		} catch (ActionManagerException | ISLookUpException e) {
+		} catch (ISLookUpException e) {
 			throw new IllegalStateException("unable to query ActionSets info from the IS");
 		}
 	}
@ -89,31 +87,18 @@ public class ISClient implements Serializable {
 		return Joiner.on("/").join(basePath, t.getMiddle(), t.getRight());
 	}

-	private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException {
+	private String getBasePathHDFS(ISLookUpService isLookup) throws ISLookUpException {
 		return queryServiceProperty(isLookup, "basePath");
 	}

 	private String queryServiceProperty(ISLookUpService isLookup, final String propertyName)
-		throws ActionManagerException {
+		throws ISLookUpException {
 		final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
 			+ propertyName
 			+ "']/@value/string()";
 		log.debug("quering for service property: {}", q);
-		try {
+
 		final List<String> value = isLookup.quickSearchProfile(q);
 		return Iterables.getOnlyElement(value);
-		} catch (ISLookUpException e) {
-			String msg = "Error accessing service profile, using query: " + q;
-			log.error(msg, e);
-			throw new ActionManagerException(msg, e);
-		} catch (NoSuchElementException e) {
-			String msg = "missing service property: " + propertyName;
-			log.error(msg, e);
-			throw new ActionManagerException(msg, e);
-		} catch (IllegalArgumentException e) {
-			String msg = "found more than one service property: " + propertyName;
-			log.error(msg, e);
-			throw new ActionManagerException(msg, e);
-		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java
@ -42,6 +42,9 @@ public class Constants {
 	public static final String NULL = "NULL";
 	public static final String NA = "N/A";

+	public static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
+	public static final String WEB_CRAWL_NAME = "Web Crawl";
+
 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

 	private Constants() {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
@ -41,9 +41,9 @@ public class PrepareAffiliationRelations implements Serializable {
 	private static final Logger log = LoggerFactory.getLogger(PrepareAffiliationRelations.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	private static final String ID_PREFIX = "50|doi_________::";
-	public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference";
-	public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!";
-	public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref";
+	public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:openaireinference";
+	public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by OpenAIRE";
+	public static final String BIP_INFERENCE_PROVENANCE = "openaire:affiliation";

 	public static <I extends Result> void main(String[] args) throws Exception {

@ -71,6 +71,9 @@ public class PrepareAffiliationRelations implements Serializable {
 		final String dataciteInputPath = parser.get("dataciteInputPath");
 		log.info("dataciteInputPath: {}", dataciteInputPath);

+		final String webcrawlInputPath = parser.get("webCrawlInputPath");
+		log.info("webcrawlInputPath: {}", webcrawlInputPath);
+
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

@ -102,10 +105,16 @@ public class PrepareAffiliationRelations implements Serializable {
 				JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations(
 					spark, dataciteInputPath, collectedFromDatacite);

+				List<KeyValue> collectedFromWebCrawl = OafMapperUtils
+					.listKeyValues(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME);
+				JavaPairRDD<Text, Text> webCrawlRelations = prepareAffiliationRelations(
+					spark, webcrawlInputPath, collectedFromWebCrawl);
+
 				crossrefRelations
 					.union(pubmedRelations)
 					.union(openAPCRelations)
 					.union(dataciteRelations)
+					.union(webCrawlRelations)
 					.saveAsHadoopFile(
 						outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java
@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.io.Serializable;
 import java.util.*;
-import java.util.stream.Collectors;

 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
@ -21,6 +20,7 @@ import org.slf4j.LoggerFactory;

 import com.fasterxml.jackson.databind.ObjectMapper;

+import eu.dnetlib.dhp.actionmanager.Constants;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
@ -44,8 +44,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
 	private static final String PMID_PREFIX = "50|pmid________::";

 	private static final String PMCID_PREFIX = "50|pmc_________::";
-	private static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
-	private static final String WEB_CRAWL_NAME = "Web Crawl";
+
 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

 	public static void main(String[] args) throws Exception {
@ -104,8 +103,6 @@ public class CreateActionSetFromWebEntries implements Serializable {
 				final String ror = ROR_PREFIX
 					+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
 				ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
-//				ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
-//				ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));

 				return ret
 					.iterator();
@ -145,11 +142,6 @@ public class CreateActionSetFromWebEntries implements Serializable {
 				"institution.country_code as country_code", "publication_year")
 			.distinct();

-//			.selectExpr(
-//				"id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror",
-//				"institution.country_code as country_code", "publication_year")
-//			.distinct();
-
 	}

 	private static Dataset<Row> readBlackList(SparkSession spark, String inputPath) {
@ -220,7 +212,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
 						ModelConstants.IS_AUTHOR_INSTITUTION_OF,
 						Arrays
 							.asList(
-								OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)),
+								OafMapperUtils.keyValue(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME)),
 						OafMapperUtils
 							.dataInfo(
 								false, null, false, false,
@ -239,7 +231,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
 						ModelConstants.HAS_AUTHOR_INSTITUTION,
 						Arrays
 							.asList(
-								OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)),
+								OafMapperUtils.keyValue(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME)),
 						OafMapperUtils
 							.dataInfo(
 								false, null, false, false,
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPlugin.java
@ -0,0 +1,76 @@
+
+package eu.dnetlib.dhp.collection.plugin.researchfi;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.http.NameValuePair;
+import org.apache.http.client.entity.UrlEncodedFormEntity;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.message.BasicNameValuePair;
+import org.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+
+/**
+ * Collector plugin that first obtains a bearer token from an authentication endpoint and then streams
+ * the records returned by a {@link ResearchFiIterator} built on the API base url.
+ */
+public class ResearchFiCollectorPlugin implements CollectorPlugin {
+
+	private static final Logger log = LoggerFactory.getLogger(ResearchFiCollectorPlugin.class);
+
+	/**
+	 * Authenticates using the {@code auth_url}, {@code auth_client_id} and {@code auth_client_secret}
+	 * api parameters, then lazily iterates over the records exposed by the api base url.
+	 *
+	 * @param api the api descriptor providing the base url and the authentication parameters
+	 * @param report the aggregation report (not used by this plugin)
+	 * @return a sequential, ordered stream of the collected records
+	 * @throws CollectorException if the access token cannot be obtained
+	 */
+	@Override
+	public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report)
+		throws CollectorException {
+
+		final String authUrl = api.getParams().get("auth_url");
+		final String clientId = api.getParams().get("auth_client_id");
+		final String clientSecret = api.getParams().get("auth_client_secret");
+
+		final String authToken = authenticate(authUrl, clientId, clientSecret);
+
+		final Iterator<String> iter = new ResearchFiIterator(api.getBaseUrl(), authToken);
+
+		return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iter, Spliterator.ORDERED), false);
+	}
+
+	/**
+	 * Posts a {@code client_credentials} grant request to the given url and extracts the
+	 * {@code access_token} field from the JSON response.
+	 *
+	 * @param authUrl the url of the token endpoint
+	 * @param clientId the client identifier
+	 * @param clientSecret the client secret
+	 * @return the (non blank) access token
+	 * @throws CollectorException if the request fails, the response is not a JSON object, or the token
+	 *         is missing or blank
+	 */
+	private String authenticate(final String authUrl, final String clientId, final String clientSecret)
+		throws CollectorException {
+		try (final CloseableHttpClient client = HttpClients.createDefault()) {
+			final HttpPost req = new HttpPost(authUrl);
+			final List<NameValuePair> params = new ArrayList<>();
+			params.add(new BasicNameValuePair("grant_type", "client_credentials"));
+			params.add(new BasicNameValuePair("client_id", clientId));
+			params.add(new BasicNameValuePair("client_secret", clientSecret));
+
+			req.setEntity(new UrlEncodedFormEntity(params, "UTF-8"));
+
+			try (final CloseableHttpResponse response = client.execute(req)) {
+				// decode explicitly as UTF-8 instead of relying on the platform default charset
+				final String content = IOUtils
+					.toString(response.getEntity().getContent(), java.nio.charset.StandardCharsets.UTF_8);
+				final JSONObject obj = new JSONObject(content);
+				// optString yields an empty string when the field is absent, so that case is reported
+				// below as a missing token rather than as a generic JSON error
+				final String token = obj.optString("access_token");
+				if (StringUtils.isNotBlank(token)) {
+					return token;
+				}
+				log.warn("No access token in the response (HTTP status: {})", response.getStatusLine().getStatusCode());
+			}
+		} catch (final Exception e) {
+			log.warn("Error obtaining access token", e);
+			throw new CollectorException("Error obtaining access token", e);
+		}
+		throw new CollectorException("Access token is missing");
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java
@ -0,0 +1,117 @@
+
+package eu.dnetlib.dhp.collection.plugin.researchfi;
+
+import java.util.Iterator;
+import java.util.Queue;
+import java.util.concurrent.PriorityBlockingQueue;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.http.Header;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.json.JSONArray;
+
+import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+
+/**
+ * Iterator over the records of a paginated JSON API: each page is requested with a bearer token, parsed
+ * as a JSON array, and every element is converted to XML before being queued for delivery.
+ * <p>
+ * The total number of pages is read from the {@code x-page-count} response header.
+ */
+public class ResearchFiIterator implements Iterator<String> {
+
+	private static final Log log = LogFactory.getLog(ResearchFiIterator.class);
+
+	private static final int PAGE_SIZE = 100;
+
+	private static final String PAGE_COUNT_HEADER = "x-page-count";
+
+	private final String baseUrl;
+	private final String authToken;
+	// number of the last requested page (0 = nothing requested yet)
+	private int currPage;
+	// total number of pages, as declared by the last response
+	private int nPages;
+
+	// NOTE(review): a PriorityBlockingQueue without comparator hands out the records of a page in
+	// lexicographic order rather than in the order returned by the API - confirm this is intended
+	private final Queue<String> queue = new PriorityBlockingQueue<>();
+
+	/**
+	 * @param baseUrl the url of the API endpoint, optionally already containing query parameters
+	 * @param authToken the bearer token sent in the Authorization header of every request
+	 */
+	public ResearchFiIterator(final String baseUrl, final String authToken) {
+		this.baseUrl = baseUrl;
+		this.authToken = authToken;
+		this.currPage = 0;
+		this.nPages = 0;
+	}
+
+	/**
+	 * Requests pages until at least one record is queued or no page is left. The first page is always
+	 * requested, since the total number of pages is only known after the first response.
+	 *
+	 * @throws IllegalStateException if a page cannot be retrieved
+	 */
+	private void fillQueue() {
+		while (this.queue.isEmpty() && ((this.currPage == 0) || (this.currPage < this.nPages))) {
+			try {
+				nextCall();
+			} catch (final CollectorException e) {
+				throw new IllegalStateException(e);
+			}
+		}
+	}
+
+	@Override
+	public boolean hasNext() {
+		synchronized (this.queue) {
+			// empty pages are skipped, so an empty page does not end the iteration prematurely
+			fillQueue();
+			return !this.queue.isEmpty();
+		}
+	}
+
+	@Override
+	public String next() {
+		synchronized (this.queue) {
+			// pages are fetched before polling: a failing request can no longer discard a polled record
+			fillQueue();
+			final String res = this.queue.poll();
+			if (res == null) {
+				throw new java.util.NoSuchElementException("No more records available");
+			}
+			return res;
+		}
+	}
+
+	/**
+	 * Requests the next page, updates the total number of pages and queues the records of the page.
+	 *
+	 * @throws CollectorException if the request fails or the response is not a JSON array
+	 */
+	private void nextCall() throws CollectorException {
+
+		this.currPage += 1;
+
+		final String url;
+		if (!this.baseUrl.contains("?")) {
+			url = String.format("%s?PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE);
+		} else if (!this.baseUrl.contains("PageSize=")) {
+			url = String.format("%s&PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE);
+		} else {
+			// the page size is already part of the base url
+			url = String.format("%s&PageNumber=%d", this.baseUrl, this.currPage);
+		}
+		log.info("Calling url: " + url);
+
+		try (final CloseableHttpClient client = HttpClients.createDefault()) {
+
+			final HttpGet req = new HttpGet(url);
+			req.addHeader("Authorization", "Bearer " + this.authToken);
+			try (final CloseableHttpResponse response = client.execute(req)) {
+				for (final Header header : response.getAllHeaders()) {
+					log.debug("HEADER: " + header.getName() + " = " + header.getValue());
+					// HTTP header names are case-insensitive
+					if (PAGE_COUNT_HEADER.equalsIgnoreCase(header.getName())) {
+						final int totalPages = NumberUtils.toInt(header.getValue());
+						if (this.nPages != totalPages) {
+							this.nPages = totalPages;
+							log.info("Total pages: " + totalPages);
+						}
+					}
+				}
+
+				// decode explicitly as UTF-8 instead of relying on the platform default charset
+				final String content = IOUtils
+					.toString(response.getEntity().getContent(), java.nio.charset.StandardCharsets.UTF_8);
+				final JSONArray jsonArray = new JSONArray(content);
+
+				jsonArray.forEach(obj -> this.queue.add(JsonUtils.convertToXML(obj.toString())));
+			}
+		} catch (final Exception e) {
+			log.warn("Error calling url: " + url, e);
+			throw new CollectorException("Error calling url: " + url, e);
+		}
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json
@ -28,7 +28,13 @@
    "paramLongName": "dataciteInputPath",
    "paramDescription": "the path to get the input data from Datacite",
    "paramRequired": true
-  },
+  },{
+  "paramName": "wip",
+  "paramLongName": "webCrawlInputPath",
+  "paramDescription": "the path to get the input data from Web Crawl",
+  "paramRequired": true
+}
+,
  {
    "paramName": "o",
    "paramLongName": "outputPath",
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
@ -17,6 +17,10 @@
            <name>dataciteInputPath</name>
            <description>the path where to find the inferred affiliation relations from Datacite</description>
        </property>
+        <property>
+            <name>webCrawlInputPath</name>
+            <description>the path where to find the inferred affiliation relations from webCrawl</description>
+        </property>
        <property>
            <name>outputPath</name>
            <description>the path where to store the actionset</description>
@ -112,7 +116,7 @@
            <arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
            <arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg>
            <arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg>
-
+            <arg>--webCrawlInputPath</arg><arg>${webCrawlInputPath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/crossref/irish_funder.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/crossref/irish_funder.json
@ -1,10 +1,5 @@
 [
-  {
-    "id": "100007630",
-    "uri": "http://dx.doi.org/10.13039/100007630",
-    "name": "College of Engineering and Informatics, National University of Ireland, Galway",
-    "synonym": []
-  },
+
  {
    "id": "100007731",
    "uri": "http://dx.doi.org/10.13039/100007731",
@ -432,13 +427,13 @@
    "id": "501100001634",
    "uri": "http://dx.doi.org/10.13039/501100001634",
    "name": "University of Galway",
-    "synonym": []
+    "synonym": ["501100019905", "100007630", "501100020570", "501100023852"]
  },
  {
    "id": "501100001635",
    "uri": "http://dx.doi.org/10.13039/501100001635",
    "name": "University of Limerick",
-    "synonym": []
+    "synonym": ["501100014531"]
  },
  {
    "id": "501100001636",
@ -468,7 +463,7 @@
    "id": "501100002736",
    "uri": "http://dx.doi.org/10.13039/501100002736",
    "name": "Covidien",
-    "synonym": []
+    "synonym": ["501100003956"]
  },
  {
    "id": "501100002755",
@ -518,12 +513,6 @@
    "name": "Irish Institute of Clinical Neuroscience",
    "synonym": []
  },
-  {
-    "id": "501100003956",
-    "uri": "http://dx.doi.org/10.13039/501100003956",
-    "name": "Aspect Medical Systems",
-    "synonym": []
-  },
  {
    "id": "501100004162",
    "uri": "http://dx.doi.org/10.13039/501100004162",
@ -644,12 +633,7 @@
    "name": "Irish Centre for High-End Computing",
    "synonym": []
  },
-  {
-    "id": "501100019905",
-    "uri": "http://dx.doi.org/10.13039/501100019905",
-    "name": "Galway University Foundation",
-    "synonym": []
-  },
+
  {
    "id": "501100020036",
    "uri": "http://dx.doi.org/10.13039/501100020036",
@ -824,12 +808,7 @@
    "name": "Energy Policy Research Centre, Economic and Social Research Institute",
    "synonym": []
  },
-  {
-    "id": "501100014531",
-    "uri": "http://dx.doi.org/10.13039/501100014531",
-    "name": "Physical Education and Sport Sciences Department, University of Limerick",
-    "synonym": []
-  },
+
  {
    "id": "501100014745",
    "uri": "http://dx.doi.org/10.13039/501100014745",
@ -842,22 +821,11 @@
    "name": "ADAPT - Centre for Digital Content Technology",
    "synonym": []
  },
-  {
-    "id": "501100020570",
-    "uri": "http://dx.doi.org/10.13039/501100020570",
-    "name": "College of Medicine, Nursing and Health Sciences, National University of Ireland, Galway",
-    "synonym": []
-  },
+
  {
    "id": "501100020871",
    "uri": "http://dx.doi.org/10.13039/501100020871",
    "name": "Bernal Institute, University of Limerick",
    "synonym": []
-  },
-  {
-    "id": "501100023852",
-    "uri": "http://dx.doi.org/10.13039/501100023852",
-    "name": "Moore Institute for Research in the Humanities and Social Studies, University of Galway",
-    "synonym": []
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml
@ -48,12 +48,37 @@
            <description>Used to configure the heap size for the map JVM process. Should be 80% of mapreduce.map.memory.mb.</description>
        </property>

+        <property>
+            <name>JAVA_HOME</name>
+            <value>/srv/java/openjdk-17</value>
+            <description>Used to configure the Java home location for oozie.launcher.mapreduce.map.env</description>
+        </property>
+
+        <property>
+            <name>JAVA_OPTS</name>
+                <value>-Dcom.sun.security.enableAIAcaIssuers=true</value>
+            <description>Used to configure the JAVA_OPTS parameter</description>
+        </property>

    </parameters>

    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.job.queuename</name>
+                <value>${queueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapred.job.queue.name</name>
+                <value>${oozieLauncherQueueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapreduce.map.env</name>
+                <value>JAVA_HOME=${JAVA_HOME}</value>
+            </property>
+        </configuration>
    </global>

    <start to="collection_mode"/>
@ -99,7 +124,7 @@
    <action name="CollectionWorker">
        <java>
            <main-class>eu.dnetlib.dhp.collection.CollectorWorkerApplication</main-class>
-            <java-opts>${collection_java_xmx}</java-opts>
+            <java-opts>${JAVA_OPTS} ${collection_java_xmx}</java-opts>
            <arg>--apidescriptor</arg><arg>${apiDescription}</arg>
            <arg>--namenode</arg><arg>${nameNode}</arg>
            <arg>--workflowId</arg><arg>${workflowId}</arg>
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala
@ -93,7 +93,7 @@ case object Crossref2Oaf {

    val cf = new KeyValue
    cf.setValue("UnpayWall")
-    cf.setKey(s"10|openaire____:${DHPUtils.md5("UnpayWall".toLowerCase)}")
+    cf.setKey(s"10|openaire____::${DHPUtils.md5("UnpayWall".toLowerCase)}")
    cf

  }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
@ -88,6 +88,7 @@ public class PrepareAffiliationRelationsTest {
 					"-pubmedInputPath", crossrefAffiliationRelationPath,
 					"-openapcInputPath", crossrefAffiliationRelationPath,
 					"-dataciteInputPath", crossrefAffiliationRelationPath,
+					"-webCrawlInputPath", crossrefAffiliationRelationPath,
 					"-outputPath", outputPath
 				});

@ -104,7 +105,7 @@ public class PrepareAffiliationRelationsTest {
 //            );
 //        }
 		// count the number of relations
-		assertEquals(80, tmp.count());
+		assertEquals(120, tmp.count());

 		Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
 		dataset.createOrReplaceTempView("result");
@ -115,7 +116,7 @@ public class PrepareAffiliationRelationsTest {
 		// verify that we have equal number of bi-directional relations
 		Assertions
 			.assertEquals(
-				40, execVerification
+				60, execVerification
 					.filter(
 						"relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
 					.collectAsList()
@ -123,7 +124,7 @@ public class PrepareAffiliationRelationsTest {

 		Assertions
 			.assertEquals(
-				40, execVerification
+				60, execVerification
 					.filter(
 						"relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
 					.collectAsList()
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPluginTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPluginTest.java
@ -0,0 +1,58 @@
+
+package eu.dnetlib.dhp.collection.plugin.researchfi;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.dom4j.DocumentException;
+import org.dom4j.DocumentHelper;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+
+/**
+ * Manual test for {@link ResearchFiCollectorPlugin}: it is disabled and its client credentials are
+ * left empty.
+ */
+public class ResearchFiCollectorPluginTest {
+
+	private final ResearchFiCollectorPlugin plugin = new ResearchFiCollectorPlugin();
+
+	/**
+	 * Collects all the records, prints the first one, reports duplicated project numbers and finally
+	 * prints the number of records and of distinct identifiers.
+	 */
+	@Test
+	@Disabled
+	void testCollect() throws CollectorException {
+		final ApiDescriptor descriptor = new ApiDescriptor();
+		descriptor
+			.setBaseUrl("https://research.fi/api/rest/v1/funding-decisions?FunderName=AKA&FundingStartYearFrom=2022");
+		descriptor.setProtocol("research_fi");
+		descriptor
+			.getParams()
+			.put("auth_url", "https://researchfi-auth.2.rahtiapp.fi/realms/publicapi/protocol/openid-connect/token");
+		descriptor.getParams().put("auth_client_id", "");
+		descriptor.getParams().put("auth_client_secret", "");
+
+		final AtomicLong seen = new AtomicLong(0);
+		final Set<String> knownIds = new HashSet<>();
+
+		this.plugin.collect(descriptor, new AggregatorReport()).forEach(record -> {
+
+			// only the very first record is printed
+			if (seen.getAndIncrement() == 0) {
+				System.out.println("First: " + record);
+			}
+
+			final String id;
+			try {
+				id = DocumentHelper.parseText(record).valueOf("/recordWrap/funderProjectNumber");
+			} catch (final DocumentException e) {
+				throw new RuntimeException(e);
+			}
+
+			// Set.add returns false when the identifier was already collected
+			if (!knownIds.add(id)) {
+				System.out.println("Id already present: " + id);
+			}
+		});
+
+		System.out.println("Total records: " + seen);
+		System.out.println("Total identifiers: " + knownIds.size());
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json
@ -5,3 +5,5 @@
 {"DOI":"10.1061\/(asce)0733-9372(2002)128:7(575)","Matchings":[{"RORid":"https:\/\/ror.org\/04j198w64","Confidence":0.82}]}
 {"DOI":"10.1061\/(asce)0733-9372(2002)128:7(588)","Matchings":[{"RORid":"https:\/\/ror.org\/03m8km719","Confidence":0.8660254038},{"RORid":"https:\/\/ror.org\/02aze4h65","Confidence":0.87}]}
 {"DOI":"10.1161\/hy0202.103001","Matchings":[{"RORid":"https:\/\/ror.org\/057xtrt18","Confidence":0.7071067812}]}
+{"DOI": "10.1080/13669877.2015.1042504", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/03265fv13"}]}
+{"DOI": "10.1007/3-540-47984-8_14", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/00a0n9e72"}]}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareSimpleEntititiesJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareSimpleEntititiesJob.java
@ -26,15 +26,15 @@ import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.Software;

-public class PrepareSimpleEntititiesJob {
+public class PrepareSimpleEntitiesJob {

-	private static final Logger log = LoggerFactory.getLogger(PrepareSimpleEntititiesJob.class);
+	private static final Logger log = LoggerFactory.getLogger(PrepareSimpleEntitiesJob.class);

 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
-					PrepareSimpleEntititiesJob.class
+					PrepareSimpleEntitiesJob.class
 						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
 		parser.parseArgument(args);

--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
@ -160,8 +160,7 @@ public class ConversionUtils {
 			.stream()
 			.filter(Objects::nonNull)
 			.filter(pid -> pid.getQualifier() != null)
-			.filter(pid -> pid.getQualifier().getClassid() != null)
-			.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))
+			.filter(pid -> StringUtils.startsWithIgnoreCase(pid.getQualifier().getClassid(), ModelConstants.ORCID))
 			.map(StructuredProperty::getValue)
 			.map(ConversionUtils::cleanOrcid)
 			.filter(StringUtils::isNotBlank)
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml
@ -7,7 +7,7 @@
        </property>
        <property>
            <name>outputDir</name>
-            <description>the path where the the generated data will be stored</description>
+            <description>the path where the generated data will be stored</description>
        </property>
 		<property>
            <name>datasourceIdWhitelist</name>
@ -179,17 +179,18 @@
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareSimpleEntititiesJob</name>
-            <class>eu.dnetlib.dhp.broker.oa.PrepareSimpleEntititiesJob</class>
+            <class>eu.dnetlib.dhp.broker.oa.PrepareSimpleEntitiesJob</class>
            <jar>dhp-broker-events-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=5000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -209,11 +210,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -234,11 +236,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -258,11 +261,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=5000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -282,11 +286,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=10000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -306,11 +311,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=2000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -332,11 +338,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -356,11 +363,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -380,11 +388,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -404,11 +413,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -428,11 +438,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -452,11 +463,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
@ -476,11 +488,12 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.sql.shuffle.partitions=8000
            </spark-opts>
            <arg>--workingDir</arg><arg>${workingDir}</arg>
            <arg>--outputDir</arg><arg>${outputDir}</arg>
@ -503,6 +516,7 @@
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing} 
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -535,6 +549,7 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -562,6 +577,7 @@
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -585,6 +601,7 @@
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing} 
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcidTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcidTest.java
@ -0,0 +1,66 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers.simple;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.List;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import eu.dnetlib.broker.objects.OaBrokerAuthor;
+import eu.dnetlib.broker.objects.OaBrokerMainEntity;
+
+class EnrichMissingAuthorOrcidTest {
+
+	final EnrichMissingAuthorOrcid matcher = new EnrichMissingAuthorOrcid();
+
+	@BeforeEach
+	void setUp() throws Exception {
+	}
+
+	@Test
+	void testFindDifferences_1() {
+		// neither entity has creators: an empty list of differences is expected
+		final OaBrokerMainEntity src = new OaBrokerMainEntity();
+		final OaBrokerMainEntity tgt = new OaBrokerMainEntity();
+		assertTrue(this.matcher.findDifferences(src, tgt).isEmpty());
+	}
+
+	@Test
+	void testFindDifferences_2() {
+		// same author on both sides, ORCID set only on the source one: exactly one difference is expected
+		final OaBrokerMainEntity src = new OaBrokerMainEntity();
+		src.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
+		final OaBrokerMainEntity tgt = new OaBrokerMainEntity();
+		tgt.getCreators().add(new OaBrokerAuthor("Claudio Atzori", null));
+
+		final List<OaBrokerAuthor> differences = this.matcher.findDifferences(src, tgt);
+		assertEquals(1, differences.size());
+	}
+
+	@Test
+	void testFindDifferences_3() {
+		// same author on both sides, ORCID set only on the target one: no difference is expected
+		final OaBrokerMainEntity src = new OaBrokerMainEntity();
+		src.getCreators().add(new OaBrokerAuthor("Claudio Atzori", null));
+		final OaBrokerMainEntity tgt = new OaBrokerMainEntity();
+		tgt.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
+
+		final List<OaBrokerAuthor> differences = this.matcher.findDifferences(src, tgt);
+		assertTrue(differences.isEmpty());
+	}
+
+	@Test
+	void testFindDifferences_4() {
+		// same author with the same ORCID on both sides: no difference is expected
+		final OaBrokerMainEntity src = new OaBrokerMainEntity();
+		src.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
+		final OaBrokerMainEntity tgt = new OaBrokerMainEntity();
+		tgt.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
+
+		assertTrue(this.matcher.findDifferences(src, tgt).isEmpty());
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtilsTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtilsTest.java
@ -2,27 +2,32 @@
 package eu.dnetlib.dhp.broker.oa.util;

 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;

 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;

 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;

+import eu.dnetlib.broker.objects.OaBrokerMainEntity;
 import eu.dnetlib.broker.objects.OaBrokerTypedValue;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.Instance;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

-class ConversionUtilsTest {
+public class ConversionUtilsTest {

 	@BeforeEach
-	void setUp() throws Exception {
+	public void setUp() throws Exception {
 	}

 	@Test
-	void testAllResultPids() {
+	public void testAllResultPids() {
 		final Qualifier qf = new Qualifier();
 		qf.setClassid("test");
 		qf.setClassname("test");
@ -91,4 +96,42 @@ class ConversionUtilsTest {
 		assertEquals(6, list.size());
 	}

+	@Test
+	public void testOafResultToBrokerResult() {
+		// a1 carries a bare ORCID, a2 an ORCID expressed as URL, a3 has no ORCID pid at all
+		final Author a1 = createAuthor("Michele Artini", "0000-0002-4406-428X");
+		final Author a2 = createAuthor("Claudio Atzori", "http://orcid.org/0000-0001-9613-6639");
+		final Author a3 = createAuthor("Alessia Bardi", null);
+
+		final Result r = new Result();
+		r.setAuthor(Arrays.asList(a1, a2, a3));
+		final OaBrokerMainEntity br = ConversionUtils.oafResultToBrokerResult(r);
+
+		assertEquals(3, br.getCreators().size());
+		assertEquals("0000-0002-4406-428X", br.getCreators().get(0).getOrcid());
+		assertEquals("0000-0001-9613-6639", br.getCreators().get(1).getOrcid());
+		assertNull(br.getCreators().get(2).getOrcid());
+	}
+
+	/** Builds an Author named {@code name}; a non-null {@code orcid} is attached as its only pid, qualified as ORCID. */
+	private Author createAuthor(final String name, final String orcid) {
+		final Author a = new Author();
+		a.setFullname(name);
+
+		if (orcid != null) {
+			final Qualifier q = new Qualifier();
+			q.setClassid(ModelConstants.ORCID);
+			q.setClassname(ModelConstants.ORCID);
+			q.setSchemeid("dnet:pids");
+			q.setSchemename("dnet:pids");
+
+			final StructuredProperty pid = new StructuredProperty();
+			pid.setQualifier(q);
+			pid.setValue(orcid);
+
+			a.setPid(Arrays.asList(pid));
+		}
+		return a;
+	}
+
 }
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
@ -203,8 +203,8 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
 			WindowSpec w = Window
 				.partitionBy("groupId")
 				.orderBy(
-					col("lastUsage").desc_nulls_last(),
 					col("pidType").asc_nulls_last(),
+					col("lastUsage").desc_nulls_last(),
 					col("collectedfrom").desc_nulls_last(),
 					col("date").asc_nulls_last(),
 					col("id").asc_nulls_last());
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/config-default.xml
@ -15,4 +15,12 @@
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
+    <property>
+        <name>hiveMetastoreUris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>pivotHistoryDatabase</name>
+        <value>&#x200B;</value>
+    </property>
 </configuration>
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml
@ -198,6 +198,8 @@
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
            <arg>--actionSetId</arg><arg>${actionSetId}</arg>
            <arg>--cutConnectedComponent</arg><arg>${cutConnectedComponent}</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+            <arg>--pivotHistoryDatabase</arg><arg>${pivotHistoryDatabase}</arg>
        </spark>
        <ok to="PrepareOrgRels"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
@ -190,7 +190,7 @@ public class SparkDedupTest implements Serializable {
 		System.out.println("orp_simrel = " + orp_simrel);

 		if (CHECK_CARDINALITIES) {
-			assertEquals(751, orgs_simrel);
+			assertEquals(742, orgs_simrel);
 			assertEquals(566, pubs_simrel);
 			assertEquals(113, sw_simrel);
 			assertEquals(148, ds_simrel);
@ -251,7 +251,7 @@ public class SparkDedupTest implements Serializable {

 		// entities simrels supposed to be equal to the number of previous step (no rels in whitelist)
 		if (CHECK_CARDINALITIES) {
-			assertEquals(751, orgs_simrel);
+			assertEquals(742, orgs_simrel);
 			assertEquals(566, pubs_simrel);
 			assertEquals(148, ds_simrel);
 			assertEquals(280, orp_simrel);
@ -442,7 +442,7 @@ public class SparkDedupTest implements Serializable {
 		final List<Relation> merges = pubs
 			.filter("source == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'")
 			.collectAsList();
-		assertEquals(3, merges.size());
+		assertEquals(1, merges.size());
 		Set<String> dups = Sets
 			.newHashSet(
 				"50|doi_________::3b1d0d8e8f930826665df9d6b82fbb73",
@ -451,7 +451,7 @@ public class SparkDedupTest implements Serializable {
 		merges.forEach(r -> {
 			assertEquals(ModelConstants.RESULT_RESULT, r.getRelType());
 			assertEquals(ModelConstants.DEDUP, r.getSubRelType());
-			assertEquals(ModelConstants.MERGES, r.getRelClass());
+			assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass());
 			assertTrue(dups.contains(r.getTarget()));
 		});

@ -561,7 +561,7 @@ public class SparkDedupTest implements Serializable {
 		System.out.println("orp_mergerel = " + orp_mergerel);

 		if (CHECK_CARDINALITIES) {
-			assertEquals(1268, orgs_mergerel);
+			assertEquals(1278, orgs_mergerel);
 			assertEquals(1156, pubs.count());
 			assertEquals(292, sw_mergerel);
 			assertEquals(476, ds_mergerel);
@ -618,7 +618,7 @@ public class SparkDedupTest implements Serializable {
 		System.out.println("orp_deduprecord = " + orp_deduprecord);

 		if (CHECK_CARDINALITIES) {
-			assertEquals(86, orgs_deduprecord);
+			assertEquals(78, orgs_deduprecord);
 			assertEquals(96, pubs.count());
 			assertEquals(47, sw_deduprecord);
 			assertEquals(97, ds_deduprecord);
@ -761,7 +761,7 @@ public class SparkDedupTest implements Serializable {

 		if (CHECK_CARDINALITIES) {
 			assertEquals(930, publications);
-			assertEquals(839, organizations);
+			assertEquals(831, organizations);
 			assertEquals(100, projects);
 			assertEquals(100, datasource);
 			assertEquals(196, softwares);
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java
@ -22,8 +22,11 @@ import java.util.Properties;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.*;
 import org.junit.jupiter.api.extension.ExtendWith;
@ -143,7 +146,7 @@ public class SparkOpenorgsDedupTest implements Serializable {
 			.load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization"))
 			.count();

-		assertEquals(86, orgs_simrel);
+		assertEquals(92, orgs_simrel);
 	}

 	@Test
@ -172,7 +175,7 @@ public class SparkOpenorgsDedupTest implements Serializable {
 			.load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization"))
 			.count();

-		assertEquals(122, orgs_simrel);
+		assertEquals(128, orgs_simrel);
 	}

 	@Test
@ -207,7 +210,7 @@ public class SparkOpenorgsDedupTest implements Serializable {
 			.read()
 			.load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")
 			.count();
-		assertEquals(132, orgs_mergerel);
+		assertEquals(128, orgs_mergerel);

 		// verify that a DiffRel is in the mergerels (to be sure that the job supposed to remove them has something to
 		// do)
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java
@ -9,6 +9,7 @@ import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 import org.junit.platform.commons.util.StringUtils;

+import eu.dnetlib.dhp.oa.dedup.SparkOpenorgsDedupTest;
 import eu.dnetlib.pace.config.DedupConfig;
 import eu.dnetlib.pace.model.SparkModel;

@ -24,6 +25,31 @@ class JsonPathTest {

 		Row row = SparkModel.apply(conf).rowFromJson(org);

+		System.out.println("row = " + row);
+		Assertions.assertNotNull(row);
+		Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier")));
+
+		System.out.println("row = " + row.getAs("countrytitle"));
+	}
+
+	@Test
+	void jsonToModelTest() throws IOException {
+		DedupConfig conf = DedupConfig
+			.load(
+				IOUtils
+					.toString(
+						SparkOpenorgsDedupTest.class
+							.getResourceAsStream(
+								"/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json")));
+
+		final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json"));
+
+		Row row = SparkModel.apply(conf).rowFromJson(org);
+		// parse the same JSON a second time: both parses are expected to yield equal rows
+		Row row1 = SparkModel.apply(conf).rowFromJson(org);
+
+		Assertions.assertEquals(row, row1);
+		System.out.println("row = " + row);
 		Assertions.assertNotNull(row);
 		Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier")));
 	}
--- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json
@ -4,8 +4,8 @@
    "dedupRun" : "001",
    "entityType" : "organization",
    "subEntityValue": "organization",
-    "orderField" : "legalname",
-    "queueMaxSize" : "2000",
+    "orderField" : "original_legalname",
+    "queueMaxSize" : "100000",
    "groupMaxSize" : "50",
    "slidingWindowSize" : "200",
    "idPath":"$.id",
@ -15,10 +15,10 @@
  },
  "pace" : {
    "clustering" : [
-      { "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} },
-      { "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : "3" } },
+      { "name" : "sortedngrampairs", "fields" : [ "original_legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} },
+      { "name" : "suffixprefix", "fields" : [ "original_legalname" ], "params" : { "max" : 1, "len" : "3" } },
      { "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } },
-      { "name" : "keywordsclustering", "fields" : [ "legalname" ], "params" : { "max": 2, "windowSize": 4} }
+      { "name" : "legalnameclustering", "fields" : [ "legalname" ], "params" : { "max": 2} }
    ],
    "decisionTree" : {
      "start": {
@ -29,16 +29,23 @@
            "weight": 1,
            "countIfUndefined": "false",
            "params": {}
+          },
+          {
+            "field": "rorid",
+            "comparator": "exactMatch",
+            "weight": 1,
+            "countIfUndefined": "false",
+            "params": {}
          }
        ],
        "threshold": 1,
-        "aggregation": "AVG",
+        "aggregation": "OR",
        "positive": "MATCH",
        "negative": "NO_MATCH",
-        "undefined": "layer2",
+        "undefined": "necessaryConditions",
        "ignoreUndefined": "false"
      },
-      "layer2": {
+      "necessaryConditions": {
        "fields": [
          {
            "field": "websiteurl",
@ -49,20 +56,20 @@
          },
          {
            "field": "country",
-            "comparator": "exactMatch",
+            "comparator": "countryMatch",
            "weight": 1,
            "countIfUndefined": "true",
            "params": {}
          },
          {
-            "field": "legalname",
+            "field": "original_legalname",
            "comparator": "numbersMatch",
            "weight": 1,
            "countIfUndefined": "true",
            "params": {}
          },
          {
-            "field": "legalname",
+            "field": "original_legalname",
            "comparator": "romansMatch",
            "weight": 1,
            "countIfUndefined": "true",
@ -71,68 +78,64 @@
        ],
        "threshold": 1,
        "aggregation": "AND",
-        "positive": "layer3",
+        "positive": "cityCheck",
        "negative": "NO_MATCH",
-        "undefined": "layer3",
+        "undefined": "cityCheck",
        "ignoreUndefined": "true"
      },
-      "layer3": {
+      "cityCheck": {
        "fields": [
          {
            "field": "legalname",
-            "comparator": "cityMatch",
+            "comparator": "codeMatch",
            "weight": 1.0,
            "countIfUndefined": "true",
            "params": {
-              "windowSize": "4"
+              "codeRegex": "city::\\d+"
            }
          }
        ],
        "threshold": 0.1,
        "aggregation": "AVG",
-        "positive": "layer4",
+        "positive": "keywordCheck",
        "negative": "NO_MATCH",
        "undefined": "NO_MATCH",
        "ignoreUndefined": "true"
      },
-      "layer4": {
+      "keywordCheck": {
        "fields": [
          {
            "field": "legalname",
-            "comparator": "keywordMatch",
+            "comparator": "codeMatch",
            "weight": 1.0,
            "countIfUndefined": "true",
            "params": {
-              "windowSize": "4"
+              "codeRegex": "key::\\d+"
            }
          }
        ],
        "threshold": 0.7,
        "aggregation": "AVG",
-        "positive": "layer5",
+        "positive": "nameCheck",
        "negative": "NO_MATCH",
-        "undefined": "layer5",
+        "undefined": "nameCheck",
        "ignoreUndefined": "true"
      },
-      "layer5": {
+      "nameCheck": {
        "fields": [
          {
            "field": "legalname",
-            "comparator": "jaroWinklerNormalizedName",
+            "comparator": "jaroWinklerLegalname",
            "weight": 0.9,
            "countIfUndefined": "true",
-            "params": {
-              "windowSize": "4"
-            }
+            "params": {}
          },
          {
            "field": "legalshortname",
-            "comparator": "jaroWinklerNormalizedName",
+            "comparator": "jaroWinklerLegalname",
            "weight": 0.1,
            "countIfUndefined": "false",
-            "params": {
-              "windowSize": 4
-            }
+            "params": {}
          }
        ],
        "threshold": 0.9,
@ -144,126 +147,16 @@
      }
    },
    "model" : [
-      { "name" : "country", "type" : "String", "path" : "$.country.classid"},
-      { "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value"},
-      { "name" : "legalname", "type" : "String", "path" : "$.legalname.value" },
+      { "name" : "country", "type" : "String", "path" : "$.country.classid", "infer" : "country", "inferenceFrom" : "$.legalname.value"},
+      { "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value", "infer" : "city_keyword"},
+      { "name" : "original_legalname", "type" : "String", "path" : "$.legalname.value" },
+      { "name" : "legalname", "type" : "String", "path" : "$.legalname.value", "infer" : "city_keyword"},
      { "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl.value" },
      { "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid')].value"},
+      { "name" : "rorid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='ROR')].value"},
      { "name" : "originalId", "type" : "String", "path" : "$.id" }
    ],
-    "blacklists" : {
-      "legalname" : []
-    },
-    "synonyms": {
-      "key::1": ["university","università", "universitas", "università studi","universitario","universitaria","université", "universite", "universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti", "universiti"],
-      "key::2": ["studies","studi","études","estudios","estudos","Studien","studia","исследования","studies","σπουδές"],
-      "key::3": ["advanced","superiore","supérieur","supérieure","supérieurs","supérieures","avancado","avancados","fortgeschrittene","fortgeschritten","zaawansowany","передовой","gevorderd","gevorderde","προχωρημένος","προχωρημένη","προχωρημένο","προχωρημένες","προχωρημένα","wyzsza"],
-      "key::4": ["institute","istituto","institut","instituto","instituto","Institut","instytut","институт","instituut","ινστιτούτο"],
-      "key::5": ["hospital","ospedale","hôpital","hospital","hospital","Krankenhaus","szpital","больница","ziekenhuis","νοσοκομείο"],
-      "key::6": ["research","ricerca","recherche","investigacion","pesquisa","Forschung","badania","исследования","onderzoek","έρευνα","erevna","erevnas"],
-      "key::7": ["college","collegio","colegio","faculdade","Hochschule","Szkoła Wyższa","Высшая школа","κολλέγιο"],
-      "key::8": ["foundation","fondazione","fondation","fundación","fundação","Stiftung","Fundacja","фонд","stichting","ίδρυμα","idryma"],
-      "key::9": ["center","centro","centre","centro","centro","zentrum","centrum","центр","centrum","κέντρο"],
-      "key::10": ["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό"],
-      "key::11": ["association","associazione","association","asociación","associação","Verein","verband","stowarzyszenie","ассоциация","associatie"],
-      "key::12": ["society","societa","société","sociedad","sociedade","gesellschaft","społeczeństwo","общество","maatschappij","κοινωνία"],
-      "key::13": ["international","internazionale","international","internacional","internacional","international","międzynarodowy","Международный","internationaal","internationale","διεθνής","διεθνή","διεθνές"],
-      "key::14": ["community","comunita","communauté","comunidad","comunidade","Gemeinschaft","społeczność","сообщество","gemeenschap","κοινότητα"],
-      "key::15": ["school","scuola","école","escuela","escola","schule","Szkoła","школа","school","σχολείο"],
-      "key::16": ["education","educazione","éducation","educacion","Educação","Bildung","Edukacja","образование","opleiding","εκπαίδευση"],
-      "key::17": ["academy","accademia","académie","academia","academia","Akademie","akademie","академия","academie","ακαδημία"],
-      "key::18": ["public","pubblico","public","publique","publics","publiques","publico","publico","Öffentlichkeit","publiczny","публичный","publiek","publieke","δημόσιος","δημόσια","δημόσιο"],
-      "key::19": ["museum","museo","musée","mueso","museu","museum","muzeum","музей","museum","μουσείο"],
-      "key::20": ["group","gruppo","groupe","grupo","grupo","gruppe","grupa","группа","groep","ομάδα","όμιλος"],
-      "key::21": ["department","dipartimento","département","departamento","departamento","abteilung","departament","отдел","afdeling","τμήμα"],
-      "key::22": ["council","consiglio","conseil","Consejo","conselho","gesellschaft","rada","совет","raad","συμβούλιο"],
-      "key::23": ["library","biblioteca","bibliothèque","biblioteca","biblioteca","Bibliothek","biblioteka","библиотека","bibliotheek","βιβλιοθήκη"],
-      "key::24": ["ministry","ministero","ministère","ministerio","ministério","Ministerium","ministerstwo","министерство","ministerie","υπουργείο"],
-      "key::25": ["services","servizi","services","servicios","Serviços","Dienstleistungen","usługi","услуги","diensten","υπηρεσίες"],
-      "key::26": ["central","centrale","central","centrale","centrales","central","central","zentral","centralny","цетральный","centraal","κεντρικός","κεντρική","κεντρικό","κεντρικά"],
-      "key::27": ["general","generale","général","générale","généraux","générales","general","geral","general","Allgemeines","general","общий","algemeen","algemene","γενικός","γενική","γενικό","γενικά"],
-      "key::28": ["applied","applicati","appliqué","appliquée","appliqués","appliquées","aplicado","aplicada","angewendet","stosowany","прикладной","toegepast","toegepaste","εφαρμοσμένος","εφαρμοσμένη","εφαρμοσμένο","εφαρμοσμένα"],
-      "key::29": ["european","europee","europea","européen","européenne","européens","européennes","europeo","europeu","europäisch","europejski","европейский","Europees","Europese","ευρωπαϊκός","ευρωπαϊκή","ευρωπαϊκό","ευρωπαϊκά"],
-      "key::30": ["agency","agenzia","agence","agencia","agencia","agentur","agencja","агенция","agentschap","πρακτορείο"],
-      "key::31": ["laboratory","laboratorio","laboratoire","laboratorio","laboratorio","labor","laboratorium","лаборатория","laboratorium","εργαστήριο"],
-      "key::32": ["industry","industria","industrie","индустрия","industrie","βιομηχανία"],
-      "key::33": ["industrial","industriale","industriel","industrielle","industriels","industrielles","индустриальный","industrieel","βιομηχανικός","βιομηχανική","βιομηχανικό","βιομηχανικά","βιομηχανικές"],
-      "key::34": ["consortium","consorzio","consortium","консорциум","consortium","κοινοπραξία"],
-      "key::35": ["organization","organizzazione","organisation","organización","organização","organizacja","организация","organisatie","οργανισμός"],
-      "key::36": ["authority","autorità","autorité","авторитет","autoriteit"],
-      "key::37": ["federation","federazione","fédération","федерация","federatie","ομοσπονδία"],
-      "key::38": ["observatory","osservatorio","observatoire","обсерватория","observatorium","αστεροσκοπείο"],
-      "key::39": ["bureau","ufficio","bureau","офис","bureau","γραφείο"],
-      "key::40": ["company","impresa","compagnie","société","компания","bedrijf","εταιρία"],
-      "key::41": ["polytechnic","politecnico","polytechnique","политехника","polytechnisch","πολυτεχνείο","universita politecnica","polytechnic university","universidad politecnica","universitat politecnica","politechnika","politechniki","university technology","university science technology"],
-      "key::42": ["coalition","coalizione","coalition","коалиция","coalitie","συνασπισμός"],
-      "key::43": ["initiative","iniziativa","initiative","инициатива","initiatief","πρωτοβουλία"],
-      "key::44": ["academic","accademico","académique","universitaire","акадеческий academisch","ακαδημαϊκός","ακαδημαϊκή","ακαδημαϊκό","ακαδημαϊκές","ακαδημαϊκοί"],
-      "key::45": ["institution","istituzione","institution","институциональный","instelling","ινστιτούτο"],
-      "key::46": ["division","divisione","division","отделение","divisie","τμήμα"],
-      "key::47": ["committee","comitato","comité","комитет","commissie","επιτροπή"],
-      "key::48": ["promotion","promozione","продвижение","proothisis","forderung"],
-      "key::49": ["medical","medicine","clinical","medicina","clinici","médico","medicina","clínica","médico","medicina","clínica","medizinisch","Medizin","klinisch","medisch","geneeskunde","klinisch","ιατρικός","ιατρική","ιατρικό","ιατρικά","κλινικός","κλινική","κλινικό","κλινικά","tıbbi","tıp","klinik","orvosi","orvostudomány","klinikai","zdravniški","medicinski","klinični","meditsiini","kliinik","kliiniline"],
-      "key::50": ["technology","technological","tecnologia","tecnologie","tecnología","tecnológico","tecnologia","tecnológico","Technologie","technologisch","technologie","technologisch","τεχνολογία","τεχνολογικός","τεχνολογική","τεχνολογικό","teknoloji","teknolojik","technológia","technológiai","tehnologija","tehnološki","tehnoloogia","tehnoloogiline","technologii","technical","texniki","teknik"],
-      "key::51": ["science","scientific","scienza","scientifiche","scienze","ciencia","científico","ciência","científico","Wissenschaft","wissenschaftlich","wetenschap","wetenschappelijk","επιστήμη","επιστημονικός","επιστημονική","επιστημονικό","επιστημονικά","bilim","bilimsel","tudomány","tudományos","znanost","znanstveni","teadus","teaduslik",""],
-      "key::52": ["engineering","ingegneria","ingeniería","engenharia","Ingenieurwissenschaft","ingenieurswetenschappen","bouwkunde","μηχανικός","μηχανική","μηχανικό","mühendislik","mérnöki","Inženirstvo","inseneeria","inseneri",""],
-      "key::53": ["management","gestione","gestionale","gestionali","gestión","administración","gestão","administração","Verwaltung","management","διαχείριση","yönetim","menedzsment","vodstvo","upravljanje","management","juhtkond","juhtimine","haldus",""],
-      "key::54": ["energy","energia","energía","energia","Energie","energie","ενέργεια","enerji","energia","energija","energia",""],
-      "key::55": ["agricultural","agriculture","agricoltura","agricole","agrícola","agricultura","agrícola","agricultura","landwirtschaftlich","Landwirtschaft","landbouwkundig","landbouw","αγροτικός","αγροτική","αγροτικό","γεωργικός","γεωργική","γεωργικό","γεωργία","tarımsal","tarım","mezőgazdasági","mezőgazdaság","poljedelski","poljedelstvo","põllumajandus","põllumajanduslik",""],
-      "key::56": ["information","informazione","información","informação","Information","informatie","πληροφορία","bilgi","információ","informacija","informatsioon","informatycznych",""],
-      "key::57": ["social","sociali","social","social","Sozial","sociaal","maatschappelijk","κοινωνικός","κοινωνική","κοινωνικό","κοινωνικά","sosyal","szociális","družbeni","sotsiaal","sotsiaalne",""],
-      "key::58": ["environmental","ambiente","medioambiental","ambiente","medioambiente","meioambiente","Umwelt","milieu","milieuwetenschap","milieukunde","περιβαλλοντικός","περιβαλλοντική","περιβαλλοντικό","περιβαλλοντικά","çevre","környezeti","okoliški","keskonna",""],
-      "key::59": ["business","economia","economiche","economica","negocio","empresa","negócio","Unternehmen","bedrijf","bedrijfskunde","επιχείρηση","iş","üzleti","posel","ettevõte/äri",""],
-      "key::60": ["pharmaceuticals","pharmacy","farmacia","farmaceutica","farmacéutica","farmacia","farmacêutica","farmácia","Pharmazeutika","Arzneimittelkunde","farmaceutica","geneesmiddelen","apotheek","φαρμακευτικός","φαρμακευτική","φαρμακευτικό","φαρμακευτικά","φαρμακείο","ilaç","eczane","gyógyszerészeti","gyógyszertár","farmacevtika","lekarništvo","farmaatsia","farmatseutiline",""],
-      "key::61": ["healthcare","health services","salute","atenciónmédica","cuidadodelasalud","cuidadoscomasaúde","Gesundheitswesen","gezondheidszorg","ιατροφαρμακευτικήπερίθαλψη","sağlıkhizmeti","egészségügy","zdravstvo","tervishoid","tervishoiu",""],
-      "key::62": ["history","storia","historia","história","Geschichte","geschiedenis","geschiedkunde","ιστορία","tarih","történelem","zgodovina","ajalugu",""],
-      "key::63": ["materials","materiali","materia","materiales","materiais","materialen","υλικά","τεκμήρια","malzemeler","anyagok","materiali","materjalid","vahendid",""],
-      "key::64": ["economics","economia","economiche","economica","economía","economia","Wirtschaft","economie","οικονομικά","οικονομικέςεπιστήμες","ekonomi","közgazdaságtan","gospodarstvo","ekonomija","majanduslik","majandus",""],
-      "key::65": ["therapeutics","terapeutica","terapéutica","terapêutica","therapie","θεραπευτική","tedavibilimi","gyógykezelés","terapevtika","terapeutiline","ravi",""],
-      "key::66": ["oncology","oncologia","oncologico","oncología","oncologia","Onkologie","oncologie","ογκολογία","onkoloji","onkológia","onkologija","onkoloogia",""],
-      "key::67": ["natural","naturali","naturale","natural","natural","natürlich","natuurlijk","φυσικός","φυσική","φυσικό","φυσικά","doğal","természetes","naraven","loodus",""],
-      "key::68": ["educational","educazione","pedagogia","educacional","educativo","educacional","pädagogisch","educatief","εκπαιδευτικός","εκπαιδευτική","εκπαιδευτικό","εκπαιδευτικά","eğitimsel","oktatási","izobraževalen","haridus","hariduslik",""],
-      "key::69": ["biomedical","biomedica","biomédico","biomédico","biomedizinisch","biomedisch","βιοιατρικός","βιοιατρική","βιοιατρικό","βιοιατρικά","biyomedikal","orvosbiológiai","biomedicinski","biomeditsiiniline",""],
-      "key::70": ["veterinary","veterinaria","veterinarie","veterinaria","veterinária","tierärtzlich","veterinair","veeartsenijlkunde","κτηνιατρικός","κτηνιατρική","κτηνιατρικό","κτηνιατρικά","veteriner","állatorvosi","veterinar","veterinarski","veterinaaria",""],
-      "key::71": ["chemistry","chimica","química","química","Chemie","chemie","scheikunde","χημεία","kimya","kémia","kemija","keemia",""],
-      "key::72": ["security","sicurezza","seguridad","segurança","Sicherheit","veiligheid","ασφάλεια","güvenlik","biztonsági","varnost","turvalisus","julgeolek",""],
-      "key::73": ["biotechnology","biotecnologia","biotecnologie","biotecnología","biotecnologia","Biotechnologie","biotechnologie","βιοτεχνολογία","biyoteknoloji","biotechnológia","biotehnologija","biotehnoloogia",""],
-      "key::74": ["military","militare","militari","militar","militar","Militär","militair","leger","στρατιωτικός","στρατιωτική","στρατιωτικό","στρατιωτικά","askeri","katonai","vojaški","vojni","militaar","wojskowa",""],
-      "key::75": ["theological","teologia","teologico","teológico","tecnológica","theologisch","theologisch","θεολογικός","θεολογική","θεολογικό","θεολογικά","teolojik","technológiai","teološki","teoloogia","usuteadus","teoloogiline",""],
-      "key::76": ["electronics","elettronica","electrónica","eletrônicos","Elektronik","elektronica","ηλεκτρονική","elektronik","elektronika","elektronika","elektroonika",""],
-      "key::77": ["forestry","forestale","forestali","silvicultura","forestal","floresta","Forstwirtschaft","bosbouw","δασοκομία","δασολογία","ormancılık","erdészet","gozdarstvo","metsandus",""],
-      "key::78": ["maritime","marittima","marittime","marittimo","marítimo","marítimo","maritiem","ναυτικός","ναυτική","ναυτικό","ναυτικά","ναυτιλιακός","ναυτιλιακή","ναυτιλιακό","ναυτιλιακά","θαλάσσιος","θαλάσσια","θαλάσσιο","denizcilik","tengeri","morski","mere","merendus",""],
-      "key::79": ["sports","sport","deportes","esportes","Sport","sport","sportwetenschappen","άθληση","γυμναστικήδραστηριότητα","spor","sport","šport","sport","spordi",""],
-      "key::80": ["surgery","chirurgia","chirurgiche","cirugía","cirurgia","Chirurgie","chirurgie","heelkunde","εγχείρηση","επέμβαση","χειρουργικήεπέμβαση","cerrahi","sebészet","kirurgija","kirurgia",""],
-      "key::81": ["cultural","culturale","culturali","cultura","cultural","cultural","kulturell","cultureel","πολιτιστικός","πολιτιστική","πολιτιστικό","πολιτισμικός","πολιτισμική","πολιτισμικό","kültürel","kultúrális","kulturni","kultuuri","kultuuriline",""],
-      "key::82": ["computerscience","informatica","ordenador","computadora","informática","computación","cienciasdelacomputación","ciênciadacomputação","Computer","computer","υπολογιστής","ηλεκτρονικόςυπολογιστής","bilgisayar","számítógép","računalnik","arvuti",""],
-      "key::83": ["finance","financial","finanza","finanziarie","finanza","financiero","finanças","financeiro","Finanzen","finanziell","financiën","financieel","χρηματοοικονομικά","χρηματοδότηση","finanse","finansal","pénzügy","pénzügyi","finance","finančni","finants","finantsiline",""],
-      "key::84": ["communication","comunicazione","comuniciación","comunicação","Kommunikation","communication","επικοινωνία","iletişim","kommunikáció","komuniciranje","kommunikatsioon",""],
-      "key::85": ["justice","giustizia","justicia","justiça","Recht","Justiz","justitie","gerechtigheid","δικαιοσύνη","υπουργείοδικαιοσύνης","δίκαιο","adalet","igazságügy","pravo","õigus",""],
-      "key::86": ["aerospace","aerospaziale","aerospaziali","aeroespacio","aeroespaço","Luftfahrt","luchtvaart","ruimtevaart","αεροπορικός","αεροπορική","αεροπορικό","αεροναυπηγικός","αεροναυπηγική","αεροναυπηγικό","αεροναυπηγικά","havacılıkveuzay","légtér","zrakoplovstvo","atmosfäär","kosmos",""],
-      "key::87": ["dermatology","dermatologia","dermatología","dermatologia","Dermatologie","dermatologie","δρματολογία","dermatoloji","bőrgyógyászat","dermatológia","dermatologija","dermatoloogia",""],
-      "key::88": ["architecture","architettura","arquitectura","arquitetura","Architektur","architectuur","αρχιτεκτονική","mimarlık","építészet","arhitektura","arhitektuur",""],
-      "key::89": ["mathematics","matematica","matematiche","matemáticas","matemáticas","Mathematik","wiskunde","mathematica","μαθηματικά","matematik","matematika","matematika","matemaatika",""],
-      "key::90": ["language","lingue","linguistica","linguistiche","lenguaje","idioma","língua","idioma","Sprache","taal","taalkunde","γλώσσα","dil","nyelv","jezik","keel",""],
-      "key::91": ["neuroscience","neuroscienza","neurociencia","neurociência","Neurowissenschaft","neurowetenschappen","νευροεπιστήμη","nörobilim","idegtudomány","nevroznanost","neuroteadused",""],
-      "key::92": ["automation","automazione","automatización","automação","Automatisierung","automatisering","αυτοματοποίηση","otomasyon","automatizálás","avtomatizacija","automatiseeritud",""],
-      "key::93": ["pediatric","pediatria","pediatriche","pediatrico","pediátrico","pediatría","pediátrico","pediatria","pädiatrisch","pediatrische","παιδιατρική","pediatrik","gyermekgyógyászat","pediatrija","pediaatria",""],
-      "key::94": ["photonics","fotonica","fotoniche","fotónica","fotônica","Photonik","fotonica","φωτονική","fotonik","fotonika","fotonika","fotoonika",""],
-      "key::95": ["mechanics", "mechanical", "meccanica","meccaniche","mecánica","mecânica","Mechanik","Maschinenbau","mechanica","werktuigkunde","μηχανικής","mekanik","gépészet","mehanika","mehaanika",""],
-      "key::96": ["psychiatrics","psichiatria","psichiatrica","psichiatriche","psiquiatría","psiquiatria","Psychiatrie","psychiatrie","ψυχιατρική","psikiyatrik","pszihiátria","psihiatrija","psühhaatria",""],
-      "key::97": ["psychology","fisiologia","psicología","psicologia","Psychologie","psychologie","ψυχολογία","psikoloji","pszihológia","psihologija","psühholoogia",""],
-      "key::98": ["automotive","industriaautomobilistica","industriadelautomóvil","automotriz","industriaautomotriz","automotivo","Automobilindustrie","autoindustrie","αυτοκίνητος","αυτοκίνητη","αυτοκίνητο","αυτοκινούμενος","αυτοκινούμενη","αυτοκινούμενο","αυτοκινητιστικός","αυτοκινητιστική","αυτοκινητιστικό","otomotiv","autóipari","samogiben","avtomobilskaindustrija","auto-",""],
-      "key::99": ["neurology","neurologia","neurologiche","neurología","neurologia","Neurologie","neurologie","zenuwleer","νευρολογία","nöroloji","neurológia","ideggyógyászat","nevrologija","neuroloogia",""],
-      "key::100": ["geology","geologia","geologiche","geología","geologia","Geologie","geologie","aardkunde","γεωλογία","jeoloji","geológia","földtudomány","geologija","geoloogia",""],
-      "key::101": ["microbiology","microbiologia","micro-biologia","microbiologiche","microbiología","microbiologia","Mikrobiologie","microbiologie","μικροβιολογία","mikrobiyoloji","mikrobiológia","mikrobiologija","mikrobioloogia",""],
-      "key::102": ["informatics","informatica","informática","informática","informatica",""],
-      "key::103": ["forschungsgemeinschaft","comunita ricerca","research community","research foundation","research association"],
-      "key::104": ["commerce","ticaret","ticarət","commercio","trade","handel","comercio"],
-      "key::105" : ["state", "stato", "etade", "estado", "statale", "etat", "zustand", "estado"],
-      "key::106" : ["seminary", "seminario", "seminaire", "seminar"],
-      "key::107" : ["agricultural forestry", "af", "a f"],
-      "key::108" : ["agricultural mechanical", "am", "a m"],
-      "key::109" : ["catholic", "catholique", "katholische", "catolica", "cattolica", "catolico"]
-    }
+    "blacklists" : {},
+    "synonyms": {}
  }
 }
--- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/oa/dedup/jpath/organization_example1.json
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/oa/dedup/jpath/organization_example1.json
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
@ -33,10 +33,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.bulktag.community.*;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.Context;
-import eu.dnetlib.dhp.schema.oaf.Datasource;
-import eu.dnetlib.dhp.schema.oaf.Project;
-import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;

@ -114,27 +111,35 @@ public class SparkBulkTagJob {
 				extendCommunityConfigurationForEOSC(spark, inputPath, cc);
 				execBulkTag(
 					spark, inputPath, outputPath, protoMap, cc);
+				execEntityTag(
+					spark, inputPath + "organization", outputPath + "organization",
+					Utils.getCommunityOrganization(baseURL), Organization.class, TaggingConstants.CLASS_ID_ORGANIZATION,
+					TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION);
+				execEntityTag(
+					spark, inputPath + "project", outputPath + "project", Utils.getCommunityProjects(baseURL),
+					Project.class, TaggingConstants.CLASS_ID_PROJECT, TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);
 				execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL));
-				execProjectTag(spark, inputPath, outputPath, Utils.getCommunityProjects(baseURL));
+
 			});
 	}

-	private static void execProjectTag(SparkSession spark, String inputPath, String outputPath,
-		CommunityEntityMap communityProjects) {
-		Dataset<Project> projects = readPath(spark, inputPath + "project", Project.class);
+	/**
+	 * Tags the entities of the given type with the communities they are associated to.
+	 * <p>
+	 * The entities read from {@code inputPath} are left-joined on their id with the entries built from
+	 * {@code communityEntity}; entities without a matching entry are returned unchanged. The result is written as
+	 * gzip-compressed JSON to {@code outputPath} and is then read back and written over {@code inputPath}.
+	 *
+	 * @param spark the Spark session used to read and write the datasets
+	 * @param inputPath location of the entities to tag; overwritten with the tagged entities at the end
+	 * @param outputPath location where the tagged entities are first written
+	 * @param communityEntity map keyed by entity id, whose values are handed to {@code EntityCommunities.newInstance}
+	 * @param entityClass bean class of the entities being tagged
+	 * @param classID class id of the provenance-action qualifier used for the tagging data info
+	 * @param className class name of the provenance-action qualifier used for the tagging data info
+	 * @param <E> type of the entities being tagged
+	 */
+	private static <E extends OafEntity> void execEntityTag(SparkSession spark, String inputPath, String outputPath,
+		CommunityEntityMap communityEntity, Class<E> entityClass,
+		String classID, String className) {
+		Dataset<E> entity = readPath(spark, inputPath, entityClass);
 		Dataset<EntityCommunities> pc = spark
 			.createDataset(
-				communityProjects
+				communityEntity
 					.keySet()
 					.stream()
-					.map(k -> EntityCommunities.newInstance(k, communityProjects.get(k)))
+					.map(k -> EntityCommunities.newInstance(k, communityEntity.get(k)))
 					.collect(Collectors.toList()),
 				Encoders.bean(EntityCommunities.class));

-		projects
-			.joinWith(pc, projects.col("id").equalTo(pc.col("entityId")), "left")
-			.map((MapFunction<Tuple2<Project, EntityCommunities>, Project>) t2 -> {
-				Project ds = t2._1();
+		entity
+			.joinWith(pc, entity.col("id").equalTo(pc.col("entityId")), "left")
+			.map((MapFunction<Tuple2<E, EntityCommunities>, E>) t2 -> {
+				E ds = t2._1();
 				if (t2._2() != null) {
 					List<String> context = Optional
 						.ofNullable(ds.getContext())
@ -156,8 +161,8 @ public class SparkBulkTagJob {
 													false, TaggingConstants.BULKTAG_DATA_INFO_TYPE, true, false,
 													OafMapperUtils
 														.qualifier(
-															TaggingConstants.CLASS_ID_DATASOURCE,
-															TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE,
+															classID,
+															className,
 															ModelConstants.DNET_PROVENANCE_ACTIONS,
 															ModelConstants.DNET_PROVENANCE_ACTIONS),
 													"1")));
@ -166,17 +171,17 @ public class SparkBulkTagJob {
 					});
 				}
 				return ds;
-			}, Encoders.bean(Project.class))
+			}, Encoders.bean(entityClass))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
-			.json(outputPath + "project");
+			.json(outputPath);

-		readPath(spark, outputPath + "project", Project.class)
+		readPath(spark, outputPath, entityClass)
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
-			.json(inputPath + "project");
+			.json(inputPath);
 	}

 	private static void execDatasourceTag(SparkSession spark, String inputPath, String outputPath,
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java
@ -13,6 +13,9 @@ public class TaggingConstants {
 	public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
 	public static final String CLASS_ID_ADVANCED_CONSTRAINT = "community:advconstraint";

+	// Provenance-action class ids passed by SparkBulkTagJob when tagging projects and organizations
+	// with their communities.
+	public static final String CLASS_ID_PROJECT = "community:project";
+	public static final String CLASS_ID_ORGANIZATION = "community:organization";
+
 	public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";

 	public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
@ -20,5 +23,8 @@ public class TaggingConstants {
 	public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
 	public static final String CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT = "Bulktagging for Community - Advanced Constraints";

+	// Provenance-action class names passed by SparkBulkTagJob together with CLASS_ID_PROJECT and
+	// CLASS_ID_ORGANIZATION respectively.
+	public static final String CLASS_NAME_BULKTAG_PROJECT = "Bulktagging for Community - Project";
+	public static final String CLASS_NAME_BULKTAG_ORGANIZATION = "Bulktagging for Community - Organization";
+
 	public static final String TAGGING_TRUST = "0.8";
 }
--- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java
+++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java
@ -465,6 +465,138 @@ public class BulkTagJobTest {

 	}

+	/**
+	 * Runs {@link SparkBulkTagJob} on the update_datasource sample and verifies the bulktagging contexts found on
+	 * the organizations written to the working directory: their number, their provenance action and the
+	 * communities they refer to.
+	 */
+	@Test
+	void organizationTag() throws Exception {
+		final String sourcePath = getClass()
+			.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
+			.getPath();
+
+		// copy the path map to the location handed to the job through -pathMap
+		final LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
+		final org.apache.hadoop.fs.Path pathMapSource = new org.apache.hadoop.fs.Path(
+			getClass().getResource("/eu/dnetlib/dhp/bulktag/pathMap/").getPath());
+		final String workDir = workingDir.toString();
+		final org.apache.hadoop.fs.Path protoMapTarget = new org.apache.hadoop.fs.Path(
+			workDir + "/data/bulktagging/protoMap");
+		localFs.copyFromLocalFile(false, pathMapSource, protoMapTarget);
+
+		final String[] jobArgs = {
+			"-isSparkSessionManaged", Boolean.FALSE.toString(),
+			"-sourcePath", sourcePath,
+			"-taggingConf", taggingConf,
+			"-outputPath", workDir + "/",
+			"-baseURL", "https://services.openaire.eu/openaire/community/",
+			"-pathMap", workDir + "/data/bulktagging/protoMap/pathMap",
+			"-nameNode", "local"
+		};
+		SparkBulkTagJob.main(jobArgs);
+
+		// load the organizations produced by the job
+		final JavaRDD<Organization> organizations = JavaSparkContext
+			.fromSparkContext(spark.sparkContext())
+			.textFile(workDir + "/organization")
+			.map(json -> OBJECT_MAPPER.readValue(json, Organization.class));
+		Assertions.assertEquals(4, organizations.count());
+
+		spark
+			.createDataset(organizations.rdd(), Encoders.bean(Organization.class))
+			.createOrReplaceTempView("organization");
+
+		// one row per (organization, context, datainfo) produced by bulktagging
+		final String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
+			+ "from organization "
+			+ "lateral view explode(context) c as MyT "
+			+ "lateral view explode(MyT.datainfo) d as MyD "
+			+ "where MyD.inferenceprovenance = 'bulktagging'";
+		final org.apache.spark.sql.Dataset<Row> tagged = spark.sql(query);
+		tagged.show(false);
+
+		Assertions.assertEquals(3, tagged.count());
+		Assertions.assertEquals(3, tagged.filter("provenance = 'community:organization'").count());
+		Assertions.assertEquals(3, tagged.filter("name = 'Bulktagging for Community - Organization'").count());
+
+		Assertions.assertEquals(1, tagged.filter("community = 'netherlands'").count());
+		Assertions.assertEquals(1, tagged.filter("community = 'beopen'").count());
+		Assertions.assertEquals(1, tagged.filter("community = 'mes'").count());
+	}
+
+	/**
+	 * Runs {@link SparkBulkTagJob} on the update_datasource sample and verifies the bulktagging contexts found on
+	 * the projects written to the working directory: their number, their provenance action and the communities
+	 * they refer to.
+	 */
+	@Test
+	void projectTag() throws Exception {
+		final String sourcePath = getClass()
+			.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
+			.getPath();
+
+		// copy the path map to the location handed to the job through -pathMap
+		final LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
+		final org.apache.hadoop.fs.Path pathMapSource = new org.apache.hadoop.fs.Path(
+			getClass().getResource("/eu/dnetlib/dhp/bulktag/pathMap/").getPath());
+		final String workDir = workingDir.toString();
+		final org.apache.hadoop.fs.Path protoMapTarget = new org.apache.hadoop.fs.Path(
+			workDir + "/data/bulktagging/protoMap");
+		localFs.copyFromLocalFile(false, pathMapSource, protoMapTarget);
+
+		final String[] jobArgs = {
+			"-isSparkSessionManaged", Boolean.FALSE.toString(),
+			"-sourcePath", sourcePath,
+			"-taggingConf", taggingConf,
+			"-outputPath", workDir + "/",
+			"-baseURL", "https://services.openaire.eu/openaire/community/",
+			"-pathMap", workDir + "/data/bulktagging/protoMap/pathMap",
+			"-nameNode", "local"
+		};
+		SparkBulkTagJob.main(jobArgs);
+
+		// load the projects produced by the job
+		final JavaRDD<Project> projects = JavaSparkContext
+			.fromSparkContext(spark.sparkContext())
+			.textFile(workDir + "/project")
+			.map(json -> OBJECT_MAPPER.readValue(json, Project.class));
+		Assertions.assertEquals(4, projects.count());
+
+		spark
+			.createDataset(projects.rdd(), Encoders.bean(Project.class))
+			.createOrReplaceTempView("project");
+
+		// one row per (project, context, datainfo) produced by bulktagging
+		final String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
+			+ "from project "
+			+ "lateral view explode(context) c as MyT "
+			+ "lateral view explode(MyT.datainfo) d as MyD "
+			+ "where MyD.inferenceprovenance = 'bulktagging'";
+		final org.apache.spark.sql.Dataset<Row> tagged = spark.sql(query);
+		tagged.show(false);
+
+		Assertions.assertEquals(4, tagged.count());
+		Assertions.assertEquals(4, tagged.filter("provenance = 'community:project'").count());
+		Assertions.assertEquals(4, tagged.filter("name = 'Bulktagging for Community - Project'").count());
+
+		Assertions.assertEquals(1, tagged.filter("community = 'enermaps'").count());
+		Assertions.assertEquals(1, tagged.filter("community = 'clarin'").count());
+		Assertions.assertEquals(2, tagged.filter("community = 'dh-ch'").count());
+	}
+
 	@Test
 	void bulktagByZenodoCommunityTest() throws Exception {
 		final String sourcePath = getClass()
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/organization
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/organization
--- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java
@ -14,4 +14,7 @@ public class ProvisionConstants {
 		return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
 	}

+	public static final String PUBLIC_ALIAS_NAME = "public";
+	public static final String SHADOW_ALIAS_NAME = "shadow";
+
 }
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java
@ -9,6 +9,7 @@ import org.apache.commons.io.IOUtils;
 import org.apache.solr.client.solrj.SolrResponse;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.response.UpdateResponse;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -23,7 +24,7 @@ public class SolrAdminApplication implements Closeable {
 	private static final Logger log = LoggerFactory.getLogger(SolrAdminApplication.class);

 	enum Action {
-		DELETE_BY_QUERY, COMMIT
+		DELETE_BY_QUERY, COMMIT, UPDATE_ALIASES
 	}

 	private final CloudSolrClient solrClient;
@ -39,9 +40,6 @@ public class SolrAdminApplication implements Closeable {
 		final String isLookupUrl = parser.get("isLookupUrl");
 		log.info("isLookupUrl: {}", isLookupUrl);

-		final String format = parser.get("format");
-		log.info("format: {}", format);
-
 		final Action action = Action.valueOf(parser.get("action"));
 		log.info("action: {}", action);

@ -59,11 +57,21 @@ public class SolrAdminApplication implements Closeable {
 		final String zkHost = isLookup.getZkHost();
 		log.info("zkHost: {}", zkHost);

-		final String collection = ProvisionConstants.getCollectionName(format);
-		log.info("collection: {}", collection);
+		final String publicFormat = parser.get("publicFormat");
+		log.info("publicFormat: {}", publicFormat);
+
+		final String shadowFormat = parser.get("shadowFormat");
+		log.info("shadowFormat: {}", shadowFormat);
+
+		// get collection names from metadata format profiles names
+		final String publicCollection = ProvisionConstants.getCollectionName(publicFormat);
+		log.info("publicCollection: {}", publicCollection);
+
+		final String shadowCollection = ProvisionConstants.getCollectionName(shadowFormat);
+		log.info("shadowCollection: {}", shadowCollection);

 		try (SolrAdminApplication app = new SolrAdminApplication(zkHost)) {
-			app.execute(action, collection, query, commit);
+			app.execute(action, query, commit, publicCollection, shadowCollection);
 		}
 	}

@ -72,22 +80,29 @@ public class SolrAdminApplication implements Closeable {
 		this.solrClient = new CloudSolrClient.Builder(zk.getHosts(), zk.getChroot()).build();
 	}

-	public SolrResponse commit(String collection) throws IOException, SolrServerException {
-		return execute(Action.COMMIT, collection, null, true);
+	public SolrResponse commit(String shadowCollection) throws IOException, SolrServerException {
+		return execute(Action.COMMIT, null, true, null, shadowCollection); // positional args: query=null, commit=true, publicCollection=null
 	}

-	public SolrResponse execute(Action action, String collection, String query, boolean commit)
+	public SolrResponse execute(Action action, String query, boolean commit,
+		String publicCollection, String shadowCollection)
 		throws IOException, SolrServerException {
 		switch (action) {

 			case DELETE_BY_QUERY:
-				UpdateResponse rsp = solrClient.deleteByQuery(collection, query);
+				UpdateResponse rsp = solrClient.deleteByQuery(shadowCollection, query);
 				if (commit) {
-					solrClient.commit(collection);
+					return solrClient.commit(shadowCollection);
 				}
 				return rsp;
+
 			case COMMIT:
-				return solrClient.commit(collection);
+				return solrClient.commit(shadowCollection);
+
+			case UPDATE_ALIASES:
+				this.updateAliases(publicCollection, shadowCollection);
+				return null;
+
 			default:
 				throw new IllegalArgumentException("action not managed: " + action);
 		}
@ -98,4 +113,30 @@ public class SolrAdminApplication implements Closeable {
 		solrClient.close();
 	}

+	private void updateAliases(String publicCollection, String shadowCollection)
+		throws SolrServerException, IOException {
+		// Re-points the PUBLIC_ALIAS_NAME and SHADOW_ALIAS_NAME aliases to the given collections, as four sequential requests.
+		// delete the current aliases first; an exception thrown here propagates and the aliases are not recreated
+		this.deleteAlias(ProvisionConstants.PUBLIC_ALIAS_NAME);
+		this.deleteAlias(ProvisionConstants.SHADOW_ALIAS_NAME);
+		// NOTE(review): between the deletes above and the creates below the aliases presumably do not exist - confirm clients tolerate the gap
+		// create the aliases again, bound to the requested collections
+		this.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, publicCollection);
+		this.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, shadowCollection);
+
+	}
+
+	public SolrResponse deleteAlias(String aliasName) throws SolrServerException, IOException {
+		CollectionAdminRequest.DeleteAlias deleteAliasRequest = CollectionAdminRequest.deleteAlias(aliasName); // request to delete aliasName
+		log.info("deleting alias: {}", aliasName);
+		return deleteAliasRequest.process(solrClient); // executed through this instance's solrClient; the response is returned unchanged
+	}
+
+	public SolrResponse createAlias(String aliasName, String collection) throws IOException, SolrServerException {
+		CollectionAdminRequest.CreateAlias createAliasRequest = CollectionAdminRequest
+			.createAlias(aliasName, collection); // request binding aliasName to collection
+		log.info("creating alias: {} for collection: {}", aliasName, collection);
+		return createAliasRequest.process(solrClient); // executed through this instance's solrClient; the response is returned unchanged
+	}
+
 }
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java
@ -36,7 +36,7 @@ public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob {

 	private final String inputPath;

-	private final String format;
+	private final String shadowFormat;

 	private final String outputPath;

@ -61,8 +61,8 @@ public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob {
 		final String inputPath = parser.get("inputPath");
 		log.info("inputPath: {}", inputPath);

-		final String format = parser.get("format");
-		log.info("format: {}", format);
+		final String shadowFormat = parser.get("shadowFormat");
+		log.info("shadowFormat: {}", shadowFormat);

 		final String outputPath = Optional
 			.ofNullable(parser.get("outputPath"))
@ -95,27 +95,24 @@ public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob {
 				final String isLookupUrl = parser.get("isLookupUrl");
 				log.info("isLookupUrl: {}", isLookupUrl);
 				final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
-				new SolrRecordDumpJob(spark, inputPath, format, outputPath).run(isLookup);
+				new SolrRecordDumpJob(spark, inputPath, shadowFormat, outputPath).run(isLookup);
 			});
 	}

-	public SolrRecordDumpJob(SparkSession spark, String inputPath, String format, String outputPath) {
+	public SolrRecordDumpJob(SparkSession spark, String inputPath, String shadowFormat, String outputPath) {
 		this.spark = spark;
 		this.inputPath = inputPath;
-		this.format = format;
+		this.shadowFormat = shadowFormat;
 		this.outputPath = outputPath;
 	}

 	public void run(ISLookupClient isLookup) throws ISLookUpException, TransformerException {
-		final String fields = isLookup.getLayoutSource(format);
+		final String fields = isLookup.getLayoutSource(shadowFormat);
 		log.info("fields: {}", fields);

 		final String xslt = isLookup.getLayoutTransformer();

-		final String dsId = isLookup.getDsId(format);
-		log.info("dsId: {}", dsId);
-
-		final String indexRecordXslt = getLayoutTransformer(format, fields, xslt);
+		final String indexRecordXslt = getLayoutTransformer(shadowFormat, fields, xslt);
 		log.info("indexRecordTransformer {}", indexRecordXslt);

 		final Encoder<TupleWrapper> encoder = Encoders.bean(TupleWrapper.class);
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java
@ -40,6 +40,8 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob {

 	private final String format;

+	private final String shadowCollection;
+
 	private final int batchSize;

 	private final SparkSession spark;
@ -63,8 +65,11 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob {
 		final String inputPath = parser.get("inputPath");
 		log.info("inputPath: {}", inputPath);

-		final String format = parser.get("format");
-		log.info("format: {}", format);
+		final String shadowFormat = parser.get("shadowFormat");
+		log.info("shadowFormat: {}", shadowFormat);
+
+		final String shadowCollection = ProvisionConstants.getCollectionName(shadowFormat);
+		log.info("shadowCollection: {}", shadowCollection);

 		final Integer batchSize = Optional
 			.ofNullable(parser.get("batchSize"))
@ -85,15 +90,17 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob {
 				final String isLookupUrl = parser.get("isLookupUrl");
 				log.info("isLookupUrl: {}", isLookupUrl);
 				final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
-				new XmlIndexingJob(spark, inputPath, format, batchSize)
+				new XmlIndexingJob(spark, inputPath, shadowFormat, shadowCollection, batchSize)
 					.run(isLookup);
 			});
 	}

-	public XmlIndexingJob(SparkSession spark, String inputPath, String format, Integer batchSize) {
+	public XmlIndexingJob(SparkSession spark, String inputPath, String format, String shadowCollection,
+		Integer batchSize) {
 		this.spark = spark;
 		this.inputPath = inputPath;
 		this.format = format;
+		this.shadowCollection = shadowCollection;
 		this.batchSize = batchSize;
 	}

@ -103,12 +110,6 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob {

 		final String xslt = isLookup.getLayoutTransformer();

-		final String dsId = isLookup.getDsId(format);
-		log.info("dsId: {}", dsId);
-
-		final String collection = ProvisionConstants.getCollectionName(format);
-		log.info("collection: {}", collection);
-
 		final String zkHost = isLookup.getZkHost();
 		log.info("zkHost: {}", zkHost);

@ -130,7 +131,7 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob {
 			.javaRDD()
 			.map(
 				t -> new StreamingInputDocumentFactory().parseDocument(t.getXml(), t.getJson()));
-		DHPSolrSupport.indexDocs(zkHost, collection, batchSize, docs.rdd());
+		DHPSolrSupport.indexDocs(zkHost, shadowCollection, batchSize, docs.rdd());
 	}

 }
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
@ -30,11 +30,14 @@ import eu.dnetlib.dhp.schema.solr.Context;
 import eu.dnetlib.dhp.schema.solr.Country;
 import eu.dnetlib.dhp.schema.solr.Datasource;
 import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
+import eu.dnetlib.dhp.schema.solr.ExternalReference;
 import eu.dnetlib.dhp.schema.solr.Instance;
 import eu.dnetlib.dhp.schema.solr.Journal;
+import eu.dnetlib.dhp.schema.solr.Measure;
 import eu.dnetlib.dhp.schema.solr.OpenAccessColor;
 import eu.dnetlib.dhp.schema.solr.OpenAccessRoute;
 import eu.dnetlib.dhp.schema.solr.Organization;
+import eu.dnetlib.dhp.schema.solr.Pid;
 import eu.dnetlib.dhp.schema.solr.Project;
 import eu.dnetlib.dhp.schema.solr.Result;
 import eu.dnetlib.dhp.schema.solr.Subject;
@ -76,6 +79,7 @@ public class ProvisionModelSupport {
 		r.setCollectedfrom(asProvenance(e.getCollectedfrom()));
 		r.setContext(asContext(e.getContext(), contextMapper));
 		r.setPid(asPid(e.getPid()));
+		r.setMeasures(mapMeasures(e.getMeasures()));

 		if (e instanceof eu.dnetlib.dhp.schema.oaf.Result) {
 			r.setResult(mapResult((eu.dnetlib.dhp.schema.oaf.Result) e));
@ -106,6 +110,14 @@ public class ProvisionModelSupport {
 		final RelatedEntity re = rew.getTarget();
 		final RecordType relatedRecordType = RecordType.valueOf(re.getType());
 		final Relation relation = rew.getRelation();
+		final String relationProvenance = Optional
+			.ofNullable(relation.getDataInfo())
+			.map(
+				d -> Optional
+					.ofNullable(d.getProvenanceaction())
+					.map(Qualifier::getClassid)
+					.orElse(null))
+			.orElse(null);
 		rr
 			.setHeader(
 				RelatedRecordHeader
@ -113,7 +125,9 @@ public class ProvisionModelSupport {
 						relation.getRelType(),
 						relation.getRelClass(),
 						StringUtils.substringAfter(relation.getTarget(), IdentifierFactory.ID_PREFIX_SEPARATOR),
-						relatedRecordType));
+						relatedRecordType,
+						relationProvenance,
+						Optional.ofNullable(relation.getDataInfo()).map(DataInfo::getTrust).orElse(null)));

 		rr.setAcronym(re.getAcronym());
 		rr.setCode(re.getCode());
@ -131,11 +145,20 @@ public class ProvisionModelSupport {
 		rr.setOfficialname(re.getOfficialname());
 		rr.setOpenairecompatibility(mapCodeLabel(re.getOpenairecompatibility()));
 		rr.setPid(asPid(re.getPid()));
-		rr.setProjectTitle(rr.getProjectTitle());
+		rr.setWebsiteurl(re.getWebsiteurl());
+		rr.setProjectTitle(re.getProjectTitle());
 		rr.setPublisher(re.getPublisher());
 		rr.setResulttype(mapQualifier(re.getResulttype()));
 		rr.setTitle(Optional.ofNullable(re.getTitle()).map(StructuredProperty::getValue).orElse(null));

+		if (relation.getValidated() == null) {
+			relation.setValidated(false);
+		}
+		if (ModelConstants.OUTCOME.equals(relation.getSubRelType())
+			&& StringUtils.isNotBlank(relation.getValidationDate())) {
+			rr.setValidationDate(relation.getValidationDate());
+		}
+
 		return rr;
 	}

@ -266,6 +289,7 @@ public class ProvisionModelSupport {
 		ds.setOfficialname(mapField(d.getOfficialname()));
 		ds.setDescription(mapField(d.getDescription()));
 		ds.setJournal(mapJournal(d.getJournal()));
+		ds.setWebsiteurl(mapField(d.getWebsiteurl()));
 		ds.setLogourl(mapField(d.getLogourl()));
 		ds.setAccessinfopackage(mapFieldList(d.getAccessinfopackage()));
 		ds.setCertificates(mapField(d.getCertificates()));
@ -311,6 +335,7 @@ public class ProvisionModelSupport {
 		ds.setSubjects(asSubjectSP(d.getSubjects()));
 		ds.setSubmissionpolicyurl(d.getSubmissionpolicyurl());
 		ds.setThematic(d.getThematic());
+		ds.setContentpolicies(mapCodeLabel(d.getContentpolicies()));
 		ds.setVersioncontrol(d.getVersioncontrol());
 		ds.setVersioning(mapField(d.getVersioning()));

@ -326,6 +351,7 @@ public class ProvisionModelSupport {
 		rs.setOtherTitles(getOtherTitles(r.getTitle()));
 		rs.setDescription(mapFieldList(r.getDescription()));
 		rs.setSubject(asSubject(r.getSubject()));
+		rs.setLanguage(asLanguage(r.getLanguage()));
 		rs.setPublicationdate(mapField(r.getDateofacceptance()));
 		rs.setPublisher(mapField(r.getPublisher()));
 		rs.setEmbargoenddate(mapField(r.getEmbargoenddate()));
@ -341,17 +367,17 @@ public class ProvisionModelSupport {
 		rs.setCountry(asCountry(r.getCountry()));
 		rs.setEoscifguidelines(asEOSCIF(r.getEoscifguidelines()));

-		rs.setGreen(r.getIsGreen());
+		rs.setIsGreen(r.getIsGreen());
 		rs
 			.setOpenAccessColor(
 				Optional
 					.ofNullable(r.getOpenAccessColor())
 					.map(color -> OpenAccessColor.valueOf(color.toString()))
 					.orElse(null));
-		rs.setInDiamondJournal(r.getIsInDiamondJournal());
+		rs.setIsInDiamondJournal(r.getIsInDiamondJournal());
 		rs.setPubliclyFunded(r.getPubliclyFunded());
 		rs.setTransformativeAgreement(r.getTransformativeAgreement());
-
+		rs.setExternalReference(mapExternalReference(r.getExternalReference()));
 		rs.setInstance(mapInstances(r.getInstance()));

 		if (r instanceof Publication) {
@ -375,6 +401,13 @@ public class ProvisionModelSupport {
 		return rs;
 	}

+	private static Language asLanguage(Qualifier lang) {
+		return Optional
+			.ofNullable(lang) // a null qualifier yields a null Language
+			.map(q -> Language.newInstance(q.getClassid(), q.getClassname())) // built from the qualifier's classid and classname
+			.orElse(null);
+	}
+
 	@Nullable
 	private static List<String> getOtherTitles(List<StructuredProperty> titleList) {
 		return Optional
@ -422,7 +455,7 @@ public class ProvisionModelSupport {
 						Instance i = new Instance();
 						i.setCollectedfrom(asProvenance(instance.getCollectedfrom()));
 						i.setHostedby(asProvenance(instance.getHostedby()));
-						i.setFulltext(i.getFulltext());
+						i.setFulltext(instance.getFulltext());
 						i.setPid(asPid(instance.getPid()));
 						i.setAlternateIdentifier(asPid(instance.getAlternateIdentifier()));
 						i.setAccessright(mapAccessRight(instance.getAccessright()));
@ -453,7 +486,8 @@ public class ProvisionModelSupport {
 	private static AccessRight mapAccessRight(eu.dnetlib.dhp.schema.oaf.AccessRight accessright) {
 		return AccessRight
 			.newInstance(
-				mapQualifier(accessright),
+				accessright.getClassid(),
+				accessright.getClassname(),
 				Optional
 					.ofNullable(accessright.getOpenAccessRoute())
 					.map(route -> OpenAccessRoute.valueOf(route.toString()))
@ -508,7 +542,46 @@ public class ProvisionModelSupport {
 	}

 	private static Provenance asProvenance(KeyValue keyValue) {
-		return Optional.ofNullable(keyValue).map(cf -> Provenance.newInstance(cf.getKey(), cf.getValue())).orElse(null);
+		return Optional
+			.ofNullable(keyValue) // a null KeyValue yields a null Provenance
+			.map(
+				kv -> Provenance
+					.newInstance(
+						StringUtils.substringAfter(kv.getKey(), IdentifierFactory.ID_PREFIX_SEPARATOR), // NOTE(review): presumably strips the id prefix; a key without the separator would become "" - confirm
+						kv.getValue()))
+			.orElse(null);
+	}
+
+	private static List<Measure> mapMeasures(List<eu.dnetlib.dhp.schema.oaf.Measure> measures) {
+		return Optional
+			.ofNullable(measures) // a null input list yields null, not an empty list
+			.map(
+				ml -> ml
+					.stream()
+					.map(m -> Measure.newInstance(m.getId(), mapCodeLabelKV(m.getUnit()))) // id is copied; units are converted by mapCodeLabelKV
+					.collect(Collectors.toList()))
+			.orElse(null);
+	}
+
+	private static List<ExternalReference> mapExternalReference(
+		List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
+		return Optional
+			.ofNullable(externalReference)
+			.map(
+				ext -> ext
+					.stream()
+					.map(
+						e -> ExternalReference // one output element per input element, fields passed in the order below
+							.newInstance(
+								e.getSitename(),
+								e.getLabel(),
+								e.getAlternateLabel(),
+								e.getUrl(),
+								mapCodeLabel(e.getQualifier()), // the only field that is converted rather than copied
+								e.getRefidentifier(),
+								e.getQuery()))
+					.collect(Collectors.toList()))
+			.orElse(Lists.newArrayList()); // a null input list yields a new empty list (never null)
 	}

 	private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
@ -529,7 +602,7 @@ public class ProvisionModelSupport {
 		}

 		return Optional
-			.ofNullable(contexts)
+			.of(contexts)
 			.map(
 				ctx -> ctx
 					.stream()
@ -581,7 +654,14 @@ public class ProvisionModelSupport {
 			.map(
 				pids -> pids
 					.stream()
-					.map(p -> Pid.newInstance(p.getQualifier().getClassname(), p.getValue()))
+					.filter(p -> Objects.nonNull(p.getQualifier()))
+					.filter(p -> Objects.nonNull(p.getQualifier().getClassid()))
+					.map(
+						p -> Pid
+							.newInstance(
+								p.getValue(),
+								p.getQualifier().getClassid(),
+								p.getQualifier().getClassname()))
 					.collect(Collectors.toList()))
 			.orElse(null);
 	}
@ -607,7 +687,9 @@ public class ProvisionModelSupport {
 					.stream()
 					.filter(s -> Objects.nonNull(s.getQualifier()))
 					.filter(s -> Objects.nonNull(s.getQualifier().getClassname()))
-					.map(s -> Subject.newInstance(s.getValue(), s.getQualifier().getClassname()))
+					.map(
+						s -> Subject
+							.newInstance(s.getValue(), s.getQualifier().getClassid(), s.getQualifier().getClassname()))
 					.collect(Collectors.toList()))
 			.orElse(null);
 	}
@ -620,7 +702,9 @@ public class ProvisionModelSupport {
 					.stream()
 					.filter(s -> Objects.nonNull(s.getQualifier()))
 					.filter(s -> Objects.nonNull(s.getQualifier().getClassname()))
-					.map(s -> Subject.newInstance(s.getValue(), s.getQualifier().getClassname()))
+					.map(
+						s -> Subject
+							.newInstance(s.getValue(), s.getQualifier().getClassid(), s.getQualifier().getClassname()))
 					.collect(Collectors.toList()))
 			.orElse(null);
 	}
@ -689,7 +773,7 @@ public class ProvisionModelSupport {
 	private static CodeLabel mapCodeLabel(KeyValue kv) {
 		return Optional
 			.ofNullable(kv)
-			.map(q -> CodeLabel.newInstance(kv.getKey(), kv.getValue()))
+			.map(k -> CodeLabel.newInstance(k.getKey(), k.getValue()))
 			.orElse(null);
 	}

--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
@ -219,6 +219,13 @@ public class XmlRecordFactory implements Serializable {
 		if (entity.getMeasures() != null) {
 			metadata.addAll(measuresAsXml(entity.getMeasures()));
 		}
+		if (entity.getContext() != null) {
+			contexts.addAll(entity.getContext().stream().map(Context::getId).collect(Collectors.toList()));
+			/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
+			if (contexts.contains("dh-ch::subcommunity::2")) {
+				contexts.add("clarin");
+			}
+		}

 		if (ModelSupport.isResult(type)) {
 			final Result r = (Result) entity;
@ -245,14 +252,6 @@ public class XmlRecordFactory implements Serializable {
 							.collect(Collectors.toList()));
 			}

-			if (r.getContext() != null) {
-				contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()));
-				/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
-				if (contexts.contains("dh-ch::subcommunity::2")) {
-					contexts.add("clarin");
-				}
-			}
-
 			if (r.getTitle() != null) {
 				metadata
 					.addAll(
@ -1315,7 +1314,7 @@ public class XmlRecordFactory implements Serializable {
 								instance
 									.getCollectedfrom()
 									.stream()
-									.filter(cf -> kvNotBlank(cf))
+									.filter(XmlRecordFactory::kvNotBlank)
 									.map(cf -> XmlSerializationUtils.mapKeyValue("collectedfrom", cf))
 									.collect(Collectors.toList()));
 					}
@ -1326,7 +1325,7 @@ public class XmlRecordFactory implements Serializable {
 								instance
 									.getHostedby()
 									.stream()
-									.filter(hb -> kvNotBlank(hb))
+									.filter(XmlRecordFactory::kvNotBlank)
 									.map(hb -> XmlSerializationUtils.mapKeyValue("hostedby", hb))
 									.collect(Collectors.toList()));
 					}
@ -1336,7 +1335,7 @@ public class XmlRecordFactory implements Serializable {
 								instance
 									.getDateofacceptance()
 									.stream()
-									.filter(d -> isNotBlank(d))
+									.filter(StringUtils::isNotBlank)
 									.map(d -> XmlSerializationUtils.asXmlElement("dateofacceptance", d))
 									.collect(Collectors.toList()));
 					}
@ -1346,7 +1345,7 @@ public class XmlRecordFactory implements Serializable {
 								instance
 									.getInstancetype()
 									.stream()
-									.filter(t -> !StringUtils.isNotBlank(t.getClassid()))
+									.filter(t -> StringUtils.isNotBlank(t.getClassid()))
 									.map(t -> XmlSerializationUtils.mapQualifier("instancetype", t))
 									.collect(Collectors.toList()));
 					}
@ -1356,7 +1355,7 @@ public class XmlRecordFactory implements Serializable {
 								instance
 									.getDistributionlocation()
 									.stream()
-									.filter(d -> isNotBlank(d))
+									.filter(StringUtils::isNotBlank)
 									.map(d -> XmlSerializationUtils.asXmlElement("distributionlocation", d))
 									.collect(Collectors.toList()));
 					}
@ -1409,7 +1408,7 @@ public class XmlRecordFactory implements Serializable {
 								instance
 									.getLicense()
 									.stream()
-									.filter(d -> isNotBlank(d))
+									.filter(StringUtils::isNotBlank)
 									.map(d -> XmlSerializationUtils.asXmlElement("license", d))
 									.collect(Collectors.toList()));
 					}
@ -1540,11 +1539,16 @@ public class XmlRecordFactory implements Serializable {
 					.min(new RefereedComparator())
 					.orElse(XmlInstance.UNKNOWN_REVIEW_LEVEL));

+		Map<String, Qualifier> instanceTypes = Maps.newHashMap();
+
 		instances.forEach(p -> {
 			final Instance i = p.getRight();
 			instance.getCollectedfrom().add(i.getCollectedfrom());
 			instance.getHostedby().add(i.getHostedby());
-			instance.getInstancetype().add(i.getInstancetype());
+
+			if (Optional.ofNullable(i.getInstancetype()).map(Qualifier::getClassid).isPresent()) {
+				instanceTypes.putIfAbsent(i.getInstancetype().getClassid(), i.getInstancetype());
+			}
 			instance
 				.setProcessingchargeamount(
 					Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null));
@ -1571,6 +1575,8 @@ public class XmlRecordFactory implements Serializable {
 				.ifPresent(instance::setFulltext);
 		});

+		instance.getInstancetype().addAll(instanceTypes.values());
+
 		if (instance.getHostedby().size() > 1
 			&& instance.getHostedby().stream().anyMatch(hb -> ModelConstants.UNKNOWN_REPOSITORY.equals(hb))) {
 			instance.getHostedby().remove(ModelConstants.UNKNOWN_REPOSITORY);
@ -1596,9 +1602,7 @@ public class XmlRecordFactory implements Serializable {
 	private List<String> buildContexts(final String type, final Set<String> contexts) {
 		final List<String> res = Lists.newArrayList();

-		if (contextMapper != null
-			&& !contextMapper.isEmpty()
-			&& MainEntityType.result.toString().equals(type)) {
+		if (contextMapper != null && !contextMapper.isEmpty()) {

 			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");

--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json
@ -13,7 +13,7 @@
  },
  {
    "paramName": "f",
-    "paramLongName": "format",
+    "paramLongName": "shadowFormat",
    "paramDescription": "MDFormat name found in the IS profile",
    "paramRequired": true
  },
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json
@ -13,8 +13,8 @@
  },
  {
    "paramName": "f",
-    "paramLongName": "format",
-    "paramDescription": "MDFormat name found in the IS profile",
+    "paramLongName": "shadowFormat",
+    "paramDescription": "MDFormat name found in the IS profile bound to the shadow index collection to feed",
    "paramRequired": true
  },
  {
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json
@ -5,12 +5,6 @@
    "paramDescription": "the URL to the ISLookUp Service",
    "paramRequired": true
  },
-  {
-    "paramName": "f",
-    "paramLongName": "format",
-    "paramDescription": "metadata format profile name",
-    "paramRequired": true
-  },
  {
    "paramName": "a",
    "paramLongName": "action",
@ -28,5 +22,18 @@
    "paramLongName": "commit",
    "paramDescription": "should the action be followed by a commit?",
    "paramRequired": false
+  },
+  {
+    "paramName": "pf",
+    "paramLongName": "publicFormat",
+    "paramDescription": "the name of the public metadata format profile - used to create an alias",
+    "paramRequired": false
+  },
+  {
+    "paramName": "sf",
+    "paramLongName": "shadowFormat",
+    "paramDescription": "the name of the shadow metadata format profile - identifies the collection targeted by DELETE_BY_QUERY and COMMIT, and used to create an alias",
+    "paramRequired": false
  }
+
 ]
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml
@ -35,7 +35,7 @@
            <description>maximum number of relations allowed for a each entity grouping by target</description>
        </property>
        <property>
-            <name>format</name>
+            <name>shadowFormat</name>
            <description>metadata format name (DMF|TMF)</description>
        </property>
        <property>
@ -133,6 +133,7 @@
            <case to="create_payloads">${wf:conf('resumeFrom') eq 'create_payloads'}</case>
            <case to="drop_solr_collection">${wf:conf('resumeFrom') eq 'drop_solr_collection'}</case>
            <case to="to_solr_index">${wf:conf('resumeFrom') eq 'to_solr_index'}</case>
+            <case to="update_solr_aliases">${wf:conf('resumeFrom') eq 'update_solr_aliases'}</case>
            <default to="prepare_relations"/>
        </switch>
    </decision>
@ -641,8 +642,8 @@
            </configuration>
            <main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
-            <arg>--format</arg><arg>${format}</arg>
            <arg>--action</arg><arg>DELETE_BY_QUERY</arg>
+            <arg>--shadowFormat</arg><arg>${shadowFormat}</arg>
            <arg>--query</arg><arg>${solrDeletionQuery}</arg>
            <arg>--commit</arg><arg>true</arg>
        </java>
@ -672,7 +673,7 @@
            </spark-opts>
            <arg>--inputPath</arg><arg>${workingDir}/xml_json</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
-            <arg>--format</arg><arg>${format}</arg>
+            <arg>--shadowFormat</arg><arg>${shadowFormat}</arg>
            <arg>--batchSize</arg><arg>${batchSize}</arg>
        </spark>
        <ok to="commit_solr_collection"/>
@ -689,7 +690,7 @@
            </configuration>
            <main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
-            <arg>--format</arg><arg>${format}</arg>
+            <arg>--shadowFormat</arg><arg>${shadowFormat}</arg>
            <arg>--action</arg><arg>COMMIT</arg>
        </java>
        <ok to="End"/>
@ -714,12 +715,31 @@
            </spark-opts>
            <arg>--inputPath</arg><arg>${workingDir}/xml_json</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
-            <arg>--format</arg><arg>${format}</arg>
+            <arg>--shadowFormat</arg><arg>${shadowFormat}</arg>
            <arg>--outputPath</arg><arg>${workingDir}/solr_documents</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

+    <!-- Action that updates the solr core aliases - out of order execution, only using the 'resumeFrom' param -->
+    <action name="update_solr_aliases">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
+            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
+            <arg>--action</arg><arg>UPDATE_ALIASES</arg>
+            <arg>--publicFormat</arg><arg>${publicFormat}</arg>
+            <arg>--shadowFormat</arg><arg>${shadowFormat}</arg>
+        </java>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java
@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;

 import java.io.IOException;
+import java.io.StringReader;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.Arrays;
@ -16,6 +17,9 @@ import javax.xml.transform.TransformerException;
 import org.apache.commons.io.IOUtils;
 import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.common.SolrInputDocument;
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.io.SAXReader;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@ -34,7 +38,6 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;

 /**
 * This test can be used to produce a record that can be manually fed to Solr in XML format.
- *
 * The input is a JoinedEntity, i.e. a json representation of an OpenAIRE entity that embeds all the linked entities.
 */
 public class IndexRecordTransformerTest {
@ -54,7 +57,7 @@ public class IndexRecordTransformerTest {
 	}

 	@Test
-	public void testPublicationRecordTransformation() throws IOException, TransformerException {
+	public void testPublicationRecordTransformation() throws IOException, TransformerException, DocumentException {

 		final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
 			PayloadConverterJob.schemaLocation);
@ -71,11 +74,15 @@ public class IndexRecordTransformerTest {
 						new RelatedEntityWrapper(rel,
 							CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class))));

-		final String record = xmlRecordFactory.build(je);
+		final String xmlRecord = xmlRecordFactory.build(je);

-		assertNotNull(record);
+		assertNotNull(xmlRecord);

-		testRecordTransformation(record);
+		Document doc = new SAXReader().read(new StringReader(xmlRecord));
+
+		assertEquals("Article", doc.valueOf("//children/instance/instancetype/@classname"));
+
+		testRecordTransformation(xmlRecord);
 	}

 	@Test
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java
@ -4,16 +4,20 @@ package eu.dnetlib.dhp.oa.provision;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;

+import org.apache.solr.client.solrj.request.SolrPing;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.client.solrj.response.SolrPingResponse;
 import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;

 class SolrAdminApplicationTest extends SolrTest {

 	@Test
 	void testPing() throws Exception {
-		SolrPingResponse pingResponse = miniCluster.getSolrClient().ping();
+		final SolrPing ping = new SolrPing();
+		ping.getParams().set("collection", ProvisionConstants.SHADOW_ALIAS_NAME);
+		SolrPingResponse pingResponse = ping.process(miniCluster.getSolrClient());
+
 		log.info("pingResponse: '{}'", pingResponse.getStatus());
 		assertEquals(0, pingResponse.getStatus());
 	}
@ -24,7 +28,7 @@ class SolrAdminApplicationTest extends SolrTest {
 		SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost());

 		UpdateResponse rsp = (UpdateResponse) admin
-			.execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false);
+			.execute(SolrAdminApplication.Action.DELETE_BY_QUERY, "*:*", false, null, SHADOW_COLLECTION);

 		assertEquals(0, rsp.getStatus());
 	}
@ -34,9 +38,30 @@ class SolrAdminApplicationTest extends SolrTest {

 		SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost());

-		UpdateResponse rsp = (UpdateResponse) admin.commit(DEFAULT_COLLECTION);
+		UpdateResponse rsp = (UpdateResponse) admin.commit(SHADOW_COLLECTION);

 		assertEquals(0, rsp.getStatus());
 	}

+	/**
+	 * Points the public alias at the shadow collection and expects a response status of 0.
+	 */
+	@Test
+	void testAdminApplication_CREATE_ALIAS() throws Exception {
+
+		// dedicated client for this test; named so that it does not shadow the static 'admin' of SolrTest
+		final SolrAdminApplication adminApp = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost());
+		try {
+			CollectionAdminResponse rsp = (CollectionAdminResponse) adminApp
+				.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, SHADOW_COLLECTION);
+			assertEquals(0, rsp.getStatus());
+		} finally {
+			// release the client even when the call or the assertion fails
+			adminApp.close();
+		}
+	}
+
+	/**
+	 * Deletes the public alias and expects a response status of 0.
+	 */
+	@Test
+	void testAdminApplication_DELETE_ALIAS() throws Exception {
+
+		// dedicated client for this test; named so that it does not shadow the static 'admin' of SolrTest
+		final SolrAdminApplication adminApp = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost());
+		try {
+			CollectionAdminResponse rsp = (CollectionAdminResponse) adminApp
+				.deleteAlias(ProvisionConstants.PUBLIC_ALIAS_NAME);
+			assertEquals(0, rsp.getStatus());
+		} finally {
+			// release the client even when the call or the assertion fails
+			adminApp.close();
+		}
+	}
+
 }
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java
@ -1,21 +1,40 @@

 package eu.dnetlib.dhp.oa.provision;

+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.File;
 import java.io.IOException;
 import java.net.URI;
+import java.nio.file.Path;

+import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.embedded.JettyConfig;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.ConfigSetAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.cloud.MiniSolrCloudCluster;
 import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.CoreAdminParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.*;
 import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
 import org.mockito.Mock;
 import org.mockito.Mockito;
 import org.mockito.junit.jupiter.MockitoExtension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
 import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
@ -23,7 +42,18 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

@ExtendWith(MockitoExtension.class)
-public class SolrConfigExploreTest extends SolrExploreTest {
+public class SolrConfigExploreTest {
+
+	protected static final Logger log = LoggerFactory.getLogger(SolrConfigExploreTest.class);
+
+	protected static final String SHADOW_FORMAT = "c1";
+	protected static final String SHADOW_COLLECTION = SHADOW_FORMAT + "-index-openaire";
+	protected static final String PUBLIC_FORMAT = "c2";
+	protected static final String PUBLIC_COLLECTION = PUBLIC_FORMAT + "-index-openaire";
+
+	protected static final String CONFIG_NAME = "testConfig";
+
+	protected static SolrAdminApplication admin;

 	protected static SparkSession spark;

@ -35,15 +65,17 @@ public class SolrConfigExploreTest extends SolrExploreTest {
 	@Mock
 	private ISLookupClient isLookupClient;

+	@TempDir
+	public static Path workingDir;
+
+	protected static MiniSolrCloudCluster miniCluster;
+
 	@BeforeEach
 	public void prepareMocks() throws ISLookUpException, IOException {
 		isLookupClient.setIsLookup(isLookUpService);

 		int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort();

-		Mockito
-			.when(isLookupClient.getDsId(Mockito.anyString()))
-			.thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl");
 		Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort));
 		Mockito
 			.when(isLookupClient.getLayoutSource(Mockito.anyString()))
@ -54,7 +86,7 @@ public class SolrConfigExploreTest extends SolrExploreTest {
 	}

 	@BeforeAll
-	public static void before() {
+	public static void setup() throws Exception {

 		SparkConf conf = new SparkConf();
 		conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
@ -70,15 +102,75 @@ public class SolrConfigExploreTest extends SolrExploreTest {

 		spark = SparkSession
 			.builder()
-			.appName(XmlIndexingJobTest.class.getSimpleName())
+			.appName(SolrConfigExploreTest.class.getSimpleName())
 			.config(conf)
 			.getOrCreate();

+		// random unassigned HTTP port
+		final int jettyPort = 0;
+		final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build();
+
+		log.info(String.format("working directory: %s", workingDir.toString()));
+		System.setProperty("solr.log.dir", workingDir.resolve("logs").toString());
+
+		// create a MiniSolrCloudCluster instance
+		miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig);
+
+		// Upload Solr configuration directory to ZooKeeper
+		String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig";
+		File configDir = new File(solrZKConfigDir);
+
+		miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME);
+
+		// override settings in the solrconfig include
+		System.setProperty("solr.tests.maxBufferedDocs", "100000");
+		System.setProperty("solr.tests.maxIndexingThreads", "-1");
+		System.setProperty("solr.tests.ramBufferSizeMB", "100");
+
+		// use non-test classes so RandomizedRunner isn't necessary
+		System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
+		System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory");
+		System.setProperty("solr.lock.type", "single");
+
+		log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
+		log
+			.info(
+				CollectionAdminRequest.ClusterStatus
+					.getClusterStatus()
+					.process(miniCluster.getSolrClient())
+					.toString());
+
+		NamedList<Object> res = createCollection(
+			miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
+		res.forEach(o -> log.info(o.toString()));
+
+		// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
+
+		res = createCollection(
+			miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
+		res.forEach(o -> log.info(o.toString()));
+
+		admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
+		CollectionAdminResponse rsp = (CollectionAdminResponse) admin
+			.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
+		assertEquals(0, rsp.getStatus());
+		rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
+		assertEquals(0, rsp.getStatus());
+
+		log
+			.info(
+				CollectionAdminRequest.ClusterStatus
+					.getClusterStatus()
+					.process(miniCluster.getSolrClient())
+					.toString());
+
 	}

 	@AfterAll
-	public static void tearDown() {
+	public static void tearDown() throws Exception {
 		spark.stop();
+		// release the admin client created in setup(); it is still null when setup() failed before creating it
+		if (admin != null) {
+			admin.close();
+		}
+		miniCluster.shutdown();
+		FileUtils.deleteDirectory(workingDir.toFile());
 	}

 	@Test
@ -86,8 +178,10 @@ public class SolrConfigExploreTest extends SolrExploreTest {

 		String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";

-		new XmlIndexingJob(spark, inputPath, FORMAT, batchSize).run(isLookupClient);
-		Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
+		new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
+			.run(isLookupClient);
+		Assertions
+			.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());

 		String[] queryStrings = {
 			"cancer",
@ -101,14 +195,14 @@ public class SolrConfigExploreTest extends SolrExploreTest {
 			query.add(CommonParams.Q, q);
 			query.set("debugQuery", "on");

-			log.info("Submit query to Solr with params: {}", query.toString());
-			QueryResponse rsp = miniCluster.getSolrClient().query(query);
+			log.info("Submit query to Solr with params: {}", query);
+			QueryResponse rsp = miniCluster.getSolrClient().query(ProvisionConstants.SHADOW_ALIAS_NAME, query);
 //            System.out.println(rsp.getHighlighting());
 //            System.out.println(rsp.getExplainMap());

 			for (SolrDocument doc : rsp.getResults()) {
-				System.out
-					.println(
+				log
+					.info(
 						doc.get("score") + "\t" +
 							doc.get("__indexrecordidentifier") + "\t" +
 							doc.get("resultidentifier") + "\t" +
@ -122,4 +216,18 @@ public class SolrConfigExploreTest extends SolrExploreTest {
 			}
 		}
 	}
+
+	/**
+	 * Sends a CREATE action to the "/admin/collections" endpoint and returns the raw response.
+	 *
+	 * @param client            the client used to submit the request
+	 * @param name              value of the "name" parameter
+	 * @param numShards         value of the "numShards" parameter
+	 * @param replicationFactor value of the "replicationFactor" parameter
+	 * @param maxShardsPerNode  value of the "maxShardsPerNode" parameter
+	 * @param configName        value of the "collection.configName" parameter
+	 * @return the response returned by the client for the request
+	 * @throws Exception when the request fails
+	 */
+	protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
+		int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
+		final String createAction = CollectionParams.CollectionAction.CREATE.name();
+
+		final ModifiableSolrParams createParams = new ModifiableSolrParams();
+		createParams.set(CoreAdminParams.ACTION, createAction);
+		createParams.set("name", name);
+		createParams.set("numShards", numShards);
+		createParams.set("replicationFactor", replicationFactor);
+		createParams.set("collection.configName", configName);
+		createParams.set("maxShardsPerNode", maxShardsPerNode);
+
+		final QueryRequest createRequest = new QueryRequest(createParams);
+		createRequest.setPath("/admin/collections");
+
+		return client.request(createRequest);
+	}
 }
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java
@ -2,24 +2,15 @@
 package eu.dnetlib.dhp.oa.provision;

 import java.io.IOException;
-import java.io.StringReader;
 import java.net.URI;

 import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.io.Text;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocument;
-import org.apache.solr.common.SolrInputField;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
-import org.dom4j.io.SAXReader;
 import org.junit.jupiter.api.*;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.Mock;
@ -50,9 +41,6 @@ public class SolrConfigTest extends SolrTest {

 		int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort();

-		Mockito
-			.when(isLookupClient.getDsId(Mockito.anyString()))
-			.thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl");
 		Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort));
 		Mockito
 			.when(isLookupClient.getLayoutSource(Mockito.anyString()))
@ -95,9 +83,10 @@ public class SolrConfigTest extends SolrTest {

 		String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml";

-		new XmlIndexingJob(spark, inputPath, FORMAT, batchSize)
+		new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
 			.run(isLookupClient);
-		Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
+		Assertions
+			.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());

 		String[] queryStrings = {
 			"cancer",
@ -109,8 +98,8 @@ public class SolrConfigTest extends SolrTest {
 			SolrQuery query = new SolrQuery();
 			query.add(CommonParams.Q, q);

-			log.info("Submit query to Solr with params: {}", query.toString());
-			QueryResponse rsp = miniCluster.getSolrClient().query(query);
+			log.info("Submit query to Solr with params: {}", query);
+			QueryResponse rsp = miniCluster.getSolrClient().query(ProvisionConstants.SHADOW_ALIAS_NAME, query);

 			for (SolrDocument doc : rsp.getResults()) {
 				System.out
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java
@ -34,58 +34,6 @@ public abstract class SolrExploreTest {
 	@TempDir
 	public static Path workingDir;

-	@BeforeAll
-	public static void setup() throws Exception {
-
-		// random unassigned HTTP port
-		final int jettyPort = 0;
-		final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build();
-
-		log.info(String.format("working directory: %s", workingDir.toString()));
-		System.setProperty("solr.log.dir", workingDir.resolve("logs").toString());
-
-		// create a MiniSolrCloudCluster instance
-		miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig);
-
-		// Upload Solr configuration directory to ZooKeeper
-		String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig";
-		File configDir = new File(solrZKConfigDir);
-
-		miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME);
-
-		// override settings in the solrconfig include
-		System.setProperty("solr.tests.maxBufferedDocs", "100000");
-		System.setProperty("solr.tests.maxIndexingThreads", "-1");
-		System.setProperty("solr.tests.ramBufferSizeMB", "100");
-
-		// use non-test classes so RandomizedRunner isn't necessary
-		System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
-		System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory");
-		System.setProperty("solr.lock.type", "single");
-
-		log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
-		log
-			.info(
-				CollectionAdminRequest.ClusterStatus
-					.getClusterStatus()
-					.process(miniCluster.getSolrClient())
-					.toString());
-
-		NamedList<Object> res = createCollection(
-			miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME);
-		res.forEach(o -> log.info(o.toString()));
-
-		miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION);
-
-		log
-			.info(
-				CollectionAdminRequest.ClusterStatus
-					.getClusterStatus()
-					.process(miniCluster.getSolrClient())
-					.toString());
-
-	}
-
 	@AfterAll
 	public static void shutDown() throws Exception {
 		miniCluster.shutdown();
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java
@ -1,6 +1,8 @@

 package eu.dnetlib.dhp.oa.provision;

+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import java.io.File;
 import java.nio.file.Path;

@ -10,6 +12,7 @@ import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.ConfigSetAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.cloud.MiniSolrCloudCluster;
 import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.CoreAdminParams;
@ -21,14 +24,21 @@ import org.junit.jupiter.api.io.TempDir;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import sun.security.provider.SHA;
+
 public abstract class SolrTest {

 	protected static final Logger log = LoggerFactory.getLogger(SolrTest.class);

-	protected static final String FORMAT = "test";
-	protected static final String DEFAULT_COLLECTION = FORMAT + "-index-openaire";
+	protected static final String SHADOW_FORMAT = "c1";
+	protected static final String SHADOW_COLLECTION = SHADOW_FORMAT + "-index-openaire";
+	protected static final String PUBLIC_FORMAT = "c2";
+	protected static final String PUBLIC_COLLECTION = PUBLIC_FORMAT + "-index-openaire";
+
 	protected static final String CONFIG_NAME = "testConfig";

+	protected static SolrAdminApplication admin;
+
 	protected static MiniSolrCloudCluster miniCluster;

 	@TempDir
@ -72,10 +82,21 @@ public abstract class SolrTest {
 					.toString());

 		NamedList<Object> res = createCollection(
-			miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME);
+			miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
 		res.forEach(o -> log.info(o.toString()));

-		miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION);
+		// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
+
+		res = createCollection(
+			miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
+		res.forEach(o -> log.info(o.toString()));
+
+		admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
+		CollectionAdminResponse rsp = (CollectionAdminResponse) admin
+			.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
+		assertEquals(0, rsp.getStatus());
+		rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
+		assertEquals(0, rsp.getStatus());

 		log
 			.info(
@ -83,12 +104,12 @@ public abstract class SolrTest {
 					.getClusterStatus()
 					.process(miniCluster.getSolrClient())
 					.toString());
-
 	}

 	@AfterAll
 	public static void shutDown() throws Exception {
+		// close the admin client while the cluster is still up; it is still null when setup failed before creating it
+		if (admin != null) {
+			admin.close();
+		}
 		miniCluster.shutdown();
 		FileUtils.deleteDirectory(workingDir.toFile());
 	}

--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java
@ -10,6 +10,7 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.spark.SparkConf;
@ -50,9 +51,6 @@ public class XmlIndexingJobTest extends SolrTest {

 		int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort();

-		Mockito
-			.when(isLookupClient.getDsId(Mockito.anyString()))
-			.thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl");
 		Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort));
 		Mockito
 			.when(isLookupClient.getLayoutSource(Mockito.anyString()))
@ -103,46 +101,72 @@ public class XmlIndexingJobTest extends SolrTest {

 		long nRecord = records.count();

-		new XmlIndexingJob(spark, inputPath, FORMAT, batchSize).run(isLookupClient);
+		new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
+			.run(isLookupClient);

-		assertEquals(0, miniCluster.getSolrClient().commit().getStatus());
+		assertEquals(0, miniCluster.getSolrClient().commit(SHADOW_COLLECTION).getStatus());

-		QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*"));
+		QueryResponse rsp = miniCluster
+			.getSolrClient()
+			.query(
+				ProvisionConstants.SHADOW_ALIAS_NAME,
+				new SolrQuery().add(CommonParams.Q, "*:*"));

 		assertEquals(
 			nRecord, rsp.getResults().getNumFound(),
 			"the number of indexed records should be equal to the number of input records");

-		rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isgreen:true"));
+		rsp = miniCluster
+			.getSolrClient()
+			.query(
+				ProvisionConstants.SHADOW_ALIAS_NAME,
+				new SolrQuery().add(CommonParams.Q, "isgreen:true"));
 		assertEquals(
-			0, rsp.getResults().getNumFound(),
+			4, rsp.getResults().getNumFound(),
 			"the number of indexed records having isgreen = true");

-		rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze"));
+		rsp = miniCluster
+			.getSolrClient()
+			.query(
+				ProvisionConstants.SHADOW_ALIAS_NAME,
+				new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze"));
 		assertEquals(
-			0, rsp.getResults().getNumFound(),
+			2, rsp.getResults().getNumFound(),
 			"the number of indexed records having openaccesscolor = bronze");

-		rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true"));
+		rsp = miniCluster
+			.getSolrClient()
+			.query(
+				ProvisionConstants.SHADOW_ALIAS_NAME,
+				new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true"));
 		assertEquals(
 			0, rsp.getResults().getNumFound(),
 			"the number of indexed records having isindiamondjournal = true");

-		rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "publiclyfunded:true"));
+		rsp = miniCluster
+			.getSolrClient()
+			.query(
+				ProvisionConstants.SHADOW_ALIAS_NAME,
+				new SolrQuery().add(CommonParams.Q, "publiclyfunded:true"));
 		assertEquals(
 			0, rsp.getResults().getNumFound(),
 			"the number of indexed records having publiclyfunded = true");

-		rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "peerreviewed:true"));
+		rsp = miniCluster
+			.getSolrClient()
+			.query(
+				ProvisionConstants.SHADOW_ALIAS_NAME,
+				new SolrQuery().add(CommonParams.Q, "peerreviewed:true"));
 		assertEquals(
-			0, rsp.getResults().getNumFound(),
+			35, rsp.getResults().getNumFound(),
 			"the number of indexed records having peerreviewed = true");

 		rsp = miniCluster
 			.getSolrClient()
 			.query(
+				ProvisionConstants.SHADOW_ALIAS_NAME,
 				new SolrQuery()
-					.add(CommonParams.Q, "objidentifier:\"iddesignpres::ae77e56e84ad058d9e7f19fa2f7325db\"")
+					.add(CommonParams.Q, "objidentifier:\"57a035e5b1ae::236d6d8c1e03368b5ae72acfeeb11bbc\"")
 					.add(CommonParams.FL, "__json"));
 		assertEquals(
 			1, rsp.getResults().getNumFound(),
@ -158,6 +182,22 @@ public class XmlIndexingJobTest extends SolrTest {

 		log.info((String) json.get());

+		admin
+			.execute(
+				SolrAdminApplication.Action.UPDATE_ALIASES, null, false,
+				SHADOW_COLLECTION, PUBLIC_COLLECTION);
+
+		rsp = miniCluster
+			.getSolrClient()
+			.query(
+				ProvisionConstants.PUBLIC_ALIAS_NAME,
+				new SolrQuery()
+					.add(CommonParams.Q, "objidentifier:\"57a035e5b1ae::236d6d8c1e03368b5ae72acfeeb11bbc\"")
+					.add(CommonParams.FL, "__json"));
+
+		assertEquals(
+			1, rsp.getResults().getNumFound(),
+			"the number of indexed records having the given identifier, found in the public collection");
 	}

 }
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java
@ -1,8 +1,7 @@

 package eu.dnetlib.dhp.oa.provision;

-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.*;

 import java.io.IOException;
 import java.io.StringReader;
@ -22,6 +21,7 @@ import com.google.common.collect.Lists;
 import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
 import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
 import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
+import eu.dnetlib.dhp.oa.provision.utils.ContextDef;
 import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
 import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
 import eu.dnetlib.dhp.schema.oaf.*;
@ -51,7 +51,7 @@ public class XmlRecordFactoryTest {

 		assertNotNull(doc);

-		// System.out.println(doc.asXML());
+		System.out.println(doc.asXML());

 		assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
 		assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
@ -267,4 +267,39 @@ public class XmlRecordFactoryTest {

 	}

+	/**
+	 * Builds the XML record of the project stored in "project_aka.json" and checks that its
+	 * context is rendered as nested context / category / concept elements.
+	 */
+	@Test
+	public void test_AKA_project() throws DocumentException, IOException {
+		final ContextMapper contextMapper = new ContextMapper();
+
+		// register the three levels that the concept id "dh-ch::projects::2" is made of
+		contextMapper
+			.put("dh-ch", new ContextDef("dh-ch", "Digital Humanities and Cultural Heritage", "context", "community"));
+		contextMapper.put("dh-ch::projects", new ContextDef("dh-ch::projects", "DH-CH Projects", "category", ""));
+		contextMapper
+			.put("dh-ch::projects::2", new ContextDef("dh-ch::projects::2", "ARIADNE", "concept", "community"));
+
+		final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
+			PayloadConverterJob.schemaLocation);
+
+		// hand the stream to the mapper directly instead of decoding it with the platform default
+		// charset first; NOTE(review): assumes OBJECT_MAPPER is a Jackson ObjectMapper, which by
+		// default also closes the stream after reading - confirm
+		final Project p = OBJECT_MAPPER
+			.readValue(getClass().getResourceAsStream("project_aka.json"), Project.class);
+
+		assertNotNull(p.getContext());
+		assertEquals(1, p.getContext().size());
+		assertEquals("dh-ch::projects::2", p.getContext().get(0).getId());
+
+		final String xml = xmlRecordFactory.build(new JoinedEntity(p));
+
+		assertNotNull(xml);
+
+		final Document doc = new SAXReader().read(new StringReader(xml));
+
+		assertNotNull(doc);
+
+		assertEquals("dh-ch", doc.valueOf("//context/@id"));
+		assertEquals("dh-ch::projects", doc.valueOf("//context/category/@id"));
+		assertEquals("dh-ch::projects::2", doc.valueOf("//context/category/concept/@id"));
+	}
+
 }
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml
@ -68,15 +68,12 @@
        <FIELD copy="true" indexable="false" name="externalreflabel" result="false" stat="false" tokenizable="true" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/label)"/>
        <FIELD copy="true" indexable="true" name="resultidentifier" result="false" stat="false" type="string_ci" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/webresource/*[local-name()='url'])"/>
        <FIELD copy="true" indexable="false" name="resultsource" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/source)"/>
-
        <FIELD indexable="true" multivalued="false" name="isgreen" result="false" stat="false" type="boolean" value="//*[local-name()='entity']/*[local-name()='result']/isgreen"/>
        <FIELD indexable="true" multivalued="false" name="openaccesscolor" result="false" stat="false" tokenizable="false" value="//*[local-name()='entity']/*[local-name()='result']/openaccesscolor"/>
        <FIELD indexable="true" multivalued="false" name="isindiamondjournal" result="false" stat="false" type="boolean" value="//*[local-name()='entity']/*[local-name()='result']/isindiamondjournal"/>
        <FIELD indexable="true" multivalued="false" name="publiclyfunded" result="false" stat="false" type="boolean" value="//*[local-name()='entity']/*[local-name()='result']/publiclyfunded"/>
-
        <FIELD indexable="true" multivalued="false" name="peerreviewed" result="false" stat="false" type="boolean" value="some $refereed in //*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='refereed']/@classid satisfies ($refereed = '0001')"/>
-
-
+        <FIELD indexable="true" multivalued="false" name="haslicense" result="false" stat="false" type="boolean" value="some $license in //*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='license']/text() satisfies (string-length($license) &gt; 0)"/>
        <FIELD indexable="true" name="eoscifguidelines" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name() = 'result']/eoscifguidelines/@code)"/><!-- FOS and SDGs non tokenizable for faceted search-->
        <FIELD indexable="true" name="fos" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='FOS'])"/>
        <FIELD indexable="true" name="foslabel" result="false" stat="false" tokenizable="false" value="concat(./text(), '||', replace(./text(), '^\d+\s', ''))" xpath="//*[local-name()='entity']/*[local-name()='result']/subject[@classid='FOS']"/>
@ -93,6 +90,7 @@
        <FIELD indexable="true" name="relorganizationid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='organization'])"/>
        <FIELD copy="true" indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/>
        <FIELD copy="true" indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/>
+        <FIELD indexable="true" name="relorganization" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./to, '||', ./legalname))" xpath="//*[local-name()='entity']/*//rel[./to/@type='organization']"/>
        <FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='publication' or @type='dataset' or @type='software' or @type='otherresearchproduct'])"/>
        <FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)"/>
        <FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/>
@ -122,6 +120,7 @@
        <FIELD indexable="true" name="categoryid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@id)"/>
        <FIELD indexable="true" name="conceptname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@label)"/><!-- new index field for country info from different xpaths for any type of entity -->
        <FIELD indexable="true" name="country" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid | //*[local-name()='entity']//funder/@jurisdiction)"/>
+        <FIELD indexable="true" name="countrynojurisdiction" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid)"/>
        <FIELD indexable="false" name="oafentity" result="true" stat="false" tokenizable="false" xpath="//*[local-name() = 'entity']"/><!-- impact indicators -->
        <FIELD copy="false" indexable="true" multivalued="false" name="influence" result="false" stat="false" type="pfloat" xpath="//measure[@id='influence']/@score/number()"/>
        <FIELD copy="false" indexable="true" multivalued="false" name="influence_class" result="false" stat="false" type="string" xpath="//measure[@id='influence']/@class/string()"/>
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project_aka.json
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project_aka.json
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema
@ -194,228 +194,173 @@
  <fieldType name="tints" class="solr.TrieIntField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
  <fieldType name="tlong" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" precisionStep="8"/>
  <fieldType name="tlongs" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
-
-  <!-- Indexed fields -->
  <field name="__all" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="__deleted" type="boolean" default="false" omitNorms="true" omitTermFreqAndPositions="true" indexed="true" stored="false"/>
-  <field name="__dsid" type="string" omitNorms="true" omitTermFreqAndPositions="true" indexed="true" stored="true"/>
-  <field name="__dsversion" type="pdate" omitNorms="true" omitTermFreqAndPositions="true" indexed="true" stored="true"/>
  <field name="__indexrecordidentifier" type="string" multiValued="false" indexed="true" required="true" stored="true"/>
-  <field name="__result" type="string" docValues="false" multiValued="false" indexed="false" stored="true"/>
  <field name="__json" type="string" docValues="false" multiValued="false" indexed="false" stored="true"/>
+  <field name="__result" type="string" docValues="false" multiValued="false" indexed="false" stored="true"/>
  <field name="_root_" type="string" docValues="false" indexed="true" stored="false"/>
  <field name="_version_" type="long" multiValued="false" indexed="true" stored="true"/>
-  <field name="authorid" type="string_ci" multiValued="true" indexed="true" stored="false"/>
-  <field name="authoridtype" type="string_ci" multiValued="true" indexed="true" stored="false"/>
-  <field name="categoryid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="categoryname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="collectedfrom" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="collectedfromdatasourceid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="collectedfromname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="community" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="communityid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="communityname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="conceptid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="conceptname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="contextid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="contextname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="contexttype" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="country" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="datasourcecompatibilityid" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="datasourcecompatibilityname" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="datasourceenglishname" type="text_common" multiValued="true" indexed="true" stored="false"/>
+  <field name="authorid" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="categoryid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="citation_count" type="pint" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="citation_count_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="collectedfromdatasourceid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="collectedfromname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="community" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="communityid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="conceptname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="contextid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="contextname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="country" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="countrynojurisdiction" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="datasourcecompatibilityid" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="datasourcecompatibilityname" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="datasourceenglishname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
  <field name="datasourcejurisdiction" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
-  <field name="datasourceodcontenttypes" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="datasourceoddescription" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="datasourceodlanguages" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="datasourceodsubjects" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="datasourceofficialname" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="datasourcesubject" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
+  <field name="datasourceodcontenttypes" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="datasourceoddescription" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="datasourceodlanguages" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="datasourceodsubjects" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="datasourceofficialname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="datasourcesubject" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
  <field name="datasourcethematic" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
-  <field name="datasourcetypename" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="datasourcetypeuiid" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="datasourcetypeuiname" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="dateofcollection" type="pdate" multiValued="false" indexed="true" stored="false"/>
-  <field name="deletedbyinference" type="string" multiValued="true" indexed="true" stored="false"/>
+  <field name="datasourcetypename" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="datasourcetypeuiid" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="datasourcetypeuiname" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="dateofcollection" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="deletedbyinference" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
  <field name="eoscdatasourcetype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
  <field name="eoscifguidelines" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
  <field name="eosctype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
-  <field name="externalrefclass" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="externalrefid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="externalreflabel" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="externalrefsite" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="funder" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="funderid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="funderjurisdiction" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundername" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="funderoriginalname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundershortname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundinglevel0_description" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundinglevel0_id" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundinglevel0_name" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundinglevel1_description" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundinglevel1_id" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundinglevel1_name" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundinglevel2_description" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundinglevel2_id" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="fundinglevel2_name" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="inferenceprovenance" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="inferred" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="instancetypename" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="oafentity" type="string" multiValued="true" indexed="false" stored="false"/>
-  <field name="oaftype" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="objidentifier" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationalternativenames" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationcountryname" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="organizationdupid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationecenterprise" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationecinternationalorganization" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationecinternationalorganizationeurinterests" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationeclegalbody" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationeclegalperson" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationecnonprofit" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationecnutscode" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationecresearchorganization" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationecsmevalidated" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationlegalname" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
-  <field name="organizationlegalshortname" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
-  <field name="originalid" type="string_ci" multiValued="true" indexed="true" stored="false"/>
-  <field name="pid" type="string_ci" multiValued="true" indexed="true" stored="false"/>
-  <field name="pidclassid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="pidclassname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="projectacronym" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
-  <field name="projectcallidentifier" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectcode" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
-  <field name="projectcode_nt" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="projectcontracttypename" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectduration" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectecarticle29_3" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectecsc39" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectenddate" type="pdate" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectendyear" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectkeywords" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="projectoamandatepublications" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectstartdate" type="pdate" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectstartyear" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="projectsubject" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="projecttitle" type="ngramtext" multiValued="true" indexed="true" stored="false"/>
-  <field name="provenanceactionclassid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relclass" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relcollectedfromid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relcollectedfromname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relcontracttypeid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relcontracttypename" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="reldatasourcecompatibilityid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfunder" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfunderid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfunderjurisdiction" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfundername" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfundershortname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfundinglevel0_id" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfundinglevel0_name" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfundinglevel1_id" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfundinglevel1_name" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfundinglevel2_id" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relfundinglevel2_name" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relinferenceprovenance" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relinferred" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relorganizationcountryid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relorganizationcountryname" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="relorganizationid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relorganizationname" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="relorganizationshortname" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="relproject" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relprojectcode" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relprojectid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relprojectname" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relprojecttitle" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="relprovenanceactionclassid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relresultid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="relresulttype" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="reltrust" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultacceptanceyear" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="resultaccessright" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultauthor" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultauthor_nt" type="string_ci" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultbestaccessright" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="resultdateofacceptance" type="pdate" multiValued="false" indexed="true" stored="false"/>
-  <field name="resultdescription" type="text_en" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultdupid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultembargoenddate" type="pdate" multiValued="false" indexed="true" stored="false"/>
-  <field name="resultembargoendyear" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="resulthostingdatasource" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="resulthostingdatasourceid" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="resulthostingdatasourcename" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultidentifier" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultlanguagename" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="resultlicense" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultpublisher" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultsource" type="text_common" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultsubject" type="text_en" multiValued="true" indexed="true" stored="false"/>
-  <field name="resultsubjectclass" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="resulttitle" type="text_en" multiValued="true" indexed="true" stored="false"/>
-  <field name="resulttypeid" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="resulttypename" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="semrelid" type="string" multiValued="true" indexed="true" stored="false"/>
+  <field name="externalreflabel" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fos" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="foslabel" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="funder" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="funderid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundershortname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundinglevel0_description" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundinglevel0_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundinglevel0_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundinglevel1_description" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundinglevel1_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundinglevel1_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundinglevel2_description" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundinglevel2_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="fundinglevel2_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="haslicense" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="impulse" type="pint" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="impulse_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="influence" type="pfloat" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="influence_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="instancetypename" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="isgreen" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="isindiamondjournal" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="oafentity" type="string" docValues="false" multiValued="true" indexed="false" stored="false"/>
+  <field name="oaftype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="objidentifier" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="openaccesscolor" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="orcidtypevalue" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="organizationalternativenames" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="organizationdupid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="organizationlegalname" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="organizationlegalshortname" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="originalid" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="peerreviewed" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="pid" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="pidclassid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="popularity" type="pfloat" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="popularity_alt" type="pfloat" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="popularity_alt_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="popularity_class" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="projectacronym" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="projectcallidentifier" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="projectcode" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="projectcode_nt" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="projectduration" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="projectecsc39" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="projectenddate" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="projectendyear" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="projectkeywords" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="projectoamandatepublications" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="projectstartdate" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="projectstartyear" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="projecttitle" type="ngramtext" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="projecttitle_alternative" type="text_en" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="provenanceactionclassid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="publiclyfunded" type="boolean" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="relclass" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relcontracttypename" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="reldatasourcecompatibilityid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relfunder" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relfunderid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relfundershortname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relfundinglevel0_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relfundinglevel0_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relfundinglevel1_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relfundinglevel1_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relfundinglevel2_id" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relfundinglevel2_name" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relorganizationcountryid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relorganizationcountryname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relorganizationid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relorganizationname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relorganizationshortname" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relproject" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relprojectcode" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relprojectid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relprojectname" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relprojecttitle" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relresultid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="relresulttype" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resultacceptanceyear" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="resultauthor" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resultbestaccessright" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="resultdateofacceptance" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="resultdescription" type="text_en" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resultdupid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resultembargoenddate" type="date" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="resultembargoendyear" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="resulthostingdatasource" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resulthostingdatasourceid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resultidentifier" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resultlanguagename" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="resultpublisher" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resultsource" type="text_common" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resultsubject" type="text_en" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resulttitle" type="text_en" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="resulttypeid" type="string" docValues="false" multiValued="false" indexed="true" stored="false"/>
+  <field name="sdg" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="semrelid" type="string" docValues="false" multiValued="true" indexed="true" stored="false"/>
+  <field name="status" type="string_ci" docValues="false" multiValued="true" indexed="true" stored="false"/>
  <field name="text" type="text_common" indexed="false" stored="false"/>
-  <field name="trust" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="versioning" type="string" multiValued="true" indexed="true" stored="false"/>
-  <field name="isgreen" type="boolean" multiValued="false" indexed="true" stored="false"/>
-  <field name="openaccesscolor" type="string" multiValued="false" indexed="true" stored="false"/>
-  <field name="isindiamondjournal" type="boolean" multiValued="false" indexed="true" stored="false"/>
-  <field name="publiclyfunded" type="boolean" multiValued="false" indexed="true" stored="false"/>
-  <field name="peerreviewed" type="boolean" multiValued="false" indexed="true" stored="false"/>
-
-  <!-- Copy field definitions follow: -->
-
-  <!-- Data source -->
  <copyField source="datasourceenglishname" dest="__all"/>
  <copyField source="datasourceoddescription" dest="__all"/>
+  <copyField source="datasourceodsubjects" dest="__all"/>
  <copyField source="datasourceofficialname" dest="__all"/>
  <copyField source="datasourcesubject" dest="__all"/>
-
-  <!-- Organization -->
+  <copyField source="externalreflabel" dest="__all"/>
+  <copyField source="fundinglevel0_description" dest="__all"/>
+  <copyField source="fundinglevel1_description" dest="__all"/>
+  <copyField source="fundinglevel2_description" dest="__all"/>
  <copyField source="organizationalternativenames" dest="__all"/>
-  <copyField source="organizationecenterprise" dest="__all"/>
-  <copyField source="organizationecinternationalorganization" dest="__all"/>
-  <copyField source="organizationecinternationalorganizationeurinterests" dest="__all"/>
-  <copyField source="organizationeclegalbody" dest="__all"/>
-  <copyField source="organizationeclegalperson" dest="__all"/>
-  <copyField source="organizationecnonprofit" dest="__all"/>
-  <copyField source="organizationecnutscode" dest="__all"/>
-  <copyField source="organizationecresearchorganization" dest="__all"/>
-  <copyField source="organizationecsmevalidated" dest="__all"/>
  <copyField source="organizationlegalname" dest="__all"/>
  <copyField source="organizationlegalshortname" dest="__all"/>
-
-  <!-- Project -->
  <copyField source="projectacronym" dest="__all"/>
  <copyField source="projectcode" dest="__all"/>
  <copyField source="projectkeywords" dest="__all"/>
  <copyField source="projecttitle" dest="__all"/>
-
-  <!-- Result -->
-  <copyField source="resultpublisher" dest="__all"/>
-  <copyField source="resultsource" dest="__all"/>
-  <copyField source="resultidentifier" dest="__all"/>
-  <copyField source="resultauthor" dest="__all"/>
-  <copyField source="resulttitle" dest="__all"/>
-  <copyField source="resultdescription" dest="__all"/>
-  <copyField source="resultsubject" dest="__all"/>
-  <copyField source="resultacceptanceyear" dest="__all"/>
-
-  <!-- Other -->
-  <copyField source="externalreflabel" dest="__all"/>
-
-  <copyField source="fundinglevel0_description" dest="__all"/>
-  <copyField source="fundinglevel1_description" dest="__all"/>
-  <copyField source="fundinglevel2_description" dest="__all"/>
-
+  <copyField source="projecttitle_alternative" dest="__all"/>
  <copyField source="relcontracttypename" dest="__all"/>
  <copyField source="relorganizationcountryname" dest="__all"/>
  <copyField source="relorganizationname" dest="__all"/>
  <copyField source="relorganizationshortname" dest="__all"/>
-  <copyField source="relprojecttitle" dest="__all"/>
  <copyField source="relprojectname" dest="__all"/>
-
+  <copyField source="relprojecttitle" dest="__all"/>
+  <copyField source="resultacceptanceyear" dest="__all"/>
+  <copyField source="resultauthor" dest="__all"/>
+  <copyField source="resultdescription" dest="__all"/>
+  <copyField source="resultidentifier" dest="__all"/>
+  <copyField source="resultpublisher" dest="__all"/>
+  <copyField source="resultsource" dest="__all"/>
+  <copyField source="resulttitle" dest="__all"/>
 </schema>
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/xml/part-00000.json.gz
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/xml/part-00000.json.gz
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/xml/xml_json_sample.json.gz
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/xml/xml_json_sample.json.gz
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh
@ -63,7 +63,7 @@ function copydb() {
  start_db_time=$(date +%s)

  # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+  impala-shell -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
  log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
  if [ -n "$log_errors" ]; then
    echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE FROM IMPALA CLUSTER! EXITING...\n\n"
@ -120,7 +120,7 @@ function copydb() {
  start_create_schema_time=$(date +%s)

  # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
+  impala-shell -i ${IMPALA_HOSTNAME} -q "create database ${db}"

  # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
  # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
@ -148,7 +148,7 @@ function copydb() {
            exit 5
          fi  # This error is not FATAL, do we do not return from this function, in normal circumstances.
      else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
+        impala-shell -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
        log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
        if [ -n "$log_errors" ]; then
          echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
@ -182,7 +182,7 @@ function copydb() {
    new_num_of_views_to_retry=0

    for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+      impala-shell -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
      specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
      if [ -n "$specific_errors" ]; then
        echo -e "\nspecific_errors: ${specific_errors}\n"
@ -212,7 +212,7 @@ function copydb() {
    previous_num_of_views_to_retry=$new_num_of_views_to_retry
  done

-  entities_on_impala=(`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
+  entities_on_impala=(`impala-shell -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
  echo -e "\nThe schema of db '${db}', along with ${#entities_on_impala[@]} entities have been created, on Impala cluster, after: $(print_elapsed_time start_create_schema_time)\n"

  start_compute_stats_time=$(date +%s)
@ -222,9 +222,9 @@ function copydb() {
    create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
    if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
      # Invalidate metadata of this DB's tables, in order for Impala to be aware of all parquet files put inside the tables' directories, previously, by "hadoop distcp".
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
+      impala-shell -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
      sleep 1
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
+      impala-shell -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
      log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
      if [ -n "$log_errors" ]; then
        echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN COMPUTING STATS FOR TABLE '${i}'!\n\n"
--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh
@ -63,7 +63,7 @@ function copydb() {
  start_db_time=$(date +%s)

  # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+  impala-shell -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
  log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
  if [ -n "$log_errors" ]; then
    echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE FROM IMPALA CLUSTER! EXITING...\n\n"
@ -120,7 +120,7 @@ function copydb() {
  start_create_schema_time=$(date +%s)

  # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
+  impala-shell -i ${IMPALA_HOSTNAME} -q "create database ${db}"

  # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
  # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
@ -148,7 +148,7 @@ function copydb() {
            exit 5
          fi  # This error is not FATAL, do we do not return from this function, in normal circumstances.
      else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
+        impala-shell -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
        log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
        if [ -n "$log_errors" ]; then
          echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
@ -182,7 +182,7 @@ function copydb() {
    new_num_of_views_to_retry=0

    for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+      impala-shell -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
      specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
      if [ -n "$specific_errors" ]; then
        echo -e "\nspecific_errors: ${specific_errors}\n"
@ -212,7 +212,7 @@ function copydb() {
    previous_num_of_views_to_retry=$new_num_of_views_to_retry
  done

-  entities_on_impala=(`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
+  entities_on_impala=(`impala-shell -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
  echo -e "\nThe schema of db '${db}', along with ${#entities_on_impala[@]} entities have been created, on Impala cluster, after: $(print_elapsed_time start_create_schema_time)\n"

  start_compute_stats_time=$(date +%s)
@ -222,9 +222,9 @@ function copydb() {
    create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
    if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
      # Invalidate metadata of this DB's tables, in order for Impala to be aware of all parquet files put inside the tables' directories, previously, by "hadoop distcp".
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
+      impala-shell -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
      sleep 1
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
+      impala-shell -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
      log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
      if [ -n "$log_errors" ]; then
        echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN COMPUTING STATS FOR TABLE '${i}'!\n\n"
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh
@ -63,7 +63,7 @@ function copydb() {
  start_db_time=$(date +%s)

  # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+  impala-shell -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
  log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
  if [ -n "$log_errors" ]; then
    echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE FROM IMPALA CLUSTER! EXITING...\n\n"
@ -120,7 +120,7 @@ function copydb() {
  start_create_schema_time=$(date +%s)

  # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
+  impala-shell -i ${IMPALA_HOSTNAME} -q "create database ${db}"

  # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
  # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
@ -148,7 +148,7 @@ function copydb() {
            exit 5
          fi  # This error is not FATAL, do we do not return from this function, in normal circumstances.
      else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
+        impala-shell -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
        log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
        if [ -n "$log_errors" ]; then
          echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
@ -182,7 +182,7 @@ function copydb() {
    new_num_of_views_to_retry=0

    for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+      impala-shell -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
      specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
      if [ -n "$specific_errors" ]; then
        echo -e "\nspecific_errors: ${specific_errors}\n"
@ -212,7 +212,7 @@ function copydb() {
    previous_num_of_views_to_retry=$new_num_of_views_to_retry
  done

-  entities_on_impala=(`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
+  entities_on_impala=(`impala-shell -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
  echo -e "\nThe schema of db '${db}', along with ${#entities_on_impala[@]} entities have been created, on Impala cluster, after: $(print_elapsed_time start_create_schema_time)\n"

  start_compute_stats_time=$(date +%s)
@ -222,9 +222,9 @@ function copydb() {
    create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
    if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
      # Invalidate metadata of this DB's tables, in order for Impala to be aware of all parquet files put inside the tables' directories, previously, by "hadoop distcp".
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
+      impala-shell -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
      sleep 1
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
+      impala-shell -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
      log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
      if [ -n "$log_errors" ]; then
        echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN COMPUTING STATS FOR TABLE '${i}'!\n\n"
--- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@ -1,4 +1,4 @@
-<workflow-app name="Graph Stats" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="Promote Graph Stats" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>stats_db_name</name>
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
@ -65,7 +65,7 @@ function copydb() {
  start_db_time=$(date +%s)

  # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+  impala-shell -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
  log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
  if [ -n "$log_errors" ]; then
    echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE FROM IMPALA CLUSTER! EXITING...\n\n"
@ -122,7 +122,7 @@ function copydb() {
  start_create_schema_time=$(date +%s)

  # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
+  impala-shell -i ${IMPALA_HOSTNAME} -q "create database ${db}"

  # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
  # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
@ -150,7 +150,7 @@ function copydb() {
            exit 5
          fi  # This error is not FATAL, do we do not return from this function, in normal circumstances.
      else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
+        impala-shell -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
        log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
        if [ -n "$log_errors" ]; then
          echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
@ -184,7 +184,7 @@ function copydb() {
    new_num_of_views_to_retry=0

    for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+      impala-shell -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
      specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
      if [ -n "$specific_errors" ]; then
        echo -e "\nspecific_errors: ${specific_errors}\n"
@ -214,7 +214,7 @@ function copydb() {
    previous_num_of_views_to_retry=$new_num_of_views_to_retry
  done

-  entities_on_impala=(`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
+  entities_on_impala=(`impala-shell -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
  echo -e "\nThe schema of db '${db}', along with ${#entities_on_impala[@]} entities have been created, on Impala cluster, after: $(print_elapsed_time start_create_schema_time)\n"

  start_compute_stats_time=$(date +%s)
@ -224,9 +224,9 @@ function copydb() {
    create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
    if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
      # Invalidate metadata of this DB's tables, in order for Impala to be aware of all parquet files put inside the tables' directories, previously, by "hadoop distcp".
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
+      impala-shell -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
      sleep 1
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
+      impala-shell -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
      log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
      if [ -n "$log_errors" ]; then
        echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN COMPUTING STATS FOR TABLE '${i}'!\n\n"
@ -271,8 +271,7 @@ copydb $MONITOR_DB'_institutions'
 copydb $MONITOR_DB'_ris_tail'

 contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
-for i in ${contexts}
-do
+for i in ${contexts}; do
  tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
  copydb ${MONITOR_DB}'_'${tmp}
 done
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/createPDFsAggregated.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/createPDFsAggregated.sh
@ -6,12 +6,16 @@ then
    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
 fi

+export HADOOP_USER_NAME=$3
+
+IMPALA_HOSTNAME='impala-cluster-dn1.openaire.eu'
+
 function createPDFsAggregated() {
  db=$1

-impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop table if exists indi_is_result_accessible";
+  impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop table if exists indi_is_result_accessible";

-impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} -q "create table indi_is_result_accessible stored as parquet as
+  impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "create table indi_is_result_accessible stored as parquet as
    select distinct p.id, coalesce(is_result_accessible, 0) as is_result_accessible from result p
    left outer join
      (select id, 1 as is_result_accessible from (select pl.* from result r
@ -20,7 +24,8 @@ impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db}
      union all
      select pl.* from result r
      join pdfaggregation_i.publication p on r.id=p.dedupid
-    join pdfaggregation_i.payload pl on pl.id=p.id) foo) tmp on p.id=tmp.id";
+      join pdfaggregation_i.payload pl on pl.id=p.id) foo)
+      tmp on p.id=tmp.id";
 }

 STATS_DB=$1
@ -35,8 +40,7 @@ createPDFsAggregated $MONITOR_DB'_institutions'
 createPDFsAggregated $MONITOR_DB'_ris_tail'

 contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
-for i in ${contexts}
-do
+for i in ${contexts}; do
  tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
  createPDFsAggregated ${MONITOR_DB}'_'${tmp}
 done
--- a/dhp-workflows/dhp-swh/pom.xml
+++ b/dhp-workflows/dhp-swh/pom.xml
@ -51,49 +51,6 @@
            <artifactId>hadoop-distcp</artifactId>
        </dependency>

-        <dependency>
-            <groupId>eu.dnetlib</groupId>
-            <artifactId>dnet-actionmanager-api</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>eu.dnetlib</groupId>
-            <artifactId>dnet-actionmanager-common</artifactId>
-            <exclusions>
-                <exclusion>
-                    <groupId>eu.dnetlib</groupId>
-                    <artifactId>dnet-openaireplus-mapping-utils</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>saxonica</groupId>
-                    <artifactId>saxon</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>saxonica</groupId>
-                    <artifactId>saxon-dom</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>jgrapht</groupId>
-                    <artifactId>jgrapht</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>net.sf.ehcache</groupId>
-                    <artifactId>ehcache</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.springframework</groupId>
-                    <artifactId>spring-test</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.*</groupId>
-                    <artifactId>*</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>apache</groupId>
-                    <artifactId>*</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
--- a/pom.xml
+++ b/pom.xml
@ -440,29 +440,6 @@
                <scope>provided</scope>
            </dependency>

-            <dependency>
-                <groupId>eu.dnetlib</groupId>
-                <artifactId>dnet-actionmanager-common</artifactId>
-                <version>${dnet-actionmanager-common.version}</version>
-                <exclusions>
-                    <exclusion>
-                        <groupId>org.apache.hadoop</groupId>
-                        <artifactId>hadoop-common</artifactId>
-                    </exclusion>
-                </exclusions>
-            </dependency>
-            <dependency>
-                <groupId>eu.dnetlib</groupId>
-                <artifactId>dnet-actionmanager-api</artifactId>
-                <version>${dnet-actionmanager-api.version}</version>
-                <exclusions>
-                    <exclusion>
-                        <groupId>eu.dnetlib</groupId>
-                        <artifactId>cnr-misc-utils</artifactId>
-                    </exclusion>
-                </exclusions>
-            </dependency>
-
            <dependency>
                <groupId>eu.dnetlib</groupId>
                <artifactId>cnr-rmi-api</artifactId>
@ -960,7 +937,7 @@
        <commons.logging.version>1.1.3</commons.logging.version>
        <commons-validator.version>1.7</commons-validator.version>
        <dateparser.version>1.0.7</dateparser.version>
-        <dhp-schemas.version>[6.1.3-SNAPSHOT]</dhp-schemas.version>
+        <dhp-schemas.version>[7.0.0]</dhp-schemas.version>
        <dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
        <dhp.commons.lang.version>3.5</dhp.commons.lang.version>
        <dhp.guava.version>11.0.2</dhp.guava.version>
@ -969,8 +946,6 @@
        <dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>
        <dhp.site.skip>true</dhp.site.skip>
        <dhp.spark.version>2.4.0.cloudera2</dhp.spark.version>
-        <dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
-        <dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
        <dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
        <google.gson.version>2.2.2</google.gson.version>
        <log4j.version>1.2.17</log4j.version>