resolved conflict

[DataciteHostedByMap] added entry for EBRAINS
Merge pull request '[BETA] fixed the result_country definition and updated the stats DB copy procedure' (#416 ) from antonis.lempesis/dnet-hadoop:beta into beta
2024-04-11 17:38:16 +02:00 · 2024-04-04 09:14:58 +02:00 · 2024-04-03 12:36:03 +02:00 · 2024-04-03 13:15:37 +03:00 · 2024-04-03 09:50:41 +02:00 · 2024-04-03 09:50:21 +02:00
242 changed files with 111281 additions and 1604 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@ -145,105 +145,6 @@ public class AuthorMerger {
 		return null;
 	}

-	/**
-	 * This method tries to figure out when two author are the same in the contest
-	 * of ORCID enrichment
-	 *
-	 * @param left  Author in the OAF entity
-	 * @param right Author ORCID
-	 * @return based on a heuristic on the names of the authors if they are the same.
-	 */
-	public static boolean checkORCIDSimilarity(final Author left, final Author right) {
-		final Person pl = parse(left);
-		final Person pr = parse(right);
-
-		// If one of them didn't have a surname we verify if they have the fullName not empty
-		// and verify if the normalized version is equal
-		if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
-			pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
-
-			if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
-				&& !pr.getFullname().isEmpty()) {
-				return pl
-					.getFullname()
-					.stream()
-					.anyMatch(
-						fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
-			} else {
-				return false;
-			}
-		}
-		// The Authors have one surname in common
-		if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
-
-			// If one of them has only a surname and is the same we can say that they are the same author
-			if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
-				(pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
-				return true;
-			// The authors have the same initials of Name in common
-			if (pl
-				.getName()
-				.stream()
-				.anyMatch(
-					nl -> pr
-						.getName()
-						.stream()
-						.anyMatch(nr -> nr.equalsIgnoreCase(nl))))
-				return true;
-		}
-
-		// Sometimes we noticed that publication have author wrote in inverse order Surname, Name
-		// We verify if we have an exact match between name and surname
-		if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
-			pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl))))
-			return true;
-		else
-			return false;
-	}
-	//
-
-	/**
-	 * Method to enrich ORCID information in one list of authors based on another list
-	 *
-	 * @param baseAuthor  the Author List in the OAF Entity
-	 * @param orcidAuthor The list of ORCID Author intersected
-	 * @return The Author List of the OAF Entity enriched with the orcid Author
-	 */
-	public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
-
-		if (baseAuthor == null || baseAuthor.isEmpty())
-			return orcidAuthor;
-
-		if (orcidAuthor == null || orcidAuthor.isEmpty())
-			return baseAuthor;
-
-		if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
-			return baseAuthor;
-
-		final List<Author> oAuthor = new ArrayList<>();
-		oAuthor.addAll(orcidAuthor);
-
-		baseAuthor.forEach(ba -> {
-			Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
-			if (aMatch.isPresent()) {
-				final Author sameAuthor = aMatch.get();
-				addPid(ba, sameAuthor.getPid());
-				oAuthor.remove(sameAuthor);
-			}
-		});
-		return baseAuthor;
-	}
-
-	private static void addPid(final Author a, final List<StructuredProperty> pids) {
-
-		if (a.getPid() == null) {
-			a.setPid(new ArrayList<>());
-		}
-
-		a.getPid().addAll(pids);
-
-	}
-
 	public static String pidToComparableString(StructuredProperty pid) {
 		final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
 			: "";
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
@ -26,7 +26,7 @@ import eu.dnetlib.dhp.schema.common.EntityType;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -135,7 +135,7 @@ public class GroupEntitiesSparkJob {
 					.applyCoarVocabularies(entity, vocs),
 				OAFENTITY_KRYO_ENC)
 			.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
-			.reduceGroups((ReduceFunction<OafEntity>) OafMapperUtils::mergeEntities)
+			.reduceGroups((ReduceFunction<OafEntity>) MergeUtils::checkedMerge)
 			.map(
 				(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
 					t._2().getClass().getName(), t._2()),
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
@ -0,0 +1,76 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class CleaningFunctions {
+
+	public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
+	public static final String DOI_PREFIX = "10.";
+
+	public static final Set<String> PID_BLACKLIST = new HashSet<>();
+
+	static {
+		PID_BLACKLIST.add("none");
+		PID_BLACKLIST.add("na");
+	}
+
+	public CleaningFunctions() {
+	}
+
+	/**
+	 * Utility method that filter PID values on a per-type basis.
+	 * @param s the PID whose value will be checked.
+	 * @return false if the pid matches the filter criteria, true otherwise.
+	 */
+	public static boolean pidFilter(StructuredProperty s) {
+		final String pidValue = s.getValue();
+		if (Objects.isNull(s.getQualifier()) ||
+			StringUtils.isBlank(pidValue) ||
+			StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
+			return false;
+		}
+		if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
+			return false;
+		}
+		return !PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue);
+	}
+
+	/**
+	 * Utility method that normalises PID values on a per-type basis.
+	 * @param pid the PID whose value will be normalised.
+	 * @return the PID containing the normalised value.
+	 */
+	public static StructuredProperty normalizePidValue(StructuredProperty pid) {
+		pid
+			.setValue(
+				normalizePidValue(
+					pid.getQualifier().getClassid(),
+					pid.getValue()));
+
+		return pid;
+	}
+
+	public static String normalizePidValue(String pidType, String pidValue) {
+		String value = Optional
+			.ofNullable(pidValue)
+			.map(String::trim)
+			.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
+
+		switch (pidType) {
+
+			// TODO add cleaning for more PID types as needed
+			case "doi":
+				return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
+		}
+		return value;
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@ -506,6 +506,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 								.filter(Objects::nonNull)
 								.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
 								.map(GraphCleaningFunctions::cleanValue)
+								.sorted((s1, s2) -> s2.getValue().length() - s1.getValue().length())
+								.limit(ModelHardLimits.MAX_ABSTRACTS)
 								.collect(Collectors.toList()));
 				}
 				if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java
@ -0,0 +1,294 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+
+import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.util.*;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.collect.HashBiMap;
+import com.google.common.collect.Maps;
+
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.*;
+
+/**
+ * Factory class for OpenAIRE identifiers in the Graph
+ */
+public class IdentifierFactory implements Serializable {
+
+	public static final String ID_SEPARATOR = "::";
+	public static final String ID_PREFIX_SEPARATOR = "|";
+
+	public static final int ID_PREFIX_LEN = 12;
+
+	/**
+	 * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE.
+	 * The id of the record (source_::id) will be rewritten as pidType_::id)
+	 */
+	public static final Map<PidType, HashBiMap<String, String>> PID_AUTHORITY = Maps.newHashMap();
+
+	static {
+		PID_AUTHORITY.put(PidType.doi, HashBiMap.create());
+		PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref");
+		PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite");
+		PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO");
+		PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo");
+
+		PID_AUTHORITY.put(PidType.pmc, HashBiMap.create());
+		PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
+		PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central");
+
+		PID_AUTHORITY.put(PidType.pmid, HashBiMap.create());
+		PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
+		PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central");
+
+		PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create());
+		PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive");
+
+		PID_AUTHORITY.put(PidType.w3id, HashBiMap.create());
+		PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ROHub");
+	}
+
+	/**
+	 * Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that
+	 * PID_TYPE. Example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word.
+	 *
+	 * If a record with the same id (same pid) comes from 2 data sources, the one coming from a delegated source wins. E.g. Zenodo records win over those from Datacite.
+	 * See also https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/187 and the class dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+	 */
+	public static final Map<PidType, Map<String, String>> DELEGATED_PID_AUTHORITY = Maps.newHashMap();
+
+	static {
+		DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>());
+		DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo");
+		DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo");
+		DELEGATED_PID_AUTHORITY.put(PidType.w3id, new HashMap<>());
+		DELEGATED_PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ro-id");
+	}
+
+	/**
+	 * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] whose records are considered enrichment for the graph.
+	 * Their OpenAIRE ID is built from the declared PID type. Are merged with their corresponding record, identified by
+	 * the same OpenAIRE id.
+	 */
+	public static final Map<PidType, HashBiMap<String, String>> ENRICHMENT_PROVIDER = Maps.newHashMap();
+
+	static {
+		ENRICHMENT_PROVIDER.put(PidType.doi, HashBiMap.create());
+		ENRICHMENT_PROVIDER.get(PidType.doi).put(OPEN_APC_ID, OPEN_APC_NAME);
+	}
+
+	public static Set<String> delegatedAuthorityDatasourceIds() {
+		return DELEGATED_PID_AUTHORITY
+			.values()
+			.stream()
+			.flatMap(m -> m.keySet().stream())
+			.collect(Collectors.toCollection(HashSet::new));
+	}
+
+	public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
+		return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
+	}
+
+	public static <T extends Result> String createDOIBoostIdentifier(T entity) {
+		if (entity == null)
+			return null;
+
+		StructuredProperty pid = null;
+		if (entity.getPid() != null) {
+			pid = entity
+				.getPid()
+				.stream()
+				.filter(Objects::nonNull)
+				.filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
+				.filter(CleaningFunctions::pidFilter)
+				.findAny()
+				.orElse(null);
+		} else {
+			if (entity.getInstance() != null) {
+				pid = entity
+					.getInstance()
+					.stream()
+					.filter(i -> i.getPid() != null)
+					.flatMap(i -> i.getPid().stream())
+					.filter(CleaningFunctions::pidFilter)
+					.findAny()
+					.orElse(null);
+			}
+		}
+		if (pid != null)
+			return idFromPid(entity, pid, true);
+		return null;
+	}
+
+	/**
+	 * Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given
+	 * entity T. Returns entity.id when none of the PIDs meet the selection criteria is available.
+	 *
+	 * @param entity the entity providing PIDs and a default ID.
+	 * @param <T> the specific entity type. Currently Organization and Result subclasses are supported.
+	 * @param md5 indicates whether should hash the PID value or not.
+	 * @return an identifier from the most relevant PID, entity.id otherwise
+	 */
+	public static <T extends OafEntity> String createIdentifier(T entity, boolean md5) {
+
+		checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
+
+		final Map<String, Set<StructuredProperty>> pids = extractPids(entity);
+
+		return pids
+			.values()
+			.stream()
+			.flatMap(Set::stream)
+			.min(new PidComparator<>(entity))
+			.map(
+				min -> Optional
+					.ofNullable(pids.get(min.getQualifier().getClassid()))
+					.map(
+						p -> p
+							.stream()
+							.sorted(new PidValueComparator())
+							.findFirst()
+							.map(s -> idFromPid(entity, s, md5))
+							.orElseGet(entity::getId))
+					.orElseGet(entity::getId))
+			.orElseGet(entity::getId);
+	}
+
+	private static <T extends OafEntity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
+		if (entity instanceof Result) {
+			return Optional
+				.ofNullable(((Result) entity).getInstance())
+				.map(IdentifierFactory::mapPids)
+				.orElse(new HashMap<>());
+		} else {
+			return entity
+				.getPid()
+				.stream()
+				.map(CleaningFunctions::normalizePidValue)
+				.filter(CleaningFunctions::pidFilter)
+				.collect(
+					Collectors
+						.groupingBy(
+							p -> p.getQualifier().getClassid(),
+							Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
+		}
+	}
+
+	private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
+		return instance
+			.stream()
+			.map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
+			.flatMap(Function.identity())
+			.collect(
+				Collectors
+					.groupingBy(
+						p -> p.getQualifier().getClassid(),
+						Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
+	}
+
+	private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom,
+		boolean mapHandles) {
+		return Optional
+			.ofNullable(pid)
+			.map(
+				pp -> pp
+					.stream()
+					// filter away PIDs provided by a DS that is not considered an authority for the
+					// given PID Type
+					.filter(p -> shouldFilterPidByCriteria(collectedFrom, p, mapHandles))
+					.map(CleaningFunctions::normalizePidValue)
+					.filter(p -> isNotFromDelegatedAuthority(collectedFrom, p))
+					.filter(CleaningFunctions::pidFilter))
+			.orElse(Stream.empty());
+	}
+
+	private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
+		final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
+
+		if (Objects.isNull(collectedFrom)) {
+			return false;
+		}
+
+		boolean isEnrich = Optional
+			.ofNullable(ENRICHMENT_PROVIDER.get(pType))
+			.map(
+				enrich -> enrich.containsKey(collectedFrom.getKey())
+					|| enrich.containsValue(collectedFrom.getValue()))
+			.orElse(false);
+
+		boolean isAuthority = Optional
+			.ofNullable(PID_AUTHORITY.get(pType))
+			.map(
+				authorities -> authorities.containsKey(collectedFrom.getKey())
+					|| authorities.containsValue(collectedFrom.getValue()))
+			.orElse(false);
+
+		return (mapHandles && pType.equals(PidType.handle)) || isEnrich || isAuthority;
+	}
+
+	private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, StructuredProperty p) {
+		final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
+
+		final Map<String, String> da = DELEGATED_PID_AUTHORITY.get(pType);
+		if (Objects.isNull(da)) {
+			return true;
+		}
+		if (!da.containsKey(collectedFrom.getKey())) {
+			return true;
+		}
+		return StringUtils.contains(p.getValue(), da.get(collectedFrom.getKey()));
+	}
+
+	/**
+	 * @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)}
+	 */
+	public static <T extends OafEntity> String createIdentifier(T entity) {
+
+		return createIdentifier(entity, true);
+	}
+
+	private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s, boolean md5) {
+		return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5);
+	}
+
+	public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
+		return new StringBuilder()
+			.append(numericPrefix)
+			.append(ID_PREFIX_SEPARATOR)
+			.append(createPrefix(pidType))
+			.append(ID_SEPARATOR)
+			.append(md5 ? md5(pidValue) : pidValue)
+			.toString();
+	}
+
+	// create the prefix (length = 12)
+	private static String createPrefix(String pidType) {
+		StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN));
+		while (prefix.length() < ID_PREFIX_LEN) {
+			prefix.append("_");
+		}
+		return prefix.substring(0, ID_PREFIX_LEN);
+	}
+
+	public static String md5(final String s) {
+		try {
+			final MessageDigest md = MessageDigest.getInstance("MD5");
+			md.update(s.getBytes(StandardCharsets.UTF_8));
+			return new String(Hex.encodeHex(md.digest()));
+		} catch (final Exception e) {
+			return null;
+		}
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeComparator.java
@ -0,0 +1,78 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+// 
+// Source code recreated from a .class file by IntelliJ IDEA
+// (powered by FernFlower decompiler)
+//
+import eu.dnetlib.dhp.schema.common.EntityType;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.oaf.Oaf;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class MergeComparator implements Comparator<Oaf> {
+	public MergeComparator() {
+	}
+
+	public int compare(Oaf left, Oaf right) {
+		// nulls at the end
+		if (left == null && right == null) {
+			return 0;
+		} else if (left == null) {
+			return -1;
+		} else if (right == null) {
+			return 1;
+		}
+
+		// invisible
+		if (left.getDataInfo() != null && left.getDataInfo().getInvisible() == true) {
+			if (right.getDataInfo() != null && right.getDataInfo().getInvisible() == false) {
+				return -1;
+			}
+		}
+
+		// collectedfrom
+		HashSet<String> lCf = getCollectedFromIds(left);
+		HashSet<String> rCf = getCollectedFromIds(right);
+		if (lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
+			&& !rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
+			return -1;
+		} else if (!lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
+			&& rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
+			return 1;
+		}
+
+		SubEntityType lClass = SubEntityType.fromClass(left.getClass());
+		SubEntityType rClass = SubEntityType.fromClass(right.getClass());
+		return lClass.ordinal() - rClass.ordinal();
+
+	}
+
+	protected HashSet<String> getCollectedFromIds(Oaf left) {
+		return (HashSet) Optional.ofNullable(left.getCollectedfrom()).map((cf) -> {
+			return (HashSet) cf.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
+		}).orElse(new HashSet());
+	}
+
+	enum SubEntityType {
+		publication, dataset, software, otherresearchproduct, datasource, organization, project;
+
+		/**
+		 * Resolves the EntityType, given the relative class name
+		 *
+		 * @param clazz the given class name
+		 * @param <T> actual OafEntity subclass
+		 * @return the EntityType associated to the given class
+		 */
+		public static <T extends Oaf> SubEntityType fromClass(Class<T> clazz) {
+			return valueOf(clazz.getSimpleName().toLowerCase());
+		}
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
@ -0,0 +1,891 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static org.apache.commons.lang3.ObjectUtils.firstNonNull;
+
+import java.text.ParseException;
+import java.time.ZoneId;
+import java.util.*;
+import java.util.function.BinaryOperator;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+
+import com.github.sisyphsu.dateparser.DateParserUtils;
+import com.google.common.base.Joiner;
+
+import eu.dnetlib.dhp.schema.common.AccessRightComparator;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.*;
+
+public class MergeUtils {
+
+	public static <T extends Oaf> T checkedMerge(final T left, final T right) {
+		return (T) merge(left, right, false);
+	}
+
+	public static Oaf merge(final Oaf left, final Oaf right) {
+		return merge(left, right, false);
+	}
+
+	public static Oaf merge(final Oaf left, final Oaf right, boolean checkDelegatedAuthority) {
+		if (sameClass(left, right, OafEntity.class)) {
+			return mergeEntities(left, right, checkDelegatedAuthority);
+		} else if (sameClass(left, right, Relation.class)) {
+			return mergeRelation((Relation) left, (Relation) right);
+		} else {
+			throw new RuntimeException(
+				String
+					.format(
+						"MERGE_FROM_AND_GET incompatible types: %s, %s",
+						left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
+		}
+	}
+
+	private static <T extends Oaf> boolean sameClass(Object left, Object right, Class<T> cls) {
+		return cls.isAssignableFrom(left.getClass()) && cls.isAssignableFrom(right.getClass());
+	}
+
+	private static Oaf mergeEntities(Oaf left, Oaf right, boolean checkDelegatedAuthority) {
+
+		if (sameClass(left, right, Result.class)) {
+			if (!left.getClass().equals(right.getClass()) || checkDelegatedAuthority) {
+				return mergeResultsOfDifferentTypes((Result) left, (Result) right);
+			}
+
+			if (sameClass(left, right, Publication.class)) {
+				return mergePublication((Publication) left, (Publication) right);
+			}
+			if (sameClass(left, right, Dataset.class)) {
+				return mergeDataset((Dataset) left, (Dataset) right);
+			}
+			if (sameClass(left, right, OtherResearchProduct.class)) {
+				return mergeORP((OtherResearchProduct) left, (OtherResearchProduct) right);
+			}
+			if (sameClass(left, right, Software.class)) {
+				return mergeSoftware((Software) left, (Software) right);
+			}
+
+			return mergeResult((Result) left, (Result) right);
+		} else if (sameClass(left, right, Datasource.class)) {
+			// TODO
+			final int trust = compareTrust(left, right);
+			return mergeOafEntityFields((Datasource) left, (Datasource) right, trust);
+		} else if (sameClass(left, right, Organization.class)) {
+			return mergeOrganization((Organization) left, (Organization) right);
+		} else if (sameClass(left, right, Project.class)) {
+			return mergeProject((Project) left, (Project) right);
+		} else {
+			throw new RuntimeException(
+				String
+					.format(
+						"MERGE_FROM_AND_GET incompatible types: %s, %s",
+						left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
+		}
+	}
+
+	/**
+	 * This method is used in the global result grouping phase. It checks if one of the two is from a delegated authority
+	 * https://graph.openaire.eu/docs/data-model/pids-and-identifiers#delegated-authorities and in that case it prefers
+	 * such version.
+	 * <p>
+	 * Otherwise, it considers a resulttype priority order implemented in {@link ResultTypeComparator}
+	 * and proceeds with the canonical property merging.
+	 *
+	 * @param left
+	 * @param right
+	 * @return
+	 */
+	private static <T extends Result> T mergeResultsOfDifferentTypes(T left, T right) {
+
+		final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left);
+		final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right);
+
+		if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) {
+			return left;
+		}
+		if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
+			return right;
+		}
+		// TODO: raise trust to have preferred fields from one or the other??
+		if (new ResultTypeComparator().compare(left, right) < 0) {
+			return mergeResult(left, right);
+		} else {
+			return mergeResult(right, left);
+		}
+	}
+
+	private static DataInfo chooseDataInfo(DataInfo left, DataInfo right, int trust) {
+		if (trust > 0) {
+			return left;
+		} else if (trust == 0) {
+			if (left == null || (left.getInvisible() != null && left.getInvisible().equals(Boolean.TRUE))) {
+				return right;
+			} else {
+				return left;
+			}
+		} else {
+			return right;
+		}
+	}
+
+	private static String chooseString(String left, String right, int trust) {
+		if (trust > 0) {
+			return left;
+		} else if (trust == 0) {
+			return StringUtils.isNotBlank(left) ? left : right;
+		} else {
+			return right;
+		}
+	}
+
+	private static <T> T chooseReference(T left, T right, int trust) {
+		if (trust > 0) {
+			return left;
+		} else if (trust == 0) {
+			return left != null ? left : right;
+		} else {
+			return right;
+		}
+	}
+
+	private static Long max(Long left, Long right) {
+		if (left == null)
+			return right;
+		if (right == null)
+			return left;
+
+		return Math.max(left, right);
+	}
+
+	// trust ??
+	private static Boolean booleanOR(Boolean a, Boolean b) {
+		if (a == null) {
+			return b;
+		} else if (b == null) {
+			return a;
+		}
+
+		return a || b;
+	}
+
+	private static <T, K> List<T> mergeLists(final List<T> left, final List<T> right, int trust,
+		Function<T, K> keyExtractor, BinaryOperator<T> merger) {
+		if (left == null) {
+			return right;
+		} else if (right == null) {
+			return left;
+		}
+
+		List<T> h = trust >= 0 ? left : right;
+		List<T> l = trust >= 0 ? right : left;
+
+		return new ArrayList<>(Stream
+			.concat(h.stream(), l.stream())
+			.filter(Objects::nonNull)
+			.distinct()
+			.collect(Collectors.toMap(keyExtractor, v -> v, merger))
+			.values());
+	}
+
+	private static <T, K> List<T> unionDistinctLists(final List<T> left, final List<T> right, int trust) {
+		if (left == null) {
+			return right;
+		} else if (right == null) {
+			return left;
+		}
+
+		List<T> h = trust >= 0 ? left : right;
+		List<T> l = trust >= 0 ? right : left;
+
+		return Stream
+			.concat(h.stream(), l.stream())
+			.filter(Objects::nonNull)
+			.distinct()
+			.collect(Collectors.toList());
+	}
+
+	private static List<String> unionDistinctListOfString(final List<String> l, final List<String> r) {
+		if (l == null) {
+			return r;
+		} else if (r == null) {
+			return l;
+		}
+
+		return Stream
+			.concat(l.stream(), r.stream())
+			.filter(StringUtils::isNotBlank)
+			.distinct()
+			.collect(Collectors.toList());
+	}
+
+	// TODO review
+	private static List<KeyValue> mergeKeyValue(List<KeyValue> left, List<KeyValue> right, int trust) {
+		if (trust < 0) {
+			List<KeyValue> s = left;
+			left = right;
+			right = s;
+		}
+
+		HashMap<String, KeyValue> values = new HashMap<>();
+		left.forEach(kv -> values.put(kv.getKey(), kv));
+		right.forEach(kv -> values.putIfAbsent(kv.getKey(), kv));
+
+		return new ArrayList<>(values.values());
+	}
+
+	private static List<StructuredProperty> unionTitle(List<StructuredProperty> left, List<StructuredProperty> right,
+		int trust) {
+		if (left == null) {
+			return right;
+		} else if (right == null) {
+			return left;
+		}
+
+		List<StructuredProperty> h = trust >= 0 ? left : right;
+		List<StructuredProperty> l = trust >= 0 ? right : left;
+
+		return Stream
+			.concat(h.stream(), l.stream())
+			.filter(Objects::isNull)
+			.distinct()
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Internal utility that merges the common OafEntity fields
+	 *
+	 * @param merged
+	 * @param enrich
+	 * @param <T>
+	 * @return
+	 */
+	private static <T extends Oaf> T mergeOafFields(T merged, T enrich, int trust) {
+
+		// TODO: union of all values, but what does it mean with KeyValue pairs???
+		merged.setCollectedfrom(mergeKeyValue(merged.getCollectedfrom(), enrich.getCollectedfrom(), trust));
+		merged.setDataInfo(chooseDataInfo(merged.getDataInfo(), enrich.getDataInfo(), trust));
+		merged.setLastupdatetimestamp(max(merged.getLastupdatetimestamp(), enrich.getLastupdatetimestamp()));
+
+		return merged;
+	}
+
+	/**
+	 * Internal utility that merges the common OafEntity fields
+	 *
+	 * @param original
+	 * @param enrich
+	 * @param <T>
+	 * @return
+	 */
+	private static <T extends OafEntity> T mergeOafEntityFields(T original, T enrich, int trust) {
+		final T merged = mergeOafFields(original, enrich, trust);
+
+		merged.setOriginalId(unionDistinctListOfString(merged.getOriginalId(), enrich.getOriginalId()));
+		merged.setPid(unionDistinctLists(merged.getPid(), enrich.getPid(), trust));
+		// dateofcollection mettere today quando si fa merge
+		merged.setDateofcollection(chooseString(merged.getDateofcollection(), enrich.getDateofcollection(), trust));
+		// setDateoftransformation mettere vuota in dedup, nota per Claudio
+		merged
+			.setDateoftransformation(
+				chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust));
+		// TODO: was missing in OafEntity.merge
+		merged.setExtraInfo(unionDistinctLists(merged.getExtraInfo(), enrich.getExtraInfo(), trust));
+		// oaiprovenanze da mettere a null quando si genera merge
+		merged.setOaiprovenance(chooseReference(merged.getOaiprovenance(), enrich.getOaiprovenance(), trust));
+		merged.setMeasures(unionDistinctLists(merged.getMeasures(), enrich.getMeasures(), trust));
+
+		return merged;
+	}
+
+	public static <T extends Relation> T mergeRelation(T original, T enrich) {
+		int trust = compareTrust(original, enrich);
+		T merge = mergeOafFields(original, enrich, trust);
+
+		checkArgument(Objects.equals(merge.getSource(), enrich.getSource()), "source ids must be equal");
+		checkArgument(Objects.equals(merge.getTarget(), enrich.getTarget()), "target ids must be equal");
+		checkArgument(Objects.equals(merge.getRelType(), enrich.getRelType()), "relType(s) must be equal");
+		checkArgument(
+			Objects.equals(merge.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal");
+		checkArgument(Objects.equals(merge.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal");
+
+		// merge.setProvenance(mergeLists(merge.getProvenance(), enrich.getProvenance()));
+
+		// TODO: trust ??
+		merge.setValidated(booleanOR(merge.getValidated(), enrich.getValidated()));
+		try {
+			merge.setValidationDate(ModelSupport.oldest(merge.getValidationDate(), enrich.getValidationDate()));
+		} catch (ParseException e) {
+			throw new IllegalArgumentException(String
+				.format(
+					"invalid validation date format in relation [s:%s, t:%s]: %s", merge.getSource(),
+					merge.getTarget(),
+					merge.getValidationDate()));
+		}
+
+		// TODO keyvalue merge
+		merge.setProperties(mergeKeyValue(merge.getProperties(), enrich.getProperties(), trust));
+
+		return merge;
+	}
+
+	public static <T extends Result> T mergeResult(T original, T enrich) {
+		final int trust = compareTrust(original, enrich);
+		T merge = mergeOafEntityFields(original, enrich, trust);
+
+		if (merge.getProcessingchargeamount() == null
+			|| StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) {
+			merge.setProcessingchargeamount(enrich.getProcessingchargeamount());
+			merge.setProcessingchargecurrency(enrich.getProcessingchargecurrency());
+		}
+
+		// author = usare la stessa logica che in dedup
+		merge.setAuthor(chooseReference(merge.getAuthor(), enrich.getAuthor(), trust));
+		// il primo che mi arriva secondo l'ordinamento per priorita'
+		merge.setResulttype(chooseReference(merge.getResulttype(), enrich.getResulttype(), trust));
+		// gestito come il resulttype perche' e' un subtype
+		merge.setMetaResourceType(chooseReference(merge.getMetaResourceType(), enrich.getMetaResourceType(), trust));
+		// spostiamo nell'instance e qui prendo il primo che arriva
+		merge.setLanguage(chooseReference(merge.getLanguage(), enrich.getLanguage(), trust));
+		// country lasicamo,o cosi' -> parentesi sul datainfo
+		merge.setCountry(unionDistinctLists(merge.getCountry(), enrich.getCountry(), trust));
+		// ok
+		merge.setSubject(unionDistinctLists(merge.getSubject(), enrich.getSubject(), trust));
+		// union per priority quindi vanno in append
+		merge.setTitle(unionTitle(merge.getTitle(), enrich.getTitle(), trust));
+		// ok
+		merge.setRelevantdate(unionDistinctLists(merge.getRelevantdate(), enrich.getRelevantdate(), trust));
+		// prima trust e poi longest list
+		merge.setDescription(longestLists(merge.getDescription(), enrich.getDescription()));
+		// trust piu' alto e poi piu' vecchia
+		merge.setDateofacceptance(chooseReference(merge.getDateofacceptance(), enrich.getDateofacceptance(), trust));
+		// ok, ma publisher va messo ripetibile
+		merge.setPublisher(chooseReference(merge.getPublisher(), enrich.getPublisher(), trust));
+		// ok
+		merge.setEmbargoenddate(chooseReference(merge.getEmbargoenddate(), enrich.getEmbargoenddate(), trust));
+		// ok
+		merge.setSource(unionDistinctLists(merge.getSource(), enrich.getSource(), trust));
+		// ok
+		merge.setFulltext(unionDistinctLists(merge.getFulltext(), enrich.getFulltext(), trust));
+		// ok
+		merge.setFormat(unionDistinctLists(merge.getFormat(), enrich.getFormat(), trust));
+		// ok
+		merge.setContributor(unionDistinctLists(merge.getContributor(), enrich.getContributor(), trust));
+
+		// prima prendo l'higher trust, su questo prendo il valore migliore nelle istanze TODO
+		// trust maggiore ma a parita' di trust il piu' specifico (base del vocabolario)
+		// vedi note
+		// cannot use com.google.common.base.Objects.firstNonNull as it throws NPE when both terms are null
+		merge.setResourcetype(firstNonNull(merge.getResourcetype(), enrich.getResourcetype()));
+
+		// ok
+		merge.setCoverage(unionDistinctLists(merge.getCoverage(), enrich.getCoverage(), trust));
+
+		// most open ok
+		if (enrich.getBestaccessright() != null
+			&& new AccessRightComparator<>()
+				.compare(enrich.getBestaccessright(), merge.getBestaccessright()) < 0) {
+			merge.setBestaccessright(enrich.getBestaccessright());
+		}
+
+		// TODO merge of datainfo given same id
+		merge.setContext(unionDistinctLists(merge.getContext(), enrich.getContext(), trust));
+
+		// ok
+		merge
+			.setExternalReference(
+				unionDistinctLists(merge.getExternalReference(), enrich.getExternalReference(), trust));
+
+		// instance enrichment or union
+		// review instance equals => add pid to comparision
+		if (!isAnEnrichment(merge) && !isAnEnrichment(enrich))
+			merge
+				.setInstance(
+					mergeLists(
+						merge.getInstance(), enrich.getInstance(), trust,
+						MergeUtils::instanceKeyExtractor,
+						MergeUtils::instanceMerger));
+		else {
+			final List<Instance> enrichmentInstances = isAnEnrichment(merge) ? merge.getInstance()
+				: enrich.getInstance();
+			final List<Instance> enrichedInstances = isAnEnrichment(merge) ? enrich.getInstance()
+				: merge.getInstance();
+			if (isAnEnrichment(merge))
+				merge.setDataInfo(enrich.getDataInfo());
+			merge.setInstance(enrichInstances(enrichedInstances, enrichmentInstances));
+		}
+
+		merge.setEoscifguidelines(unionDistinctLists(merge.getEoscifguidelines(), enrich.getEoscifguidelines(), trust));
+		merge.setIsGreen(booleanOR(merge.getIsGreen(), enrich.getIsGreen()));
+		// OK but should be list of values
+		merge.setOpenAccessColor(chooseReference(merge.getOpenAccessColor(), enrich.getOpenAccessColor(), trust));
+		merge.setIsInDiamondJournal(booleanOR(merge.getIsInDiamondJournal(), enrich.getIsInDiamondJournal()));
+		merge.setPubliclyFunded(booleanOR(merge.getPubliclyFunded(), enrich.getPubliclyFunded()));
+
+		return merge;
+	}
+
+	private static String instanceKeyExtractor(Instance i) {
+		return String
+			.join(
+				"::",
+				kvKeyExtractor(i.getHostedby()),
+				kvKeyExtractor(i.getCollectedfrom()),
+				qualifierKeyExtractor(i.getAccessright()),
+				qualifierKeyExtractor(i.getInstancetype()),
+				Optional.ofNullable(i.getUrl()).map(u -> String.join("::", u)).orElse(null),
+				Optional
+					.ofNullable(i.getPid())
+					.map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("::")))
+					.orElse(null));
+	}
+
+	private static Instance instanceMerger(Instance i1, Instance i2) {
+		Instance i = new Instance();
+		i.setHostedby(i1.getHostedby());
+		i.setCollectedfrom(i1.getCollectedfrom());
+		i.setAccessright(i1.getAccessright());
+		i.setInstancetype(i1.getInstancetype());
+		i.setPid(mergeLists(i1.getPid(), i2.getPid(), 0, MergeUtils::spKeyExtractor, (sp1, sp2) -> sp1));
+		i
+			.setAlternateIdentifier(
+				mergeLists(
+					i1.getAlternateIdentifier(), i2.getAlternateIdentifier(), 0, MergeUtils::spKeyExtractor,
+					(sp1, sp2) -> sp1));
+
+		i
+			.setRefereed(
+				Collections
+					.min(
+						Stream.of(i1.getRefereed(), i2.getRefereed()).collect(Collectors.toList()),
+						new RefereedComparator()));
+		i
+			.setInstanceTypeMapping(
+				mergeLists(
+					i1.getInstanceTypeMapping(), i2.getInstanceTypeMapping(), 0,
+					MergeUtils::instanceTypeMappingKeyExtractor, (itm1, itm2) -> itm1));
+		i.setFulltext(selectFulltext(i1.getFulltext(), i2.getFulltext()));
+		i.setDateofacceptance(selectOldestDate(i1.getDateofacceptance(), i2.getDateofacceptance()));
+		i.setLicense(firstNonNull(i1.getLicense(), i2.getLicense()));
+		i.setProcessingchargeamount(firstNonNull(i1.getProcessingchargeamount(), i2.getProcessingchargeamount()));
+		i.setProcessingchargecurrency(firstNonNull(i1.getProcessingchargecurrency(), i2.getProcessingchargecurrency()));
+		i
+			.setMeasures(
+				mergeLists(i1.getMeasures(), i2.getMeasures(), 0, MergeUtils::measureKeyExtractor, (m1, m2) -> m1));
+
+		i.setUrl(unionDistinctListOfString(i1.getUrl(), i2.getUrl()));
+
+		return i;
+	}
+
+	private static String measureKeyExtractor(Measure m) {
+		return String
+			.join(
+				"::",
+				m.getId(),
+				m
+					.getUnit()
+					.stream()
+					.map(KeyValue::getKey)
+					.collect(Collectors.joining("::")));
+	}
+
+	private static Field<String> selectOldestDate(Field<String> d1, Field<String> d2) {
+		return Stream
+			.of(d1, d2)
+			.filter(Objects::nonNull)
+			.min(
+				Comparator
+					.comparing(
+						f -> DateParserUtils
+							.parseDate(f.getValue())
+							.toInstant()
+							.atZone(ZoneId.systemDefault())
+							.toLocalDate()))
+			.orElse(d1);
+	}
+
+	private static String selectFulltext(String ft1, String ft2) {
+		if (StringUtils.endsWith(ft1, "pdf")) {
+			return ft1;
+		}
+		if (StringUtils.endsWith(ft2, "pdf")) {
+			return ft2;
+		}
+		return firstNonNull(ft1, ft2);
+	}
+
+	private static String instanceTypeMappingKeyExtractor(InstanceTypeMapping itm) {
+		return String
+			.join(
+				"::",
+				itm.getOriginalType(),
+				itm.getTypeCode(),
+				itm.getTypeLabel(),
+				itm.getVocabularyName());
+	}
+
+	private static String kvKeyExtractor(KeyValue kv) {
+		return Optional.ofNullable(kv).map(KeyValue::getKey).orElse(null);
+	}
+
+	private static String qualifierKeyExtractor(Qualifier q) {
+		return Optional.ofNullable(q).map(Qualifier::getClassid).orElse(null);
+	}
+
+	private static <T> T fieldKeyExtractor(Field<T> f) {
+		return Optional.ofNullable(f).map(Field::getValue).orElse(null);
+	}
+
+	private static String spKeyExtractor(StructuredProperty sp) {
+		return Optional
+			.ofNullable(sp)
+			.map(s -> Joiner.on("::").join(s, qualifierKeyExtractor(s.getQualifier())))
+			.orElse(null);
+	}
+
+	private static <T extends OtherResearchProduct> T mergeORP(T original, T enrich) {
+		int trust = compareTrust(original, enrich);
+		final T merge = mergeResult(original, enrich);
+
+		merge.setContactperson(unionDistinctLists(merge.getContactperson(), enrich.getContactperson(), trust));
+		merge.setContactgroup(unionDistinctLists(merge.getContactgroup(), enrich.getContactgroup(), trust));
+		merge.setTool(unionDistinctLists(merge.getTool(), enrich.getTool(), trust));
+
+		return merge;
+	}
+
+	private static <T extends Software> T mergeSoftware(T original, T enrich) {
+		int trust = compareTrust(original, enrich);
+		final T merge = mergeResult(original, enrich);
+
+		merge.setDocumentationUrl(unionDistinctLists(merge.getDocumentationUrl(), enrich.getDocumentationUrl(), trust));
+		merge.setLicense(unionDistinctLists(merge.getLicense(), enrich.getLicense(), trust));
+		merge.setCodeRepositoryUrl(chooseReference(merge.getCodeRepositoryUrl(), enrich.getCodeRepositoryUrl(), trust));
+		merge
+			.setProgrammingLanguage(
+				chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust));
+
+		return merge;
+	}
+
+	private static <T extends Dataset> T mergeDataset(T original, T enrich) {
+		int trust = compareTrust(original, enrich);
+		T merge = mergeResult(original, enrich);
+
+		merge.setStoragedate(chooseReference(merge.getStoragedate(), enrich.getStoragedate(), trust));
+		merge.setDevice(chooseReference(merge.getDevice(), enrich.getDevice(), trust));
+		merge.setSize(chooseReference(merge.getSize(), enrich.getSize(), trust));
+		merge.setVersion(chooseReference(merge.getVersion(), enrich.getVersion(), trust));
+		merge
+			.setLastmetadataupdate(
+				chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust));
+		merge
+			.setMetadataversionnumber(
+				chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust));
+		merge.setGeolocation(unionDistinctLists(merge.getGeolocation(), enrich.getGeolocation(), trust));
+
+		return merge;
+	}
+
+	public static <T extends Publication> T mergePublication(T original, T enrich) {
+		final int trust = compareTrust(original, enrich);
+		T merged = mergeResult(original, enrich);
+
+		merged.setJournal(chooseReference(merged.getJournal(), enrich.getJournal(), trust));
+
+		return merged;
+	}
+
+	private static <T extends Organization> T mergeOrganization(T left, T enrich) {
+		int trust = compareTrust(left, enrich);
+		T merged = mergeOafEntityFields(left, enrich, trust);
+
+		merged.setLegalshortname(chooseReference(merged.getLegalshortname(), enrich.getLegalshortname(), trust));
+		merged.setLegalname(chooseReference(merged.getLegalname(), enrich.getLegalname(), trust));
+		merged
+			.setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust));
+		merged.setWebsiteurl(chooseReference(merged.getWebsiteurl(), enrich.getWebsiteurl(), trust));
+		merged.setLogourl(chooseReference(merged.getLogourl(), enrich.getLogourl(), trust));
+		merged.setEclegalbody(chooseReference(merged.getEclegalbody(), enrich.getEclegalbody(), trust));
+		merged.setEclegalperson(chooseReference(merged.getEclegalperson(), enrich.getEclegalperson(), trust));
+		merged.setEcnonprofit(chooseReference(merged.getEcnonprofit(), enrich.getEcnonprofit(), trust));
+		merged
+			.setEcresearchorganization(
+				chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust));
+		merged
+			.setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust));
+		merged
+			.setEcinternationalorganizationeurinterests(
+				chooseReference(
+					merged.getEcinternationalorganizationeurinterests(),
+					enrich.getEcinternationalorganizationeurinterests(), trust));
+		merged
+			.setEcinternationalorganization(
+				chooseReference(
+					merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust));
+		merged.setEcenterprise(chooseReference(merged.getEcenterprise(), enrich.getEcenterprise(), trust));
+		merged.setEcsmevalidated(chooseReference(merged.getEcsmevalidated(), enrich.getEcsmevalidated(), trust));
+		merged.setEcnutscode(chooseReference(merged.getEcnutscode(), enrich.getEcnutscode(), trust));
+		merged.setCountry(chooseReference(merged.getCountry(), enrich.getCountry(), trust));
+
+		return merged;
+	}
+
+	public static <T extends Project> T mergeProject(T original, T enrich) {
+		int trust = compareTrust(original, enrich);
+		T merged = mergeOafEntityFields(original, enrich, trust);
+
+		merged.setWebsiteurl(chooseReference(merged.getWebsiteurl(), enrich.getWebsiteurl(), trust));
+		merged.setCode(chooseReference(merged.getCode(), enrich.getCode(), trust));
+		merged.setAcronym(chooseReference(merged.getAcronym(), enrich.getAcronym(), trust));
+		merged.setTitle(chooseReference(merged.getTitle(), enrich.getTitle(), trust));
+		merged.setStartdate(chooseReference(merged.getStartdate(), enrich.getStartdate(), trust));
+		merged.setEnddate(chooseReference(merged.getEnddate(), enrich.getEnddate(), trust));
+		merged.setCallidentifier(chooseReference(merged.getCallidentifier(), enrich.getCallidentifier(), trust));
+		merged.setKeywords(chooseReference(merged.getKeywords(), enrich.getKeywords(), trust));
+		merged.setDuration(chooseReference(merged.getDuration(), enrich.getDuration(), trust));
+		merged.setEcsc39(chooseReference(merged.getEcsc39(), enrich.getEcsc39(), trust));
+		merged
+			.setOamandatepublications(
+				chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust));
+		merged.setEcarticle29_3(chooseReference(merged.getEcarticle29_3(), enrich.getEcarticle29_3(), trust));
+		merged.setSubjects(unionDistinctLists(merged.getSubjects(), enrich.getSubjects(), trust));
+		merged.setFundingtree(unionDistinctLists(merged.getFundingtree(), enrich.getFundingtree(), trust));
+		merged.setContracttype(chooseReference(merged.getContracttype(), enrich.getContracttype(), trust));
+		merged.setOptional1(chooseReference(merged.getOptional1(), enrich.getOptional1(), trust));
+		merged.setOptional2(chooseReference(merged.getOptional2(), enrich.getOptional2(), trust));
+		merged.setJsonextrainfo(chooseReference(merged.getJsonextrainfo(), enrich.getJsonextrainfo(), trust));
+		merged.setContactfullname(chooseReference(merged.getContactfullname(), enrich.getContactfullname(), trust));
+		merged.setContactfax(chooseReference(merged.getContactfax(), enrich.getContactfax(), trust));
+		merged.setContactphone(chooseReference(merged.getContactphone(), enrich.getContactphone(), trust));
+		merged.setContactemail(chooseReference(merged.getContactemail(), enrich.getContactemail(), trust));
+		merged.setSummary(chooseReference(merged.getSummary(), enrich.getSummary(), trust));
+		merged.setCurrency(chooseReference(merged.getCurrency(), enrich.getCurrency(), trust));
+
+		// missin in Project.merge
+		merged.setTotalcost(chooseReference(merged.getTotalcost(), enrich.getTotalcost(), trust));
+		merged.setFundedamount(chooseReference(merged.getFundedamount(), enrich.getFundedamount(), trust));
+
+		// trust ??
+		if (enrich.getH2020topiccode() != null && StringUtils.isEmpty(merged.getH2020topiccode())) {
+			merged.setH2020topiccode(enrich.getH2020topiccode());
+			merged.setH2020topicdescription(enrich.getH2020topicdescription());
+		}
+
+		merged
+			.setH2020classification(
+				unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust));
+
+		return merged;
+	}
+
+	/**
+	 * Longest lists list.
+	 *
+	 * @param a the a
+	 * @param b the b
+	 * @return the list
+	 */
+	public static List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) {
+		if (a == null || b == null)
+			return a == null ? b : a;
+
+		return a.size() >= b.size() ? a : b;
+	}
+
+	/**
+	 * This main method apply the enrichment of the instances
+	 *
+	 * @param toEnrichInstances   the instances that could be enriched
+	 * @param enrichmentInstances the enrichment instances
+	 * @return list of instances possibly enriched
+	 */
+	private static List<Instance> enrichInstances(final List<Instance> toEnrichInstances,
+		final List<Instance> enrichmentInstances) {
+		final List<Instance> enrichmentResult = new ArrayList<>();
+
+		if (toEnrichInstances == null) {
+			return enrichmentResult;
+		}
+		if (enrichmentInstances == null) {
+			return enrichmentResult;
+		}
+		Map<String, Instance> ri = toInstanceMap(enrichmentInstances);
+
+		toEnrichInstances.forEach(i -> {
+			final List<Instance> e = findEnrichmentsByPID(i.getPid(), ri);
+			if (e != null && e.size() > 0) {
+				e.forEach(enr -> applyEnrichment(i, enr));
+			} else {
+				final List<Instance> a = findEnrichmentsByPID(i.getAlternateIdentifier(), ri);
+				if (a != null && a.size() > 0) {
+					a.forEach(enr -> applyEnrichment(i, enr));
+				}
+			}
+			enrichmentResult.add(i);
+		});
+		return enrichmentResult;
+	}
+
+	/**
+	 * This method converts the list of instance enrichments
+	 * into a Map where the key is the normalized identifier
+	 * and the value is the instance itself
+	 *
+	 * @param ri the list of enrichment instances
+	 * @return the result map
+	 */
+	private static Map<String, Instance> toInstanceMap(final List<Instance> ri) {
+		return ri
+			.stream()
+			.filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null)
+			.flatMap(i -> {
+				final List<Pair<String, Instance>> result = new ArrayList<>();
+				if (i.getPid() != null)
+					i
+						.getPid()
+						.stream()
+						.filter(MergeUtils::validPid)
+						.forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
+				if (i.getAlternateIdentifier() != null)
+					i
+						.getAlternateIdentifier()
+						.stream()
+						.filter(MergeUtils::validPid)
+						.forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
+				return result.stream();
+			})
+			.collect(
+				Collectors
+					.toMap(
+						Pair::getLeft,
+						Pair::getRight,
+						(a, b) -> a));
+	}
+
+	private static boolean isFromDelegatedAuthority(Result r) {
+		return Optional
+			.ofNullable(r.getInstance())
+			.map(
+				instance -> instance
+					.stream()
+					.filter(i -> Objects.nonNull(i.getCollectedfrom()))
+					.map(i -> i.getCollectedfrom().getKey())
+					.anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)))
+			.orElse(false);
+	}
+
+	/**
+	 * Valid pid boolean.
+	 *
+	 * @param p the p
+	 * @return the boolean
+	 */
+	private static boolean validPid(final StructuredProperty p) {
+		return p.getValue() != null && p.getQualifier() != null && p.getQualifier().getClassid() != null;
+	}
+
+	/**
+	 * Normalize pid string.
+	 *
+	 * @param pid the pid
+	 * @return the string
+	 */
+	private static String extractKeyFromPid(final StructuredProperty pid) {
+		if (pid == null)
+			return null;
+		final StructuredProperty normalizedPid = CleaningFunctions.normalizePidValue(pid);
+
+		return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue());
+	}
+
+	/**
+	 * This utility method finds the list of enrichment instances
+	 * that match one or more PIDs in the input list
+	 *
+	 * @param pids        the list of PIDs
+	 * @param enrichments the List of enrichment instances having the same pid
+	 * @return the list
+	 */
+	private static List<Instance> findEnrichmentsByPID(final List<StructuredProperty> pids,
+		final Map<String, Instance> enrichments) {
+		if (pids == null || enrichments == null)
+			return null;
+		return pids
+			.stream()
+			.map(MergeUtils::extractKeyFromPid)
+			.map(enrichments::get)
+			.filter(Objects::nonNull)
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Is an enrichment boolean.
+	 *
+	 * @param e the e
+	 * @return the boolean
+	 */
+	private static boolean isAnEnrichment(OafEntity e) {
+		return e.getDataInfo() != null &&
+			e.getDataInfo().getProvenanceaction() != null
+			&& ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(e.getDataInfo().getProvenanceaction().getClassid());
+	}
+
+	/**
+	 * This method apply enrichment on a single instance
+	 * The enrichment consists of replacing values on
+	 * single attribute only if in the current instance is missing
+	 * The only repeatable field enriched is measures
+	 *
+	 * @param merge the current instance
+	 * @param enrichment      the enrichment instance
+	 */
+	private static void applyEnrichment(final Instance merge, final Instance enrichment) {
+		if (merge == null || enrichment == null)
+			return;
+
+		merge.setLicense(firstNonNull(merge.getLicense(), enrichment.getLicense()));
+		merge.setAccessright(firstNonNull(merge.getAccessright(), enrichment.getAccessright()));
+		merge.setInstancetype(firstNonNull(merge.getInstancetype(), enrichment.getInstancetype()));
+		merge.setInstanceTypeMapping(firstNonNull(merge.getInstanceTypeMapping(), enrichment.getInstanceTypeMapping()));
+		merge.setHostedby(firstNonNull(merge.getHostedby(), enrichment.getHostedby()));
+		merge.setUrl(unionDistinctLists(merge.getUrl(), enrichment.getUrl(), 0));
+		merge
+			.setDistributionlocation(
+				firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation()));
+		merge.setCollectedfrom(firstNonNull(merge.getCollectedfrom(), enrichment.getCollectedfrom()));
+		// pid and alternateId are used for matching
+		merge.setDateofacceptance(firstNonNull(merge.getDateofacceptance(), enrichment.getDateofacceptance()));
+		merge
+			.setProcessingchargeamount(
+				firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount()));
+		merge
+			.setProcessingchargecurrency(
+				firstNonNull(merge.getProcessingchargecurrency(), enrichment.getProcessingchargecurrency()));
+		merge.setRefereed(firstNonNull(merge.getRefereed(), enrichment.getRefereed()));
+		merge.setMeasures(unionDistinctLists(merge.getMeasures(), enrichment.getMeasures(), 0));
+		merge.setFulltext(firstNonNull(merge.getFulltext(), enrichment.getFulltext()));
+	}
+
+	private static int compareTrust(Oaf a, Oaf b) {
+		String left = Optional
+			.ofNullable(a.getDataInfo())
+			.map(DataInfo::getTrust)
+			.orElse("0.0");
+
+		String right = Optional
+			.ofNullable(b.getDataInfo())
+			.map(DataInfo::getTrust)
+			.orElse("0.0");
+
+		return left.compareTo(right);
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java
@ -0,0 +1,27 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+public class ModelHardLimits {
+
+	private ModelHardLimits() {
+	}
+
+	public static final String LAYOUT = "index";
+	public static final String INTERPRETATION = "openaire";
+	public static final String SEPARATOR = "-";
+
+	public static final int MAX_EXTERNAL_ENTITIES = 50;
+	public static final int MAX_AUTHORS = 200;
+	public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
+	public static final int MAX_TITLE_LENGTH = 5000;
+	public static final int MAX_TITLES = 10;
+	public static final int MAX_ABSTRACTS = 10;
+	public static final int MAX_ABSTRACT_LENGTH = 150000;
+	public static final int MAX_RELATED_ABSTRACT_LENGTH = 500;
+	public static final int MAX_INSTANCES = 10;
+
+	public static String getCollectionName(String format) {
+		return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@ -14,7 +14,6 @@ import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;

 import eu.dnetlib.dhp.schema.common.AccessRightComparator;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;

 public class OafMapperUtils {
@ -22,65 +21,6 @@ public class OafMapperUtils {
 	private OafMapperUtils() {
 	}

-	public static Oaf merge(final Oaf left, final Oaf right) {
-		if (ModelSupport.isSubClass(left, OafEntity.class)) {
-			return mergeEntities((OafEntity) left, (OafEntity) right);
-		} else if (ModelSupport.isSubClass(left, Relation.class)) {
-			((Relation) left).mergeFrom((Relation) right);
-		} else {
-			throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
-		}
-		return left;
-	}
-
-	public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
-		if (ModelSupport.isSubClass(left, Result.class)) {
-			return mergeResults((Result) left, (Result) right);
-		} else if (ModelSupport.isSubClass(left, Datasource.class)) {
-			left.mergeFrom(right);
-		} else if (ModelSupport.isSubClass(left, Organization.class)) {
-			left.mergeFrom(right);
-		} else if (ModelSupport.isSubClass(left, Project.class)) {
-			left.mergeFrom(right);
-		} else {
-			throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
-		}
-		return left;
-	}
-
-	public static Result mergeResults(Result left, Result right) {
-
-		final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left);
-		final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right);
-
-		if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) {
-			return left;
-		}
-		if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
-			return right;
-		}
-
-		if (new ResultTypeComparator().compare(left, right) < 0) {
-			left.mergeFrom(right);
-			return left;
-		} else {
-			right.mergeFrom(left);
-			return right;
-		}
-	}
-
-	private static boolean isFromDelegatedAuthority(Result r) {
-		return Optional
-			.ofNullable(r.getInstance())
-			.map(
-				instance -> instance
-					.stream()
-					.filter(i -> Objects.nonNull(i.getCollectedfrom()))
-					.map(i -> i.getCollectedfrom().getKey())
-					.anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)))
-			.orElse(false);
-	}
-
 	public static KeyValue keyValue(final String k, final String v) {
 		final KeyValue kv = new KeyValue();
 		kv.setKey(k);
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java
@ -0,0 +1,38 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.Comparator;
+
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class OrganizationPidComparator implements Comparator<StructuredProperty> {
+
+	@Override
+	public int compare(StructuredProperty left, StructuredProperty right) {
+
+		PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
+		PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
+
+		if (lClass.equals(PidType.openorgs))
+			return -1;
+		if (rClass.equals(PidType.openorgs))
+			return 1;
+
+		if (lClass.equals(PidType.GRID))
+			return -1;
+		if (rClass.equals(PidType.GRID))
+			return 1;
+
+		if (lClass.equals(PidType.mag_id))
+			return -1;
+		if (rClass.equals(PidType.mag_id))
+			return 1;
+
+		if (lClass.equals(PidType.urn))
+			return -1;
+		if (rClass.equals(PidType.urn))
+			return 1;
+
+		return 0;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java
@ -0,0 +1,8 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.HashMap;
+import java.util.HashSet;
+
+public class PidBlacklist extends HashMap<String, HashSet<String>> {
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java
@ -0,0 +1,40 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+public class PidBlacklistProvider {
+
+	private static final PidBlacklist blacklist;
+
+	static {
+		try {
+			String json = IOUtils.toString(IdentifierFactory.class.getResourceAsStream("pid_blacklist.json"));
+			blacklist = new ObjectMapper().readValue(json, PidBlacklist.class);
+
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	public static PidBlacklist getBlacklist() {
+		return blacklist;
+	}
+
+	public static Set<String> getBlacklist(String pidType) {
+		return Optional
+			.ofNullable(getBlacklist().get(pidType))
+			.orElse(new HashSet<>());
+	}
+
+	private PidBlacklistProvider() {
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java
@ -0,0 +1,48 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.Comparator;
+
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Organization;
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {
+
+	private final T entity;
+
+	public PidComparator(T entity) {
+		this.entity = entity;
+	}
+
+	@Override
+	public int compare(StructuredProperty left, StructuredProperty right) {
+
+		if (left == null && right == null)
+			return 0;
+		if (left == null)
+			return 1;
+		if (right == null)
+			return -1;
+
+		if (ModelSupport.isSubClass(entity, Result.class)) {
+			return compareResultPids(left, right);
+		}
+		if (ModelSupport.isSubClass(entity, Organization.class)) {
+			return compareOrganizationtPids(left, right);
+		}
+
+		// Else (but unlikely), lexicographical ordering will do.
+		return left.getQualifier().getClassid().compareTo(right.getQualifier().getClassid());
+	}
+
+	private int compareResultPids(StructuredProperty left, StructuredProperty right) {
+		return new ResultPidComparator().compare(left, right);
+	}
+
+	private int compareOrganizationtPids(StructuredProperty left, StructuredProperty right) {
+		return new OrganizationPidComparator().compare(left, right);
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java
@ -0,0 +1,79 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import org.apache.commons.lang3.EnumUtils;
+
+public enum PidType {
+
+	/**
+	 * The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash.
+	 *
+	 * There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix.
+	 *
+	 * The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic characters
+	 * of Unicode. Further constraints on character use (e.g. use of language-specific alphanumeric characters) can be
+	 * defined for an application by the ISO 26324 Registration Authority.
+	 *
+	 *
+	 * DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code.
+	 * These two components shall be separated by a full stop (period). The directory indicator shall be "10" and
+	 * distinguishes the entire set of character strings (prefix and suffix) as digital object identifiers within the
+	 * resolution system.
+	 *
+	 * Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant code is a
+	 * unique string assigned to a registrant.
+	 *
+	 * DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the registrant.
+	 * Each suffix shall be unique to the prefix element that precedes it. The unique suffix can be a sequential number,
+	 * or it might incorporate an identifier generated from or based on another system used by the registrant
+	 * (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred construction for such a suffix can be
+	 * specified, as in Example 1).
+	 *
+	 * Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2
+	 */
+	doi,
+
+	/**
+	 * PubMed Unique Identifier (PMID)
+	 *
+	 * This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and is the
+	 * accession number for managing and disseminating records. PMIDs are not reused after records are deleted.
+	 *
+	 * Beginning in February 2012 PMIDs include extensions following a decimal point to account for article versions
+	 * (e.g., 21804956.2). All citations are considered version 1 until replaced.  The extended PMID is not displayed
+	 * on the MEDLINE format.
+	 *
+	 * View the citation in abstract format in PubMed to access additional versions when available (see the article in
+	 * the Jan-Feb 2012 NLM Technical Bulletin).
+	 *
+	 * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
+	 */
+	pmid,
+
+	/**
+	 * This field contains the unique identifier for the cited article in PubMed Central. The identifier begins with the
+	 * prefix PMC.
+	 *
+	 * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
+	 */
+	pmc, handle, arXiv, nct, pdb, w3id,
+
+	// Organization
+	openorgs, ROR, GRID, PIC, ISNI, Wikidata, FundRef, corda, corda_h2020, mag_id, urn,
+
+	// Used by dedup
+	undefined, original;
+
+	public static boolean isValid(String type) {
+		return EnumUtils.isValidEnum(PidType.class, type);
+	}
+
+	public static PidType tryValueOf(String s) {
+		try {
+			return PidType.valueOf(s);
+		} catch (Exception e) {
+			return PidType.original;
+		}
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java
@ -0,0 +1,33 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.Comparator;
+import java.util.Optional;
+
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class PidValueComparator implements Comparator<StructuredProperty> {
+
+	@Override
+	public int compare(StructuredProperty left, StructuredProperty right) {
+
+		if (left == null && right == null)
+			return 0;
+		if (left == null)
+			return 1;
+		if (right == null)
+			return -1;
+
+		StructuredProperty l = CleaningFunctions.normalizePidValue(left);
+		StructuredProperty r = CleaningFunctions.normalizePidValue(right);
+
+		return Optional
+			.ofNullable(l.getValue())
+			.map(
+				lv -> Optional
+					.ofNullable(r.getValue())
+					.map(rv -> lv.compareTo(rv))
+					.orElse(-1))
+			.orElse(1);
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/RefereedComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/RefereedComparator.java
@ -0,0 +1,39 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.Comparator;
+
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+/**
+ * Comparator for sorting the values from the dnet:review_levels vocabulary, implements the following ordering
+ *
+ * peerReviewed (0001) > nonPeerReviewed (0002) > UNKNOWN (0000)
+ */
+public class RefereedComparator implements Comparator<Qualifier> {
+
+	@Override
+	public int compare(Qualifier left, Qualifier right) {
+
+		String lClass = left.getClassid();
+		String rClass = right.getClassid();
+
+		if ("0001".equals(lClass))
+			return -1;
+		if ("0001".equals(rClass))
+			return 1;
+
+		if ("0002".equals(lClass))
+			return -1;
+		if ("0002".equals(rClass))
+			return 1;
+
+		if ("0000".equals(lClass))
+			return -1;
+		if ("0000".equals(rClass))
+			return 1;
+
+		return 0;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java
@ -0,0 +1,53 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.Comparator;
+
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class ResultPidComparator implements Comparator<StructuredProperty> {
+
+	@Override
+	public int compare(StructuredProperty left, StructuredProperty right) {
+
+		PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
+		PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
+
+		if (lClass.equals(PidType.doi))
+			return -1;
+		if (rClass.equals(PidType.doi))
+			return 1;
+
+		if (lClass.equals(PidType.pmid))
+			return -1;
+		if (rClass.equals(PidType.pmid))
+			return 1;
+
+		if (lClass.equals(PidType.pmc))
+			return -1;
+		if (rClass.equals(PidType.pmc))
+			return 1;
+
+		if (lClass.equals(PidType.handle))
+			return -1;
+		if (rClass.equals(PidType.handle))
+			return 1;
+
+		if (lClass.equals(PidType.arXiv))
+			return -1;
+		if (rClass.equals(PidType.arXiv))
+			return 1;
+
+		if (lClass.equals(PidType.nct))
+			return -1;
+		if (rClass.equals(PidType.nct))
+			return 1;
+
+		if (lClass.equals(PidType.pdb))
+			return -1;
+		if (rClass.equals(PidType.pdb))
+			return 1;
+
+		return 0;
+	}
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultTypeComparator.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultTypeComparator.java
@ -0,0 +1,77 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
+
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class ResultTypeComparator implements Comparator<Result> {
+
+	@Override
+	public int compare(Result left, Result right) {
+
+		if (left == null && right == null)
+			return 0;
+		if (left == null)
+			return 1;
+		if (right == null)
+			return -1;
+
+		HashSet<String> lCf = getCollectedFromIds(left);
+		HashSet<String> rCf = getCollectedFromIds(right);
+
+		if (lCf.contains(CROSSREF_ID) && !rCf.contains(CROSSREF_ID)) {
+			return -1;
+		}
+		if (!lCf.contains(CROSSREF_ID) && rCf.contains(CROSSREF_ID)) {
+			return 1;
+		}
+
+		String lClass = left.getResulttype().getClassid();
+		String rClass = right.getResulttype().getClassid();
+
+		if (lClass.equals(rClass))
+			return 0;
+
+		if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
+			return -1;
+		if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
+			return 1;
+
+		if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
+			return -1;
+		if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
+			return 1;
+
+		if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
+			return -1;
+		if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
+			return 1;
+
+		if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
+			return -1;
+		if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
+			return 1;
+
+		// Else (but unlikely), lexicographical ordering will do.
+		return lClass.compareTo(rClass);
+	}
+
+	protected HashSet<String> getCollectedFromIds(Result left) {
+		return Optional
+			.ofNullable(left.getCollectedfrom())
+			.map(
+				cf -> cf
+					.stream()
+					.map(KeyValue::getKey)
+					.collect(Collectors.toCollection(HashSet::new)))
+			.orElse(new HashSet<>());
+	}
+}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java
@ -0,0 +1,21 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.Set;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+class BlackListProviderTest {
+
+	@Test
+	void blackListTest() {
+
+		Assertions.assertNotNull(PidBlacklistProvider.getBlacklist());
+		Assertions.assertNotNull(PidBlacklistProvider.getBlacklist().get("doi"));
+		Assertions.assertTrue(PidBlacklistProvider.getBlacklist().get("doi").size() > 0);
+		final Set<String> xxx = PidBlacklistProvider.getBlacklist("xxx");
+		Assertions.assertNotNull(xxx);
+		Assertions.assertEquals(0, xxx.size());
+	}
+}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java
@ -0,0 +1,87 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.IOException;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.oaf.Publication;
+
+class IdentifierFactoryTest {
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+
+	@Test
+	void testCreateIdentifierForPublication() throws IOException {
+
+		verifyIdentifier(
+			"publication_doi1.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
+
+		verifyIdentifier(
+			"publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
+
+		verifyIdentifier(
+			"publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
+
+		verifyIdentifier(
+			"publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true);
+
+		verifyIdentifier(
+			"publication_doi5.json", "50|doi_________::3bef95c0ca26dd55451fc8839ea69d27", true);
+
+		verifyIdentifier(
+			"publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true);
+
+		verifyIdentifier(
+			"publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
+
+		verifyIdentifier(
+			"publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
+
+		final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
+		verifyIdentifier("publication_3.json", defaultID, true);
+		verifyIdentifier("publication_4.json", defaultID, true);
+		verifyIdentifier("publication_5.json", defaultID, true);
+
+	}
+
+	@Test
+	void testCreateIdentifierForPublicationNoHash() throws IOException {
+
+		verifyIdentifier("publication_doi1.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
+		verifyIdentifier("publication_doi2.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
+		verifyIdentifier("publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
+		verifyIdentifier(
+			"publication_urn1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
+
+		final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
+		verifyIdentifier("publication_3.json", defaultID, false);
+		verifyIdentifier("publication_4.json", defaultID, false);
+		verifyIdentifier("publication_5.json", defaultID, false);
+	}
+
+	@Test
+	void testCreateIdentifierForROHub() throws IOException {
+		verifyIdentifier(
+			"orp-rohub.json", "50|w3id________::afc7592914ae190a50570db90f55f9c2", true);
+	}
+
+	protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException {
+		final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
+		final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);
+
+		String id = IdentifierFactory.createIdentifier(pub, md5);
+		System.out.println(id);
+		assertNotNull(id);
+		assertEquals(expectedID, id);
+	}
+
+}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtilsTest.java
@ -0,0 +1,130 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.commons.beanutils.BeanUtils;
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Lists;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.*;
+
+public class MergeUtilsTest {
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+
+	@Test
+	void testMergePubs_new() throws IOException {
+		Publication pt = read("publication_test.json", Publication.class);
+		Publication p1 = read("publication_test.json", Publication.class);
+
+		assertEquals(1, pt.getCollectedfrom().size());
+		assertEquals(ModelConstants.CROSSREF_ID, pt.getCollectedfrom().get(0).getKey());
+
+		Instance i = new Instance();
+		i.setUrl(Lists.newArrayList("https://..."));
+		p1.getInstance().add(i);
+
+		Publication ptp1 = MergeUtils.mergePublication(pt, p1);
+
+		assertNotNull(ptp1.getInstance());
+		assertEquals(2, ptp1.getInstance().size());
+
+	}
+
+	@Test
+	void testMergePubs() throws IOException {
+		Publication p1 = read("publication_1.json", Publication.class);
+		Publication p2 = read("publication_2.json", Publication.class);
+		Dataset d1 = read("dataset_1.json", Dataset.class);
+		Dataset d2 = read("dataset_2.json", Dataset.class);
+
+		assertEquals(1, p1.getCollectedfrom().size());
+		assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
+		assertEquals(1, d2.getCollectedfrom().size());
+		assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
+
+		assertEquals(1, p2.getCollectedfrom().size());
+		assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
+		assertEquals(1, d1.getCollectedfrom().size());
+		assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
+
+		final Result p1d2 = MergeUtils.checkedMerge(p1, d2);
+		assertEquals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, p1d2.getResulttype().getClassid());
+		assertTrue(p1d2 instanceof Publication);
+		assertEquals(p1.getId(), p1d2.getId());
+	}
+
+	@Test
+	void testMergePubs_1() throws IOException {
+		Publication p2 = read("publication_2.json", Publication.class);
+		Dataset d1 = read("dataset_1.json", Dataset.class);
+
+		final Result p2d1 = MergeUtils.checkedMerge(p2, d1);
+		assertEquals((ModelConstants.DATASET_RESULTTYPE_CLASSID), p2d1.getResulttype().getClassid());
+		assertTrue(p2d1 instanceof Dataset);
+		assertEquals(d1.getId(), p2d1.getId());
+		assertEquals(2, p2d1.getCollectedfrom().size());
+	}
+
+	@Test
+	void testMergePubs_2() throws IOException {
+		Publication p1 = read("publication_1.json", Publication.class);
+		Publication p2 = read("publication_2.json", Publication.class);
+
+		Result p1p2 = MergeUtils.checkedMerge(p1, p2);
+		assertTrue(p1p2 instanceof Publication);
+		assertEquals(p1.getId(), p1p2.getId());
+		assertEquals(2, p1p2.getCollectedfrom().size());
+	}
+
+	@Test
+	void testDelegatedAuthority_1() throws IOException {
+		Dataset d1 = read("dataset_2.json", Dataset.class);
+		Dataset d2 = read("dataset_delegated.json", Dataset.class);
+
+		assertEquals(1, d2.getCollectedfrom().size());
+		assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
+
+		Result res = (Result) MergeUtils.merge(d1, d2, true);
+
+		assertEquals(d2, res);
+	}
+
+	@Test
+	void testDelegatedAuthority_2() throws IOException {
+		Dataset p1 = read("publication_1.json", Dataset.class);
+		Dataset d2 = read("dataset_delegated.json", Dataset.class);
+
+		assertEquals(1, d2.getCollectedfrom().size());
+		assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
+
+		Result res = (Result) MergeUtils.merge(p1, d2, true);
+
+		assertEquals(d2, res);
+	}
+
+	protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
+		return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
+	}
+
+	protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
+		final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
+		return OBJECT_MAPPER.readValue(json, clazz);
+	}
+
+}
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
@ -149,7 +149,7 @@ class OafMapperUtilsTest {
 	void testDate() {
 		final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
 		assertNotNull(date);
-		System.out.println(date);
+		assertEquals("1998-02-23", date);
 	}

 	@Test
@ -166,8 +166,8 @@ class OafMapperUtilsTest {

 		assertEquals(
 			ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
-			OafMapperUtils
-				.mergeResults(p1, d2)
+			MergeUtils
+				.mergeResult(p1, d2)
 				.getResulttype()
 				.getClassid());

@ -178,10 +178,10 @@ class OafMapperUtilsTest {

 		assertEquals(
 			ModelConstants.DATASET_RESULTTYPE_CLASSID,
-			OafMapperUtils
-				.mergeResults(p2, d1)
-				.getResulttype()
-				.getClassid());
+			((Result) MergeUtils
+				.merge(p2, d1))
+					.getResulttype()
+					.getClassid());
 	}

 	@Test
@ -192,7 +192,7 @@ class OafMapperUtilsTest {
 		assertEquals(1, d2.getCollectedfrom().size());
 		assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));

-		Result res = OafMapperUtils.mergeResults(d1, d2);
+		Result res = MergeUtils.mergeResult(d1, d2);

 		assertEquals(d2, res);

--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/enrichment.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/enrichment.json
@ -0,0 +1,12 @@
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0000/ra.v2i3.114::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.65008652949e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0000/ra.v2i3.114"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/(aj).v3i6.458::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/(aj).v3i6.458"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/1587::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.39172290649e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/1587"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/462::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.33235333753e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.36"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.00285265116e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/462"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/731::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/731"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/ijllis.v9i4.2066.g2482::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"8.48190886761e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/ijllis.v9i4.2066.g2482"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0118/alfahim.v3i1.140::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.88840807598e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0118/alfahim.v3i1.140"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0166/fk2.stagefigshare.6442896.v3::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0166/fk2.stagefigshare.6442896.v3"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0301/jttb.v2i1.64::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0301/jttb.v2i1.64"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v1i1.567::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2.62959564033e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v1i1.567"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v2i1.765::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.40178571921e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0559872"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"3.67659957614e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v2i1.765"}]}]}
+{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0901/jkip.v7i3.485::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.26204125721e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0901/jkip.v7i3.485"}]}]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/orp-rohub.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/orp-rohub.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json
@ -0,0 +1 @@
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json
@ -0,0 +1 @@
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json
@ -0,0 +1 @@
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_apc.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_apc.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_apc2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_apc2.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json
@ -0,0 +1,33 @@
+{
+  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
+  "instance": [
+    {
+      "collectedfrom": {
+        "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
+        "value": "Crossref"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "doi"},
+          "value": "10.1016/j.cmet.2010.03.013"
+        }
+      ]
+    },
+    {
+      "pid": [
+        {
+          "qualifier": {"classid": "urn"},
+          "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
+        },
+        {
+          "qualifier": {"classid": "scp-number"},
+          "value": "79953761260"
+        },
+        {
+          "qualifier": {"classid": "pmc"},
+          "value": "21459329"
+        }
+      ]
+    }
+  ]
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json
@ -0,0 +1,37 @@
+{
+  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
+  "instance": [
+    {
+      "collectedfrom": {
+        "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
+        "value": "Crossref"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "doi"},
+          "value": "10.1016/j.cmet.2010.03.013"
+        }
+      ]
+    },
+    {
+      "collectedfrom": {
+        "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
+        "value": "Europe PubMed Central"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "urn"},
+          "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
+        },
+        {
+          "qualifier": {"classid": "scp-number"},
+          "value": "79953761260"
+        },
+        {
+          "qualifier": {"classid": "pmc"},
+          "value": "21459329"
+        }
+      ]
+    }
+  ]
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json
@ -0,0 +1,37 @@
+{
+  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
+  "instance": [
+    {
+      "collectedfrom": {
+        "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
+        "value": "Zenodo"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "doi"},
+          "value": "10.1016/j.cmet.2010.03.013"
+        }
+      ]
+    },
+    {
+      "collectedfrom": {
+        "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
+        "value": "Europe PubMed Central"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "urn"},
+          "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
+        },
+        {
+          "qualifier": {"classid": "scp-number"},
+          "value": "79953761260"
+        },
+        {
+          "qualifier": {"classid": "pmc"},
+          "value": "21459329"
+        }
+      ]
+    }
+  ]
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json
@ -0,0 +1,37 @@
+{
+  "id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66",
+  "instance": [
+    {
+      "collectedfrom": {
+        "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
+        "value": "Zenodo"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "doi"},
+          "value": "10.1016/j.cmet.2010.03.013"
+        },
+        {
+          "qualifier": {"classid": "handle"},
+          "value": "11012/83840"
+        }
+      ]
+    },
+    {
+      "collectedfrom": {
+        "key": "10|opendoar____::2852",
+        "value": "Digital library of Brno University of Technology"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "pmc"},
+          "value": "21459329"
+        },
+        {
+          "qualifier": {"classid": "handle"},
+          "value": "11012/83840"
+        }
+      ]
+    }
+  ]
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi5.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi5.json
@ -0,0 +1,37 @@
+{
+  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
+  "instance": [
+    {
+      "collectedfrom": {
+        "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
+        "value": "Zenodo"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "doi"},
+          "value": "10.5281/zenodo.5121485"
+        }
+      ]
+    },
+    {
+      "collectedfrom": {
+        "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
+        "value": "Europe PubMed Central"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "urn"},
+          "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
+        },
+        {
+          "qualifier": {"classid": "scp-number"},
+          "value": "79953761260"
+        },
+        {
+          "qualifier": {"classid": "pmc"},
+          "value": "21459329"
+        }
+      ]
+    }
+  ]
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_irish_tender_1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_irish_tender_1.json
@ -0,0 +1,3 @@
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ], "isGreen": null, "openAccessColor":  "gold", "isInDiamondJournal": null, "publiclyFunded": null}
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "publication" }, "isGreen": true, "openAccessColor":  "gold", "isInDiamondJournal": true, "publiclyFunded": false }
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1h", "resulttype" : { "classid" : "publication" }, "isGreen": false, "openAccessColor":  null, "isInDiamondJournal": true, "publiclyFunded": false }
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_irish_tender_2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_irish_tender_2.json
@ -0,0 +1,3 @@
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ], "isGreen": null, "openAccessColor":  "gold", "isInDiamondJournal": null, "publiclyFunded": null}
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "publication" }, "isGreen": true, "openAccessColor":  "bronze", "isInDiamondJournal": true, "publiclyFunded": false }
+{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1h", "resulttype" : { "classid" : "publication" }, "isGreen": false, "openAccessColor":  null, "isInDiamondJournal": true, "publiclyFunded": false }
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_openapc.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_openapc.json
@ -0,0 +1,31 @@
+{
+  "id": "50|openapc_____::000023f9cb6e3a247c764daec4273cbc",
+  "resuttype": {
+    "classid": "publication"
+  },
+  "instance": [
+    {
+      "collectedfrom": {
+        "key": "10|apc_________::e2b1600b229fc30663c8a1f662debddf",
+        "value": "OpenAPC Global Initiative"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "doi"},
+          "value": "10.1016/j.cmet.2010.03.013"
+        },
+        {
+          "qualifier": {"classid": "pmc"},
+          "value": "21459329"
+        },
+        {
+          "qualifier": {"classid": "pmid"},
+          "value": "25811027"
+        }
+      ],
+      "url":["https://doi.org/10.1155/2015/439379"]
+    }
+  ]
+}
+
+
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json
@ -0,0 +1,17 @@
+{
+  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
+  "pid": [
+    {
+      "qualifier": {"classid": "urn"},
+      "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
+    },
+    {
+      "qualifier": {"classid": "scp-number"},
+      "value": "79953761260"
+    },
+    {
+      "qualifier": {"classid": "pmc"},
+      "value": "21459329"
+    }
+  ]
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json
@ -0,0 +1,21 @@
+{
+  "id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
+  "instance": [
+    {
+      "collectedfrom": {
+        "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
+        "value": "Europe PubMed Central"
+      },
+      "pid": [
+        {
+          "qualifier": {"classid": "doi"},
+          "value": "10.1016/j.cmet.2010.03.013"
+        },
+        {
+          "qualifier":{"classid":"pmc"},
+          "value":"21459329"
+        }
+      ]
+    }
+  ]
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_test.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_test.json
@ -0,0 +1,428 @@
+{
+  "author": [
+    {
+      "affiliation": null,
+      "fullname": "Deymier, Ghislaine",
+      "name": "Ghislaine",
+      "pid": [],
+      "rank": 1,
+      "surname": "Deymier"
+    },
+    {
+      "affiliation": null,
+      "fullname": "Gaschet, Frédéric",
+      "name": "Frédéric",
+      "pid": [],
+      "rank": 2,
+      "surname": "Gaschet"
+    },
+    {
+      "affiliation": null,
+      "fullname": "Pouyanne, Guillaume",
+      "name": "Guillaume",
+      "pid": [],
+      "rank": 3,
+      "surname": "Pouyanne"
+    }
+  ],
+  "bestaccessright": {
+    "classid": "OPEN",
+    "classname": "Open Access",
+    "schemeid": "dnet:access_modes",
+    "schemename": "dnet:access_modes"
+  },
+  "collectedfrom": [
+    {
+      "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
+      "value" : "Crossref"
+    }
+  ],
+  "context": [],
+  "contributor": [],
+  "country": [],
+  "coverage": [],
+  "dataInfo": {
+    "deletedbyinference": false,
+    "inferred": false,
+    "invisible": false,
+    "provenanceaction": {
+      "classid": "sysimport:crosswalk:repository",
+      "classname": "Harvested",
+      "schemeid": "dnet:provenanceActions",
+      "schemename": "dnet:provenanceActions"
+    },
+    "trust": "0.9"
+  },
+  "dateofacceptance": {
+    "dataInfo": {
+      "deletedbyinference": false,
+      "inferred": false,
+      "invisible": false,
+      "provenanceaction": {
+        "classid": "sysimport:crosswalk:repository",
+        "classname": "Harvested",
+        "schemeid": "dnet:provenanceActions",
+        "schemename": "dnet:provenanceActions"
+      },
+      "trust": "0.9"
+    },
+    "value": "2013-11-30"
+  },
+  "dateofcollection": "2024-02-28T00:22:13+0000",
+  "dateoftransformation": "2024-03-06T08:43:13.253Z",
+  "description": [
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "value": "For analyzing the reciprocal interaction between urban sprawl and car use, research has first focused on the link between urban density and mobility. By looking for a reduction in energy consumption, cities have favoured a compact planning development. Then reflection has broadened from the simple density to the wider, multi-dimensional concept of urban form. This controversy has led to a renewal of analysis in term of the costs of urban growth, notably by comparing the costs of \"compact\" and \"sprawled\" development. The idea is to compare the mobility costs of different urban forms. However, most often because of a lack of data, such studies are scarce. This paper suggests an innovative method to compute mobility costs at an infra-urban scale : The Spatialized Travel Account (STA). It is based on the CERTU's travel account methodology at a metropolitan scale. It puts forward an accurate estimate of the mobility costs for each transport mode (individual and public) and for each type of payer (households, firms, local authorities...). In order to test the relationships between mobility costs and urban form, we link the computed costs to morphological characteristics of infra-urban zones, taking in account sociodemographic characteristics of households."
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "value": "L'interaction réciproque entre étalement urbain et usage de l'automobile a conduit la recherche à se focaliser sur le lien entre les densités urbaines et la mobilité. En cherchant à réduire leur consommation d'énergie pour les transports, et donc leurs émissions de Gaz à Effet de Serre, les villes ont alors cherché à planifier la \" ville compacte \", privilégiant notamment la reconstruction de la ville sur elle-même et la densification. Par la suite, la réflexion s'est élargie de la simple densité à la notion de forme urbaine et à toutes ses dimensions. Cette controverse devait conduire à un renouveau des analyses en termes de coûts de la croissance urbaine : le débat reste vif, encore aujourd'hui, sur les coûts comparés de la ville étalée et de la ville compacte. Plus largement, il s'agit d'explorer les coûts des différentes formes urbaines en termes de mobilité. Malgré cela, généralement pour des raisons de disponibilité de données, les études sur le sujet restent extrêmement rares. Cet article propose un outil novateur pour mesurer les coûts de la mobilité à l'échelle intraurbaine : le Compte Déplacements Territorialisé (CDT). Il s'inspire de la méthode développée par le CERTU pour l'établissement des Comptes Déplacements Voyageurs à l'échelle métropolitaine. Le CDT propose, pour chacune des zones de l'agglomération, une estimation précise de l'ensemble des coûts liés aux déplacements de personnes, ventilés par mode de transport (individuels et collectifs) et par type de financeurs (ménages, entreprises, collectivités territoriales, etc.). Nous proposons une application de cette méthode à la controverse sur le lien entre forme urbaine et coûts de la mobilité. Les coûts sont reliés aux caractéristiques morphologiques des zones (en termes de densité et de diversité, notamment), en prenant soin de contrôler les facteurs socio-économiques qui influent traditionnellement sur les comportements de mobilité (taille du ménage, revenu, etc.)."
+    }
+  ],
+  "eoscifguidelines": [],
+  "externalReference": [],
+  "extraInfo": [],
+  "format": [
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "value": "application/pdf"
+    }
+  ],
+  "fulltext": [],
+  "id": "50|06cdd3ff4700::4826ac62a11a957fe332e2c291dcfcca",
+  "instance": [
+    {
+      "accessright": {
+        "classid": "OPEN",
+        "classname": "Open Access",
+        "schemeid": "dnet:access_modes",
+        "schemename": "dnet:access_modes"
+      },
+      "alternateIdentifier": [
+        {
+          "dataInfo": {
+            "deletedbyinference": false,
+            "inferred": false,
+            "invisible": false,
+            "provenanceaction": {
+              "classid": "sysimport:crosswalk:repository",
+              "classname": "Harvested",
+              "schemeid": "dnet:provenanceActions",
+              "schemename": "dnet:provenanceActions"
+            },
+            "trust": "0.9"
+          },
+          "qualifier": {
+            "classid": "doi",
+            "classname": "Digital Object Identifier",
+            "schemeid": "dnet:pid_types",
+            "schemename": "dnet:pid_types"
+          },
+          "value": "10.46298/cst.12132"
+        }
+      ],
+      "collectedfrom": {
+        "key": "10|openaire____::6824b298c96ba906a3e6a70593affbf5",
+        "value": "Episciences"
+      },
+      "dateofacceptance": {
+        "dataInfo": {
+          "deletedbyinference": false,
+          "inferred": false,
+          "invisible": false,
+          "provenanceaction": {
+            "classid": "sysimport:crosswalk:repository",
+            "classname": "Harvested",
+            "schemeid": "dnet:provenanceActions",
+            "schemename": "dnet:provenanceActions"
+          },
+          "trust": "0.9"
+        },
+        "value": "2013-11-30"
+      },
+      "hostedby": {
+        "key": "10|openaire____::6824b298c96ba906a3e6a70593affbf5",
+        "value": "Episciences"
+      },
+      "instanceTypeMapping": [
+        {
+          "originalType": "http://purl.org/coar/resource_type/c_6501",
+          "typeCode": "http://purl.org/coar/resource_type/c_6501",
+          "typeLabel": "journal article",
+          "vocabularyName": "openaire::coar_resource_types_3_1"
+        },
+        {
+          "originalType": "http://purl.org/coar/resource_type/c_6501",
+          "typeCode": "Article",
+          "typeLabel": "Article",
+          "vocabularyName": "openaire::user_resource_types"
+        }
+      ],
+      "instancetype": {
+        "classid": "0001",
+        "classname": "Article",
+        "schemeid": "dnet:publication_resource",
+        "schemename": "dnet:publication_resource"
+      },
+      "license": {
+        "dataInfo": {
+          "deletedbyinference": false,
+          "inferred": false,
+          "invisible": false,
+          "provenanceaction": {
+            "classid": "sysimport:crosswalk:repository",
+            "classname": "Harvested",
+            "schemeid": "dnet:provenanceActions",
+            "schemename": "dnet:provenanceActions"
+          },
+          "trust": "0.9"
+        },
+        "value": "CC BY NC SA"
+      },
+      "pid": [],
+      "refereed": {
+        "classid": "0002",
+        "classname": "nonPeerReviewed",
+        "schemeid": "dnet:review_levels",
+        "schemename": "dnet:review_levels"
+      },
+      "url": [
+        "https://doi.org/10.46298/cst.12132",
+        "https://cst.episciences.org/12132"
+      ]
+    }
+  ],
+  "language": {
+    "classid": "fra/fre",
+    "classname": "French",
+    "schemeid": "dnet:languages",
+    "schemename": "dnet:languages"
+  },
+  "lastupdatetimestamp": 1710636106633,
+  "metaResourceType": {
+    "classid": "Research Literature",
+    "classname": "Research Literature",
+    "schemeid": "openaire::meta_resource_types",
+    "schemename": "openaire::meta_resource_types"
+  },
+  "originalId": [
+    "oai:episciences.org:cst:12132",
+    "50|06cdd3ff4700::4826ac62a11a957fe332e2c291dcfcca"
+  ],
+  "pid": [],
+  "publisher": {
+    "dataInfo": {
+      "deletedbyinference": false,
+      "inferred": false,
+      "invisible": false,
+      "provenanceaction": {
+        "classid": "sysimport:crosswalk:repository",
+        "classname": "Harvested",
+        "schemeid": "dnet:provenanceActions",
+        "schemename": "dnet:provenanceActions"
+      },
+      "trust": "0.9"
+    },
+    "value": "episciences.org"
+  },
+  "relevantdate": [
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "qualifier": {
+        "classid": "Accepted",
+        "classname": "Accepted",
+        "schemeid": "dnet:dataCite_date",
+        "schemename": "dnet:dataCite_date"
+      },
+      "value": "2024-02-11"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "qualifier": {
+        "classid": "issued",
+        "classname": "issued",
+        "schemeid": "dnet:dataCite_date",
+        "schemename": "dnet:dataCite_date"
+      },
+      "value": "2013-11-30"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "qualifier": {
+        "classid": "available",
+        "classname": "available",
+        "schemeid": "dnet:dataCite_date",
+        "schemename": "dnet:dataCite_date"
+      },
+      "value": "2013-11-30"
+    }
+  ],
+  "resourcetype": {
+    "classid": "journal article",
+    "classname": "journal article",
+    "schemeid": "dnet:dataCite_resource",
+    "schemename": "dnet:dataCite_resource"
+  },
+  "resulttype": {
+    "classid": "publication",
+    "classname": "publication",
+    "schemeid": "dnet:result_typologies",
+    "schemename": "dnet:result_typologies"
+  },
+  "source": [],
+  "subject": [
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "JEL: H - Public Economics/H.H7 - State and Local Government • Intergovernmental Relations/H.H7.H72 - State and Local Budget and Expenditures"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "Local public finance"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "JEL: R - Urban, Rural, Regional, Real Estate, and Transportation Economics/R.R5 - Regional Government Analysis/R.R5.R51 - Finance in Urban and Rural Economies"
+    }
+  ],
+  "title": [
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "sysimport:crosswalk:repository",
+          "classname": "Harvested",
+          "schemeid": "dnet:provenanceActions",
+          "schemename": "dnet:provenanceActions"
+        },
+        "trust": "0.9"
+      },
+      "qualifier": {
+        "classid": "main title",
+        "classname": "main title",
+        "schemeid": "dnet:dataCite_title",
+        "schemename": "dnet:dataCite_title"
+      },
+      "value": "Urban form and the costs of daily mobility. The spatialized travel account tool and its application to the Bordeaux metropolitan area"
+    }
+  ]
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json
@ -0,0 +1,23 @@
+{
+  "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
+  "pid": [
+    {
+      "qualifier": {
+        "classid": "urn"
+      },
+      "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
+    },
+    {
+      "qualifier": {
+        "classid": "scp-number"
+      },
+      "value": "79953761260"
+    },
+    {
+      "qualifier": {
+        "classid": "pmcid"
+      },
+      "value": "21459329"
+    }
+  ]
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publications.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publications.json
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java
@ -7,8 +7,7 @@ import java.util.function.BiFunction;

 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.schema.oaf.Oaf;
-import eu.dnetlib.dhp.schema.oaf.OafEntity;
-import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;

 /** OAF model merging support. */
 public class MergeAndGet {
@ -46,20 +45,7 @@ public class MergeAndGet {
 	}

 	private static <G extends Oaf, A extends Oaf> G mergeFromAndGet(G x, A y) {
-		if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) {
-			((Relation) x).mergeFrom((Relation) y);
-			return x;
-		} else if (isSubClass(x, OafEntity.class)
-			&& isSubClass(y, OafEntity.class)
-			&& isSubClass(x, y)) {
-			((OafEntity) x).mergeFrom((OafEntity) y);
-			return x;
-		}
-		throw new RuntimeException(
-			String
-				.format(
-					"MERGE_FROM_AND_GET incompatible types: %s, %s",
-					x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
+		return (G) MergeUtils.merge(x, y);
 	}

 	@SuppressWarnings("unchecked")
--- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java
+++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java
@ -8,6 +8,7 @@ import static org.mockito.Mockito.*;

 import java.util.function.BiFunction;

+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;

@ -85,6 +86,7 @@ public class MergeAndGetTest {
 		}

 		@Test
+		@Disabled
 		void shouldBehaveProperlyForRelationAndRelation() {
 			// given
 			Relation a = mock(Relation.class);
@ -96,7 +98,9 @@ public class MergeAndGetTest {
 			// then
 			Oaf x = fn.get().apply(a, b);
 			assertTrue(Relation.class.isAssignableFrom(x.getClass()));
-			verify(a).mergeFrom(b);
+
+			// TODO should be reimplemented
+			// verify(a).mergeFrom(b);
 			assertEquals(a, x);
 		}

@ -145,6 +149,7 @@ public class MergeAndGetTest {
 		}

 		@Test
+		@Disabled
 		void shouldBehaveProperlyForOafEntityAndOafEntity() {
 			// given
 			OafEntity a = mock(OafEntity.class);
@ -156,7 +161,9 @@ public class MergeAndGetTest {
 			// then
 			Oaf x = fn.get().apply(a, b);
 			assertTrue(OafEntity.class.isAssignableFrom(x.getClass()));
-			verify(a).mergeFrom(b);
+
+			// TODO should be reimplemented
+			// verify(a).mergeFrom(b);
 			assertEquals(a, x);
 		}
 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
@ -64,6 +64,12 @@ public class PrepareAffiliationRelations implements Serializable {
 		final String pubmedInputPath = parser.get("pubmedInputPath");
 		log.info("pubmedInputPath: {}", pubmedInputPath);

+		final String openapcInputPath = parser.get("openapcInputPath");
+		log.info("openapcInputPath: {}", openapcInputPath);
+
+		final String dataciteInputPath = parser.get("dataciteInputPath");
+		log.info("dataciteInputPath: {}", dataciteInputPath);
+
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

@ -85,8 +91,20 @@ public class PrepareAffiliationRelations implements Serializable {
 				JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
 					spark, pubmedInputPath, collectedFromPubmed);

+				List<KeyValue> collectedFromOpenAPC = OafMapperUtils
+					.listKeyValues(ModelConstants.OPEN_APC_ID, "OpenAPC");
+				JavaPairRDD<Text, Text> openAPCRelations = prepareAffiliationRelations(
+					spark, openapcInputPath, collectedFromOpenAPC);
+
+				List<KeyValue> collectedFromDatacite = OafMapperUtils
+						.listKeyValues(ModelConstants.DATACITE_ID, "Datacite");
+				JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations(
+						spark, dataciteInputPath, collectedFromDatacite);
+
 				crossrefRelations
 					.union(pubmedRelations)
+					.union(openAPCRelations)
+					.union(dataciteRelations)
 					.saveAsHadoopFile(
 						outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/score/deserializers/BipProjectModel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/score/deserializers/BipProjectModel.java
@ -34,6 +34,10 @@ public class BipProjectModel {

 	String totalCitationCount;

+	public String getProjectId() {
+		return projectId;
+	}
+
 	// each project bip measure has exactly one value, hence one key-value pair
 	private Measure createMeasure(String measureId, String measureValue) {

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java
@ -75,6 +75,7 @@ public class GetFOSSparkJob implements Serializable {
 		fosData.map((MapFunction<Row, FOSDataModel>) r -> {
 			FOSDataModel fosDataModel = new FOSDataModel();
 			fosDataModel.setDoi(r.getString(0).toLowerCase());
+			fosDataModel.setOaid(r.getString(1).toLowerCase());
 			fosDataModel.setLevel1(r.getString(2));
 			fosDataModel.setLevel2(r.getString(3));
 			fosDataModel.setLevel3(r.getString(4));
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java
@ -16,12 +16,14 @@ import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
+import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
@ -52,62 +54,90 @@ public class PrepareFOSSparkJob implements Serializable {
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

+		final Boolean distributeDOI = Optional
+			.ofNullable(parser.get("distributeDoi"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				distributeFOSdois(
-					spark,
-					sourcePath,
+				if (distributeDOI)
+					distributeFOSdois(
+						spark,
+						sourcePath,

-					outputPath);
+						outputPath);
+				else
+					distributeFOSoaid(spark, sourcePath, outputPath);
 			});
 	}

+	private static void distributeFOSoaid(SparkSession spark, String sourcePath, String outputPath) {
+		Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);
+
+		fosDataset
+			.groupByKey((MapFunction<FOSDataModel, String>) v -> v.getOaid().toLowerCase(), Encoders.STRING())
+			.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>) (k, it) -> {
+				return getResult(ModelSupport.getIdPrefix(Result.class) + "|" + k, it);
+			}, Encoders.bean(Result.class))
+			.write()
+			.mode(SaveMode.Overwrite)
+			.option("compression", "gzip")
+			.json(outputPath + "/fos");
+	}
+
+	@NotNull
+	private static Result getResult(String k, Iterator<FOSDataModel> it) {
+		Result r = new Result();
+		FOSDataModel first = it.next();
+		r.setId(k);
+
+		HashSet<String> level1 = new HashSet<>();
+		HashSet<String> level2 = new HashSet<>();
+		HashSet<String> level3 = new HashSet<>();
+		HashSet<String> level4 = new HashSet<>();
+		addLevels(level1, level2, level3, level4, first);
+		it.forEachRemaining(v -> addLevels(level1, level2, level3, level4, v));
+		List<Subject> sbjs = new ArrayList<>();
+		level1
+			.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
+		level2
+			.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
+		level3
+			.forEach(
+				l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
+		level4
+			.forEach(
+				l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
+		r.setSubject(sbjs);
+		r
+			.setDataInfo(
+				OafMapperUtils
+					.dataInfo(
+						false, null, true,
+						false,
+						OafMapperUtils
+							.qualifier(
+								ModelConstants.PROVENANCE_ENRICH,
+								null,
+								ModelConstants.DNET_PROVENANCE_ACTIONS,
+								ModelConstants.DNET_PROVENANCE_ACTIONS),
+						null));
+		return r;
+	}
+
 	private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) {
 		Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);

 		fosDataset
 			.groupByKey((MapFunction<FOSDataModel, String>) v -> v.getDoi().toLowerCase(), Encoders.STRING())
-			.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>) (k, it) -> {
-				Result r = new Result();
-				FOSDataModel first = it.next();
-				r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
-
-				HashSet<String> level1 = new HashSet<>();
-				HashSet<String> level2 = new HashSet<>();
-				HashSet<String> level3 = new HashSet<>();
-				HashSet<String> level4 = new HashSet<>();
-				addLevels(level1, level2, level3, level4, first);
-				it.forEachRemaining(v -> addLevels(level1, level2, level3, level4, v));
-				List<Subject> sbjs = new ArrayList<>();
-				level1
-					.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
-				level2
-					.forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
-				level3
-					.forEach(
-						l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
-				level4
-					.forEach(
-						l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true)));
-				r.setSubject(sbjs);
-				r
-					.setDataInfo(
-						OafMapperUtils
-							.dataInfo(
-								false, null, true,
-								false,
-								OafMapperUtils
-									.qualifier(
-										ModelConstants.PROVENANCE_ENRICH,
-										null,
-										ModelConstants.DNET_PROVENANCE_ACTIONS,
-										ModelConstants.DNET_PROVENANCE_ACTIONS),
-								null));
-				return r;
-			}, Encoders.bean(Result.class))
+			.mapGroups(
+				(MapGroupsFunction<String, FOSDataModel, Result>) (k,
+					it) -> getResult(DHPUtils.generateUnresolvedIdentifier(k, DOI), it),
+				Encoders.bean(Result.class))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/fosnodoi/CreateActionSetSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/fosnodoi/CreateActionSetSparkJob.java
@ -0,0 +1,92 @@
+
+package eu.dnetlib.dhp.actionmanager.fosnodoi;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.*;
+
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.utils.*;
+import scala.Tuple2;
+
+public class CreateActionSetSparkJob implements Serializable {
+
+	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	public static void main(final String[] args) throws IOException, ParseException {
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					Objects
+						.requireNonNull(
+							CreateActionSetSparkJob.class
+								.getResourceAsStream(
+									"/eu/dnetlib/dhp/actionmanager/fosnodoi/as_parameters.json"))));
+
+		parser.parseArgument(args);
+
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("sourcePath");
+		log.info("inputPath {}", inputPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath {}", outputPath);
+
+		SparkConf conf = new SparkConf();
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> createActionSet(spark, inputPath, outputPath));
+
+	}
+
+	private static void createActionSet(SparkSession spark, String inputPath, String outputPath) {
+		spark
+			.read()
+			.textFile(inputPath)
+			.map(
+				(MapFunction<String, Result>) value -> OBJECT_MAPPER.readValue(value, Result.class),
+				Encoders.bean(Result.class))
+			.toJavaRDD()
+			.map(p -> new AtomicAction(p.getClass(), p))
+			.mapToPair(
+				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
+					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
+			.saveAsHadoopFile(
+				outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java
@ -22,12 +22,14 @@ import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.*;
 import eu.dnetlib.dhp.utils.DHPUtils;
@ -37,16 +39,12 @@ public class CreateActionSetSparkJob implements Serializable {
 	public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations";
 	public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations";

-	// DOI-to-DOI citations
-	public static final String COCI = "COCI";
-
-	// PMID-to-PMID citations
-	public static final String POCI = "POCI";
-
 	private static final String DOI_PREFIX = "50|doi_________::";

 	private static final String PMID_PREFIX = "50|pmid________::";
+	private static final String ARXIV_PREFIX = "50|arXiv_______::";

+	private static final String PMCID_PREFIX = "50|pmcid_______::";
 	private static final String TRUST = "0.91";

 	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
@ -79,38 +77,30 @@ public class CreateActionSetSparkJob implements Serializable {
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath {}", outputPath);

-		final boolean shouldDuplicateRels = Optional
-			.ofNullable(parser.get("shouldDuplicateRels"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.FALSE);
-
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
-			spark -> extractContent(spark, inputPath, outputPath, shouldDuplicateRels));
+			spark -> extractContent(spark, inputPath, outputPath));

 	}

-	private static void extractContent(SparkSession spark, String inputPath, String outputPath,
-		boolean shouldDuplicateRels) {
+	private static void extractContent(SparkSession spark, String inputPath, String outputPath) {

-		getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, COCI)
-			.union(getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, POCI))
+		getTextTextJavaPairRDD(spark, inputPath)
 			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
 	}

-	private static JavaPairRDD<Text, Text> getTextTextJavaPairRDD(SparkSession spark, String inputPath,
-		boolean shouldDuplicateRels, String prefix) {
+	private static JavaPairRDD<Text, Text> getTextTextJavaPairRDD(SparkSession spark, String inputPath) {
 		return spark
 			.read()
-			.textFile(inputPath + "/" + prefix + "/" + prefix + "_JSON/*")
+			.textFile(inputPath)
 			.map(
 				(MapFunction<String, COCI>) value -> OBJECT_MAPPER.readValue(value, COCI.class),
 				Encoders.bean(COCI.class))
 			.flatMap(
 				(FlatMapFunction<COCI, Relation>) value -> createRelation(
-					value, shouldDuplicateRels, prefix)
+					value)
 						.iterator(),
 				Encoders.bean(Relation.class))
 			.filter((FilterFunction<Relation>) Objects::nonNull)
@ -121,34 +111,68 @@ public class CreateActionSetSparkJob implements Serializable {
 					new Text(OBJECT_MAPPER.writeValueAsString(aa))));
 	}

-	private static List<Relation> createRelation(COCI value, boolean duplicate, String p) {
+	private static List<Relation> createRelation(COCI value) throws JsonProcessingException {

 		List<Relation> relationList = new ArrayList<>();
-		String prefix;
+
 		String citing;
 		String cited;

-		switch (p) {
-			case COCI:
-				prefix = DOI_PREFIX;
-				citing = prefix
+		switch (value.getCiting_pid()) {
+			case "doi":
+				citing = DOI_PREFIX
 					+ IdentifierFactory
 						.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getCiting()));
-				cited = prefix
+				break;
+			case "pmid":
+				citing = PMID_PREFIX
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCiting()));
+				break;
+			case "arxiv":
+				citing = ARXIV_PREFIX
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.arXiv.toString(), value.getCiting()));
+				break;
+			case "pmcid":
+				citing = PMCID_PREFIX
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.pmc.toString(), value.getCiting()));
+				break;
+			case "isbn":
+			case "issn":
+				return relationList;
+
+			default:
+				throw new IllegalStateException("Invalid prefix: " + new ObjectMapper().writeValueAsString(value));
+		}
+
+		switch (value.getCited_pid()) {
+			case "doi":
+				cited = DOI_PREFIX
 					+ IdentifierFactory
 						.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getCited()));
 				break;
-			case POCI:
-				prefix = PMID_PREFIX;
-				citing = prefix
-					+ IdentifierFactory
-						.md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCiting()));
-				cited = prefix
+			case "pmid":
+				cited = PMID_PREFIX
 					+ IdentifierFactory
 						.md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCited()));
 				break;
+			case "arxiv":
+				cited = ARXIV_PREFIX
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.arXiv.toString(), value.getCited()));
+				break;
+			case "pmcid":
+				cited = PMCID_PREFIX
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.pmc.toString(), value.getCited()));
+				break;
+			case "isbn":
+			case "issn":
+				return relationList;
 			default:
-				throw new IllegalStateException("Invalid prefix: " + p);
+				throw new IllegalStateException("Invalid prefix: " + new ObjectMapper().writeValueAsString(value));
 		}

 		if (!citing.equals(cited)) {
@ -157,15 +181,6 @@ public class CreateActionSetSparkJob implements Serializable {
 					getRelation(
 						citing,
 						cited, ModelConstants.CITES));
-
-			if (duplicate && value.getCiting().endsWith(".refs")) {
-				citing = prefix + IdentifierFactory
-					.md5(
-						CleaningFunctions
-							.normalizePidValue(
-								"doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
-				relationList.add(getRelation(citing, cited, ModelConstants.CITES));
-			}
 		}

 		return relationList;
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java
@ -12,10 +12,7 @@ import java.util.zip.ZipInputStream;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@ -37,17 +34,17 @@ public class GetOpenCitationsRefs implements Serializable {

 		parser.parseArgument(args);

-		final String[] inputFile = parser.get("inputFile").split(";");
-		log.info("inputFile {}", Arrays.asList(inputFile));
+//		final String[] inputFile = parser.get("inputFile").split(";");
+//		log.info("inputFile {}", Arrays.asList(inputFile));

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath {}", workingPath);
+		final String inputPath = parser.get("inputPath");
+		log.info("inputPath {}", inputPath);

 		final String hdfsNameNode = parser.get("hdfsNameNode");
 		log.info("hdfsNameNode {}", hdfsNameNode);

-		final String prefix = parser.get("prefix");
-		log.info("prefix {}", prefix);
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath {}", outputPath);

 		Configuration conf = new Configuration();
 		conf.set("fs.defaultFS", hdfsNameNode);
@ -56,41 +53,42 @@ public class GetOpenCitationsRefs implements Serializable {

 		GetOpenCitationsRefs ocr = new GetOpenCitationsRefs();

-		for (String file : inputFile) {
-			ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem, prefix);
-		}
+		ocr.doExtract(inputPath, outputPath, fileSystem);

 	}

-	private void doExtract(String inputFile, String workingPath, FileSystem fileSystem, String prefix)
+	private void doExtract(String inputPath, String outputPath, FileSystem fileSystem)
 		throws IOException {

-		final Path path = new Path(inputFile);
+		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
+			.listFiles(
+				new Path(inputPath), true);
+		while (fileStatusListIterator.hasNext()) {
+			LocatedFileStatus fileStatus = fileStatusListIterator.next();
+			// do stuff with the file like ...
+			FSDataInputStream oc_zip = fileSystem.open(fileStatus.getPath());
+			try (ZipInputStream zis = new ZipInputStream(oc_zip)) {
+				ZipEntry entry = null;
+				while ((entry = zis.getNextEntry()) != null) {

-		FSDataInputStream oc_zip = fileSystem.open(path);
+					if (!entry.isDirectory()) {
+						String fileName = entry.getName();
+						// fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count;
+						fileName = fileName.substring(0, fileName.lastIndexOf("."));
+						// count++;
+						try (
+							FSDataOutputStream out = fileSystem
+								.create(new Path(outputPath + "/" + fileName + ".gz"));
+							GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {

-		// int count = 1;
-		try (ZipInputStream zis = new ZipInputStream(oc_zip)) {
-			ZipEntry entry = null;
-			while ((entry = zis.getNextEntry()) != null) {
-
-				if (!entry.isDirectory()) {
-					String fileName = entry.getName();
-					// fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count;
-					fileName = fileName.substring(0, fileName.lastIndexOf("."));
-					// count++;
-					try (
-						FSDataOutputStream out = fileSystem
-							.create(new Path(workingPath + "/" + prefix + "/" + fileName + ".gz"));
-						GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
-
-						IOUtils.copy(zis, gzipOs);
+							IOUtils.copy(zis, gzipOs);

+						}
 					}
+
 				}

 			}
-
 		}

 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/MapOCIdsInPids.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/MapOCIdsInPids.java
@ -0,0 +1,171 @@
+
+package eu.dnetlib.dhp.actionmanager.opencitations;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.ForeachFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import scala.Tuple2;
+
+/**
+ * @author miriam.baglioni
+ * @Date 29/02/24
+ */
+public class MapOCIdsInPids implements Serializable {
+
+	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
+	private static final String DELIMITER = ",";
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	public static void main(final String[] args) throws IOException, ParseException {
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					Objects
+						.requireNonNull(
+							MapOCIdsInPids.class
+								.getResourceAsStream(
+									"/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json"))));
+
+		parser.parseArgument(args);
+
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("inputPath");
+		log.info("inputPath {}", inputPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath {}", outputPath);
+
+		final String nameNode = parser.get("nameNode");
+		log.info("nameNode {}", nameNode);
+
+		unzipCorrespondenceFile(inputPath, nameNode);
+		SparkConf conf = new SparkConf();
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> mapIdentifiers(spark, inputPath, outputPath));
+
+	}
+
+	private static void unzipCorrespondenceFile(String inputPath, String hdfsNameNode) throws IOException {
+		Configuration conf = new Configuration();
+		conf.set("fs.defaultFS", hdfsNameNode);
+
+		final Path path = new Path(inputPath + "/correspondence/omid.zip");
+		FileSystem fileSystem = FileSystem.get(conf);
+
+		FSDataInputStream project_zip = fileSystem.open(path);
+
+		try (ZipInputStream zis = new ZipInputStream(project_zip)) {
+			ZipEntry entry = null;
+			while ((entry = zis.getNextEntry()) != null) {
+
+				if (!entry.isDirectory()) {
+					String fileName = entry.getName();
+					byte buffer[] = new byte[1024];
+					int count;
+
+					try (
+						FSDataOutputStream out = fileSystem
+							.create(new Path(inputPath + "/correspondence/omid.csv"))) {
+
+						while ((count = zis.read(buffer, 0, buffer.length)) != -1)
+							out.write(buffer, 0, count);
+
+					}
+
+				}
+
+			}
+
+		}
+
+	}
+
+	private static void mapIdentifiers(SparkSession spark, String inputPath, String outputPath) {
+		Dataset<COCI> coci = spark
+			.read()
+			.textFile(inputPath + "/JSON")
+			.map(
+				(MapFunction<String, COCI>) value -> OBJECT_MAPPER.readValue(value, COCI.class),
+				Encoders.bean(COCI.class));
+
+		Dataset<Tuple2<String, String>> correspondenceData = spark
+			.read()
+			.format("csv")
+			.option("sep", DELIMITER)
+			.option("inferSchema", "true")
+			.option("header", "true")
+			.option("quotes", "\"")
+			.load(inputPath + "/correspondence/omid.csv")
+			.repartition(5000)
+			.flatMap((FlatMapFunction<Row, Tuple2<String, String>>) r -> {
+				String ocIdentifier = r.getAs("omid");
+				String[] correspondentIdentifiers = ((String) r.getAs("id")).split(" ");
+				return Arrays
+					.stream(correspondentIdentifiers)
+					.map(ci -> new Tuple2<String, String>(ocIdentifier, ci))
+					.collect(Collectors.toList())
+					.iterator();
+			}, Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
+
+		Dataset<COCI> mappedCitingDataset = coci
+			.joinWith(correspondenceData, coci.col("citing").equalTo(correspondenceData.col("_1")))
+			.map((MapFunction<Tuple2<COCI, Tuple2<String, String>>, COCI>) t2 -> {
+				String correspondent = t2._2()._2();
+				t2._1().setCiting_pid(correspondent.substring(0, correspondent.indexOf(":")));
+				t2._1().setCiting(correspondent.substring(correspondent.indexOf(":") + 1));
+				return t2._1();
+			}, Encoders.bean(COCI.class));
+
+		mappedCitingDataset
+			.joinWith(correspondenceData, mappedCitingDataset.col("cited").equalTo(correspondenceData.col("_1")))
+			.map((MapFunction<Tuple2<COCI, Tuple2<String, String>>, COCI>) t2 -> {
+				String correspondent = t2._2()._2();
+				t2._1().setCited_pid(correspondent.substring(0, correspondent.indexOf(":")));
+				t2._1().setCited(correspondent.substring(correspondent.indexOf(":") + 1));
+				return t2._1();
+			}, Encoders.bean(COCI.class))
+			.write()
+			.mode(SaveMode.Append)
+			.option("compression", "gzip")
+			.json(outputPath);
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
@ -12,11 +12,9 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.*;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.*;
 import org.slf4j.Logger;
@ -42,19 +40,21 @@ public class ReadCOCI implements Serializable {
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

-		final String[] inputFile = parser.get("inputFile").split(";");
-		log.info("inputFile {}", Arrays.asList(inputFile));
+		final String hdfsNameNode = parser.get("hdfsNameNode");
+		log.info("hdfsNameNode {}", hdfsNameNode);
+
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
+		final String workingPath = parser.get("inputPath");
 		log.info("workingPath {}", workingPath);

-		final String format = parser.get("format");
-		log.info("format {}", format);
-
 		SparkConf sconf = new SparkConf();

+		Configuration conf = new Configuration();
+		conf.set("fs.defaultFS", hdfsNameNode);
+
+		FileSystem fileSystem = FileSystem.get(conf);
 		final String delimiter = Optional
 			.ofNullable(parser.get("delimiter"))
 			.orElse(DEFAULT_DELIMITER);
@ -66,20 +66,21 @@ public class ReadCOCI implements Serializable {
 				doRead(
 					spark,
 					workingPath,
-					inputFile,
+					fileSystem,
 					outputPath,
-					delimiter,
-					format);
+					delimiter);
 			});
 	}

-	private static void doRead(SparkSession spark, String workingPath, String[] inputFiles,
+	private static void doRead(SparkSession spark, String workingPath, FileSystem fileSystem,
 		String outputPath,
-		String delimiter, String format) {
-
-		for (String inputFile : inputFiles) {
-			String pString = workingPath + "/" + inputFile + ".gz";
-
+		String delimiter) throws IOException {
+		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
+			.listFiles(
+				new Path(workingPath), true);
+		while (fileStatusListIterator.hasNext()) {
+			LocatedFileStatus fileStatus = fileStatusListIterator.next();
+			log.info("extracting file {}", fileStatus.getPath().toString());
 			Dataset<Row> cociData = spark
 				.read()
 				.format("csv")
@ -87,26 +88,26 @@ public class ReadCOCI implements Serializable {
 				.option("inferSchema", "true")
 				.option("header", "true")
 				.option("quotes", "\"")
-				.load(pString)
+				.load(fileStatus.getPath().toString())
 				.repartition(100);

 			cociData.map((MapFunction<Row, COCI>) row -> {
+
 				COCI coci = new COCI();
-				if (format.equals("COCI")) {
-					coci.setCiting(row.getString(1));
-					coci.setCited(row.getString(2));
-				} else {
-					coci.setCiting(String.valueOf(row.getInt(1)));
-					coci.setCited(String.valueOf(row.getInt(2)));
-				}
+
+				coci.setCiting(row.getString(1));
+				coci.setCited(row.getString(2));
+
 				coci.setOci(row.getString(0));

 				return coci;
 			}, Encoders.bean(COCI.class))
+				.filter((FilterFunction<COCI>) c -> c != null)
 				.write()
-				.mode(SaveMode.Overwrite)
+				.mode(SaveMode.Append)
 				.option("compression", "gzip")
-				.json(outputPath + inputFile);
+				.json(outputPath);
+			fileSystem.rename(fileStatus.getPath(), new Path("/tmp/miriam/OC/DONE"));
 		}

 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java
@ -9,8 +9,10 @@ public class COCI implements Serializable {
 	private String oci;

 	private String citing;
+	private String citing_pid;

 	private String cited;
+	private String cited_pid;

 	public String getOci() {
 		return oci;
@ -25,6 +27,8 @@ public class COCI implements Serializable {
 	}

 	public void setCiting(String citing) {
+		if (citing != null && citing.startsWith("omid:"))
+			citing = citing.substring(5);
 		this.citing = citing;
 	}

@ -33,7 +37,24 @@ public class COCI implements Serializable {
 	}

 	public void setCited(String cited) {
+		if (cited != null && cited.startsWith("omid:"))
+			cited = cited.substring(5);
 		this.cited = cited;
 	}

+	public String getCiting_pid() {
+		return citing_pid;
+	}
+
+	public void setCiting_pid(String citing_pid) {
+		this.citing_pid = citing_pid;
+	}
+
+	public String getCited_pid() {
+		return cited_pid;
+	}
+
+	public void setCited_pid(String cited_pid) {
+		this.cited_pid = cited_pid;
+	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
@ -23,7 +23,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
 import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
-import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
 import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
@ -33,6 +32,7 @@ import eu.dnetlib.dhp.schema.oaf.H2020Classification;
 import eu.dnetlib.dhp.schema.oaf.H2020Programme;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.schema.oaf.Project;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;

@ -160,9 +160,11 @@ public class SparkAtomicActionJob {
 				(MapFunction<Project, String>) OafEntity::getId,
 				Encoders.STRING())
 			.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
-				Project first = it.next();
-				it.forEachRemaining(first::mergeFrom);
-				return first;
+				Project merge = it.next();
+				while (it.hasNext()) {
+					merge = MergeUtils.mergeProject(merge, it.next());
+				}
+				return merge;
 			}, Encoders.bean(Project.class))
 			.toJavaRDD()
 			.map(p -> new AtomicAction(Project.class, p))
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateActionSetSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateActionSetSparkJob.java
@ -0,0 +1,195 @@
+
+package eu.dnetlib.dhp.actionmanager.transformativeagreement;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.*;
+
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.transformativeagreement.model.TransformativeAgreementModel;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Country;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.utils.*;
+import scala.Tuple2;
+
+public class CreateActionSetSparkJob implements Serializable {
+
+	private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	private static final String IREL_PROJECT = "40|100018998___::1e5e62235d094afd01cd56e65112fc63";
+	private static final String TRANSFORMATIVE_AGREEMENT = "openapc::transformativeagreement";
+
+	public static void main(final String[] args) throws IOException, ParseException {
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					Objects
+						.requireNonNull(
+							CreateActionSetSparkJob.class
+								.getResourceAsStream(
+									"/eu/dnetlib/dhp/actionmanager/transformativeagreement/as_parameters.json"))));
+
+		parser.parseArgument(args);
+
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("inputPath");
+		log.info("inputPath {}", inputPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath {}", outputPath);
+
+		SparkConf conf = new SparkConf();
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> createActionSet(spark, inputPath, outputPath));
+
+	}
+
+	private static void createActionSet(SparkSession spark, String inputPath, String outputPath) {
+		JavaRDD<AtomicAction> relations = spark
+			.read()
+			.textFile(inputPath)
+			.map(
+				(MapFunction<String, TransformativeAgreementModel>) value -> OBJECT_MAPPER
+					.readValue(value, TransformativeAgreementModel.class),
+				Encoders.bean(TransformativeAgreementModel.class))
+			.flatMap(
+				(FlatMapFunction<TransformativeAgreementModel, Relation>) value -> createRelation(
+					value)
+						.iterator(),
+				Encoders.bean(Relation.class))
+			.filter((FilterFunction<Relation>) Objects::nonNull)
+			.toJavaRDD()
+			.map(p -> new AtomicAction(p.getClass(), p));
+//TODO relations in stand-by waiting to know if we need to create them or not In case we need just make a union before saving the sequence file
+		spark
+			.read()
+			.textFile(inputPath)
+			.map(
+				(MapFunction<String, TransformativeAgreementModel>) value -> OBJECT_MAPPER
+					.readValue(value, TransformativeAgreementModel.class),
+				Encoders.bean(TransformativeAgreementModel.class))
+			.map(
+				(MapFunction<TransformativeAgreementModel, Result>) value -> createResult(
+					value),
+				Encoders.bean(Result.class))
+			.filter((FilterFunction<Result>) r -> r != null)
+			.toJavaRDD()
+			.map(p -> new AtomicAction(p.getClass(), p))
+			.mapToPair(
+				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
+					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
+			.saveAsHadoopFile(
+				outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
+
+	}
+
+	private static Result createResult(TransformativeAgreementModel value) {
+		Result r = new Result();
+		r
+			.setId(
+				"50|doi_________::"
+					+ IdentifierFactory
+						.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getDoi())));
+		r.setTransformativeAgreement(value.getAgreement());
+		Country country = new Country();
+		country.setClassid(value.getCountry());
+		country.setClassname(value.getCountry());
+		country
+			.setDataInfo(
+				OafMapperUtils
+					.dataInfo(
+						false, ModelConstants.SYSIMPORT_ACTIONSET, false, false,
+						OafMapperUtils
+							.qualifier(
+								"openapc::transformativeagreement",
+								"Harvested from Trnasformative Agreement file from OpenAPC",
+								ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
+						"0.9"));
+		country.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
+		country.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
+		r.setCountry(Arrays.asList(country));
+		return r;
+	}
+
+	private static List<Relation> createRelation(TransformativeAgreementModel value) {
+
+		List<Relation> relationList = new ArrayList<>();
+
+		if (value.getAgreement().startsWith("IReL")) {
+			String paper;
+
+			paper = "50|doi_________::"
+				+ IdentifierFactory
+					.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getDoi()));
+
+			relationList
+				.add(
+					getRelation(
+						paper,
+						IREL_PROJECT, ModelConstants.IS_PRODUCED_BY));
+
+			relationList.add(getRelation(IREL_PROJECT, paper, ModelConstants.PRODUCES));
+		}
+		return relationList;
+	}
+
+	public static Relation getRelation(
+		String source,
+		String target,
+		String relClass) {
+
+		return OafMapperUtils
+			.getRelation(
+				source,
+				target,
+				ModelConstants.RESULT_PROJECT,
+				ModelConstants.OUTCOME,
+				relClass,
+				Arrays
+					.asList(
+						OafMapperUtils.keyValue(ModelConstants.OPEN_APC_ID, ModelConstants.OPEN_APC_NAME)),
+				OafMapperUtils
+					.dataInfo(
+						false, null, false, false,
+						OafMapperUtils
+							.qualifier(
+								TRANSFORMATIVE_AGREEMENT, "Transformative Agreement",
+								ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
+						"0.9"),
+				null);
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/model/TransformativeAgreementModel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/model/TransformativeAgreementModel.java
@ -0,0 +1,51 @@
+
+package eu.dnetlib.dhp.actionmanager.transformativeagreement.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * @author miriam.baglioni
+ * @Date 18/12/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+
+public class TransformativeAgreementModel implements Serializable {
+	private String institution;
+	private String doi;
+	private String agreement;
+	private String country;
+
+	public String getCountry() {
+		return country;
+	}
+
+	public void setCountry(String country) {
+		this.country = country;
+	}
+
+	public String getInstitution() {
+		return institution;
+	}
+
+	public void setInstitution(String institution) {
+		this.institution = institution;
+	}
+
+	public String getDoi() {
+		return doi;
+	}
+
+	public void setDoi(String doi) {
+		this.doi = doi;
+	}
+
+	public String getAgreement() {
+		return agreement;
+	}
+
+	public void setAgreement(String agreement) {
+		this.agreement = agreement;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java
@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.actionmanager.Constants.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;

 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Optional;
@ -13,7 +14,9 @@ import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.MapGroupsFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
@ -68,18 +71,59 @@ public class SparkAtomicActionUsageJob implements Serializable {

 		final String workingPath = parser.get("workingPath");

+		final String datasourcePath = parser.get("datasourcePath");
+
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				removeOutputDir(spark, outputPath);
-				prepareData(dbname, spark, workingPath + "/usageDb", "usage_stats", "result_id");
+				prepareResultData(
+					dbname, spark, workingPath + "/usageDb",
+					"usage_stats",
+					"result_id",
+					"repository_id",
+					datasourcePath);
 				prepareData(dbname, spark, workingPath + "/projectDb", "project_stats", "id");
 				prepareData(dbname, spark, workingPath + "/datasourceDb", "datasource_stats", "repository_id");
 				writeActionSet(spark, workingPath, outputPath);
 			});
 	}

+	private static void prepareResultData(String dbname, SparkSession spark, String workingPath, String tableName,
+		String resultAttributeName, String datasourceAttributeName,
+		String datasourcePath) {
+		Dataset<UsageStatsResultModel> resultModel = spark
+			.sql(
+				String
+					.format(
+						"select %s as id, %s as datasourceId, sum(downloads) as downloads, sum(views) as views " +
+							"from %s.%s group by %s, %s",
+						resultAttributeName, datasourceAttributeName, dbname, tableName, resultAttributeName,
+						datasourceAttributeName))
+			.as(Encoders.bean(UsageStatsResultModel.class));
+		Dataset<Datasource> datasource = readPath(spark, datasourcePath, Datasource.class)
+			.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
+			.map((MapFunction<Datasource, Datasource>) d -> {
+				d.setId(d.getId().substring(3));
+				return d;
+			}, Encoders.bean(Datasource.class));
+		resultModel
+			.joinWith(datasource, resultModel.col("datasourceId").equalTo(datasource.col("id")), "left")
+			.map((MapFunction<Tuple2<UsageStatsResultModel, Datasource>, UsageStatsResultModel>) t2 -> {
+				UsageStatsResultModel usrm = t2._1();
+				if(Optional.ofNullable(t2._2()).isPresent())
+					usrm.setDatasourceId(usrm.getDatasourceId() + "||" + t2._2().getOfficialname().getValue());
+				else
+					usrm.setDatasourceId(usrm.getDatasourceId() + "||NO_MATCH_FOUND");
+				return usrm;
+			}, Encoders.bean(UsageStatsResultModel.class))
+			.write()
+			.mode(SaveMode.Overwrite)
+			.option("compression", "gzip")
+			.json(workingPath);
+	}
+
 	private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName,
 		String attribute_name) {
 		spark
@ -115,15 +159,62 @@ public class SparkAtomicActionUsageJob implements Serializable {

 	}

+	public static Measure newMeasureInstance(String id) {
+		Measure m = new Measure();
+		m.setId(id);
+		m.setUnit(new ArrayList<>());
+		return m;
+	}
+
 	private static Dataset<Result> getFinalIndicatorsResult(SparkSession spark, String inputPath) {

-		return readPath(spark, inputPath, UsageStatsModel.class)
-			.map((MapFunction<UsageStatsModel, Result>) usm -> {
+		return readPath(spark, inputPath, UsageStatsResultModel.class)
+			.groupByKey((MapFunction<UsageStatsResultModel, String>) usm -> usm.getId(), Encoders.STRING())
+			.mapGroups((MapGroupsFunction<String, UsageStatsResultModel, Result>) (k, it) -> {
 				Result r = new Result();
-				r.setId("50|" + usm.getId());
-				r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
+				r.setId("50|" + k);
+				// id = download or view and unit = list of key value pairs
+				Measure download = newMeasureInstance("downloads");
+				Measure view = newMeasureInstance("views");
+				UsageStatsResultModel first = it.next();
+				addCountForDatasource(download, first, view);
+				it.forEachRemaining(usm -> {
+					addCountForDatasource(download, usm, view);
+				});
+				r.setMeasures(Arrays.asList(download, view));
 				return r;
-			}, Encoders.bean(Result.class));
+			}, Encoders.bean(Result.class))
+//			.map((MapFunction<UsageStatsResultModel, Result>) usm -> {
+//				Result r = new Result();
+//				r.setId("50|" + usm.getId());
+//				r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
+//				return r;
+//			}, Encoders.bean(Result.class));
+		;
+	}
+
+	private static void addCountForDatasource(Measure download, UsageStatsResultModel usm, Measure view) {
+		DataInfo dataInfo = OafMapperUtils
+			.dataInfo(
+				false,
+				UPDATE_DATA_INFO_TYPE,
+				true,
+				false,
+				OafMapperUtils
+					.qualifier(
+						UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID,
+						UPDATE_CLASS_NAME,
+						ModelConstants.DNET_PROVENANCE_ACTIONS,
+						ModelConstants.DNET_PROVENANCE_ACTIONS),
+				"");
+		download
+			.getUnit()
+			.add(
+				OafMapperUtils
+					.newKeyValueInstance(usm.getDatasourceId(), String.valueOf(usm.getDownloads()), dataInfo));
+		view
+			.getUnit()
+			.add(OafMapperUtils.newKeyValueInstance(usm.getDatasourceId(), String.valueOf(usm.getViews()), dataInfo));
 	}

 	private static Dataset<Project> getFinalIndicatorsProject(SparkSession spark, String inputPath) {
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsResultModel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsResultModel.java
@ -0,0 +1,18 @@
+
+package eu.dnetlib.dhp.actionmanager.usagestats;
+
+/**
+ * @author miriam.baglioni
+ * @Date 30/06/23
+ */
+public class UsageStatsResultModel extends UsageStatsModel {
+	private String datasourceId;
+
+	public String getDatasourceId() {
+		return datasourceId;
+	}
+
+	public void setDatasourceId(String datasourceId) {
+		this.datasourceId = datasourceId;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
 import eu.dnetlib.dhp.aggregation.common.ReportingJob;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.base.BaseCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.file.FileCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.file.FileGZipCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
@ -120,6 +121,8 @@ public class CollectorWorker extends ReportingJob {
 				return new FileCollectorPlugin(fileSystem);
 			case fileGzip:
 				return new FileGZipCollectorPlugin(fileSystem);
+			case baseDump:
+				return new BaseCollectorPlugin(this.fileSystem);
 			case other:
 				final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
 					.ofNullable(api.getParams().get("other_plugin_type"))
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
@ -10,7 +10,8 @@ import eu.dnetlib.dhp.common.collection.CollectorException;
 public interface CollectorPlugin {

 	enum NAME {
-		oai, other, rest_json2xml, file, fileGzip;
+
+		oai, other, rest_json2xml, file, fileGzip, baseDump;

 		public enum OTHER_NAME {
 			mdstore_mongodb_dump, mdstore_mongodb
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java
@ -0,0 +1,171 @@
+
+package eu.dnetlib.dhp.collection.plugin.base;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.util.Iterator;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLEventWriter;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLOutputFactory;
+import javax.xml.stream.events.EndElement;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.CompressorInputStream;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+
+public class BaseCollectorIterator implements Iterator<String> {
+
+	private String nextElement;
+
+	private final BlockingQueue<String> queue = new LinkedBlockingQueue<>(100);
+
+	private static final Logger log = LoggerFactory.getLogger(BaseCollectorIterator.class);
+
+	private static final String END_ELEM = "__END__";
+
+	public BaseCollectorIterator(final FileSystem fs, final Path filePath, final AggregatorReport report) {
+		new Thread(() -> importHadoopFile(fs, filePath, report)).start();
+		try {
+			this.nextElement = this.queue.take();
+		} catch (final InterruptedException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	protected BaseCollectorIterator(final String resourcePath, final AggregatorReport report) {
+		new Thread(() -> importTestFile(resourcePath, report)).start();
+		try {
+			this.nextElement = this.queue.take();
+		} catch (final InterruptedException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	@Override
+	public synchronized boolean hasNext() {
+		return (this.nextElement != null) & !END_ELEM.equals(this.nextElement);
+	}
+
+	@Override
+	public synchronized String next() {
+		try {
+			return END_ELEM.equals(this.nextElement) ? null : this.nextElement;
+		} finally {
+			try {
+				this.nextElement = this.queue.take();
+			} catch (final InterruptedException e) {
+				throw new RuntimeException(e);
+			}
+		}
+
+	}
+
+	private void importHadoopFile(final FileSystem fs, final Path filePath, final AggregatorReport report) {
+		log.info("I start to read the TAR stream");
+
+		try (InputStream origInputStream = fs.open(filePath);
+			final TarArchiveInputStream tarInputStream = new TarArchiveInputStream(origInputStream)) {
+			importTarStream(tarInputStream, report);
+		} catch (final Throwable e) {
+			throw new RuntimeException("Error processing BASE records", e);
+		}
+	}
+
+	private void importTestFile(final String resourcePath, final AggregatorReport report) {
+		try (final InputStream origInputStream = BaseCollectorIterator.class.getResourceAsStream(resourcePath);
+			final TarArchiveInputStream tarInputStream = new TarArchiveInputStream(origInputStream)) {
+			importTarStream(tarInputStream, report);
+		} catch (final Throwable e) {
+			throw new RuntimeException("Error processing BASE records", e);
+		}
+	}
+
+	private void importTarStream(final TarArchiveInputStream tarInputStream, final AggregatorReport report) {
+		long count = 0;
+
+		final XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
+		final XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();
+
+		try {
+			TarArchiveEntry entry;
+			while ((entry = (TarArchiveEntry) tarInputStream.getNextEntry()) != null) {
+				final String name = entry.getName();
+
+				if (!entry.isDirectory() && name.contains("ListRecords") && name.endsWith(".bz2")) {
+
+					log.info("Processing file (BZIP): " + name);
+
+					final byte[] bzipData = new byte[(int) entry.getSize()];
+					IOUtils.readFully(tarInputStream, bzipData);
+
+					try (InputStream bzipIs = new ByteArrayInputStream(bzipData);
+						final BufferedInputStream bzipBis = new BufferedInputStream(bzipIs);
+						final CompressorInputStream bzipInput = new CompressorStreamFactory()
+							.createCompressorInputStream(bzipBis)) {
+
+						final XMLEventReader reader = xmlInputFactory.createXMLEventReader(bzipInput);
+
+						XMLEventWriter eventWriter = null;
+						StringWriter xmlWriter = null;
+
+						while (reader.hasNext()) {
+							final XMLEvent nextEvent = reader.nextEvent();
+
+							if (nextEvent.isStartElement()) {
+								final StartElement startElement = nextEvent.asStartElement();
+								if ("record".equals(startElement.getName().getLocalPart())) {
+									xmlWriter = new StringWriter();
+									eventWriter = xmlOutputFactory.createXMLEventWriter(xmlWriter);
+								}
+							}
+
+							if (eventWriter != null) {
+								eventWriter.add(nextEvent);
+							}
+
+							if (nextEvent.isEndElement()) {
+								final EndElement endElement = nextEvent.asEndElement();
+								if ("record".equals(endElement.getName().getLocalPart())) {
+									eventWriter.flush();
+									eventWriter.close();
+
+									this.queue.put(xmlWriter.toString());
+
+									eventWriter = null;
+									xmlWriter = null;
+									count++;
+								}
+							}
+
+						}
+					}
+				}
+			}
+
+			this.queue.put(END_ELEM); // TO INDICATE THE END OF THE QUEUE
+		} catch (final Throwable e) {
+			log.error("Error processing BASE records", e);
+			report.put(e.getClass().getName(), e.getMessage());
+			throw new RuntimeException("Error processing BASE records", e);
+		} finally {
+			log.info("Total records (written in queue): " + count);
+		}
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java
@ -0,0 +1,159 @@
+
+package eu.dnetlib.dhp.collection.plugin.base;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.Set;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.DocumentHelper;
+import org.dom4j.Node;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.file.AbstractSplittedRecordPlugin;
+import eu.dnetlib.dhp.common.DbClient;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+
+public class BaseCollectorPlugin implements CollectorPlugin {
+
+	private final FileSystem fs;
+
+	private static final Logger log = LoggerFactory.getLogger(AbstractSplittedRecordPlugin.class);
+
+	// MAPPING AND FILTERING ARE DEFINED HERE:
+	// https://docs.google.com/document/d/1Aj-ZAV11b44MCrAAUCPiS2TUlXb6PnJEu1utCMAcCOU/edit
+
+	public BaseCollectorPlugin(final FileSystem fs) {
+		this.fs = fs;
+	}
+
+	@Override
+	public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException {
+		// the path of the dump file on HDFS
+		// http://oai.base-search.net/initial_load/base_oaipmh_dump-current.tar
+		// it could be downloaded from iis-cdh5-test-gw.ocean.icm.edu.pl and then copied on HDFS
+		final Path filePath = Optional
+			.ofNullable(api.getBaseUrl())
+			.map(Path::new)
+			.orElseThrow(() -> new CollectorException("missing baseUrl"));
+
+		// get the parameters for the connection to the OpenAIRE database.
+		// the database is used to obtain the list of the datasources that the plugin will collect
+		final String dbUrl = api.getParams().get("dbUrl");
+		final String dbUser = api.getParams().get("dbUser");
+		final String dbPassword = api.getParams().get("dbPassword");
+
+		// the types(comma separated, empty value for all) that the plugin will collect,
+		// the types should be expressed in the format of the normalized types of BASE (for example 1,121,...)
+		final String acceptedNormTypesString = api.getParams().get("acceptedNormTypes");
+
+		log.info("baseUrl: {}", filePath);
+		log.info("dbUrl: {}", dbUrl);
+		log.info("dbUser: {}", dbUser);
+		log.info("dbPassword: {}", "***");
+		log.info("acceptedNormTypes: {}", acceptedNormTypesString);
+
+		try {
+			if (!this.fs.exists(filePath)) {
+				throw new CollectorException("path does not exist: " + filePath);
+			}
+		} catch (final Throwable e) {
+			throw new CollectorException(e);
+		}
+
+		final Set<String> acceptedOpendoarIds = findAcceptedOpendoarIds(dbUrl, dbUser, dbPassword);
+
+		final Set<String> acceptedNormTypes = new HashSet<>();
+		if (StringUtils.isNotBlank(acceptedNormTypesString)) {
+			for (final String s : StringUtils.split(acceptedNormTypesString, ",")) {
+				if (StringUtils.isNotBlank(s)) {
+					acceptedNormTypes.add(s.trim());
+				}
+			}
+		}
+
+		final Iterator<String> iterator = new BaseCollectorIterator(this.fs, filePath, report);
+		final Spliterator<String> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
+		return StreamSupport
+			.stream(spliterator, false)
+			.filter(doc -> filterXml(doc, acceptedOpendoarIds, acceptedNormTypes));
+	}
+
+	private Set<String> findAcceptedOpendoarIds(final String dbUrl, final String dbUser, final String dbPassword)
+		throws CollectorException {
+		final Set<String> accepted = new HashSet<>();
+
+		try (final DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
+
+			final String sql = IOUtils
+				.toString(
+					getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql"));
+
+			dbClient.processResults(sql, row -> {
+				try {
+					final String dsId = row.getString("id");
+					log.info("Accepted Datasource: " + dsId);
+					accepted.add(dsId);
+				} catch (final SQLException e) {
+					log.error("Error in SQL", e);
+					throw new RuntimeException("Error in SQL", e);
+				}
+			});
+
+		} catch (final IOException e) {
+			log.error("Error accessong SQL", e);
+			throw new CollectorException("Error accessong SQL", e);
+		}
+
+		log.info("Accepted Datasources (TOTAL): " + accepted.size());
+
+		return accepted;
+	}
+
+	protected static boolean filterXml(final String xml,
+		final Set<String> acceptedOpendoarIds,
+		final Set<String> acceptedNormTypes) {
+		try {
+
+			final Document doc = DocumentHelper.parseText(xml);
+
+			final String id = doc.valueOf("//*[local-name()='collection']/@opendoar_id").trim();
+
+			if (StringUtils.isBlank(id) || !acceptedOpendoarIds.contains("opendoar____::" + id)) {
+				return false;
+			}
+
+			if (acceptedNormTypes.isEmpty()) {
+				return true;
+			}
+
+			for (final Object s : doc.selectNodes("//*[local-name()='typenorm']")) {
+				if (acceptedNormTypes.contains(((Node) s).getText().trim())) {
+					return true;
+				}
+			}
+
+			return false;
+		} catch (final DocumentException e) {
+			log.error("Error parsing document", e);
+			throw new RuntimeException("Error parsing document", e);
+		}
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
@ -48,23 +48,30 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
 	@Override
 	public MetadataRecord call(MetadataRecord value) {
 		aggregationCounter.getTotalItems().add(1);
+
+		Processor processor = new Processor(false);
+
+		processor.registerExtensionFunction(cleanFunction);
+		processor.registerExtensionFunction(new DateCleaner());
+		processor.registerExtensionFunction(new PersonCleaner());
+
+		final XsltCompiler comp = processor.newXsltCompiler();
+		QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
+		comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
+		QName datasourceNameParam = new QName(DATASOURCE_NAME_PARAM);
+		comp.setParameter(datasourceNameParam, new XdmAtomicValue(value.getProvenance().getDatasourceName()));
+		XsltExecutable xslt;
+		XdmNode source;
 		try {
-			Processor processor = new Processor(false);
-
-			processor.registerExtensionFunction(cleanFunction);
-			processor.registerExtensionFunction(new DateCleaner());
-			processor.registerExtensionFunction(new PersonCleaner());
-
-			final XsltCompiler comp = processor.newXsltCompiler();
-			QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
-			comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
-			QName datasourceNameParam = new QName(DATASOURCE_NAME_PARAM);
-			comp.setParameter(datasourceNameParam, new XdmAtomicValue(value.getProvenance().getDatasourceName()));
-			XsltExecutable xslt = comp
+			xslt = comp
 				.compile(new StreamSource(IOUtils.toInputStream(transformationRule, StandardCharsets.UTF_8)));
-			XdmNode source = processor
+			source = processor
 				.newDocumentBuilder()
 				.build(new StreamSource(IOUtils.toInputStream(value.getBody(), StandardCharsets.UTF_8)));
+		} catch (Throwable e) {
+			throw new RuntimeException("Error on parsing xslt", e);
+		}
+		try {
 			XsltTransformer trans = xslt.load();
 			trans.setInitialContextNode(source);
 			final StringWriter output = new StringWriter();
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json
@ -17,6 +17,18 @@
    "paramDescription": "the path to get the input data from Pubmed",
    "paramRequired": true
  },
+  {
+    "paramName": "oip",
+    "paramLongName": "openapcInputPath",
+    "paramDescription": "the path to get the input data from OpenAPC",
+    "paramRequired": true
+  },
+  {
+    "paramName": "dip",
+    "paramLongName": "dataciteInputPath",
+    "paramDescription": "the path to get the input data from Datacite",
+    "paramRequired": true
+  },
  {
    "paramName": "o",
    "paramLongName": "outputPath",
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
@ -31,6 +31,9 @@ spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListen
 # The following is needed as a property of a workflow
 oozie.wf.application.path=${oozieTopWfApplicationPath}

-crossrefInputPath=/data/bip-affiliations/data.json
+crossrefInputPath=/data/bip-affiliations/crossref-data.json
 pubmedInputPath=/data/bip-affiliations/pubmed-data.json
+openapcInputPath=/data/bip-affiliations/openapc-data.json
+dataciteInputPath=/data/bip-affiliations/datacite-data.json
+
 outputPath=/tmp/crossref-affiliations-output-v5
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
@ -9,6 +9,14 @@
            <name>pubmedInputPath</name>
            <description>the path where to find the inferred affiliation relations from Pubmed</description>
        </property>
+        <property>
+            <name>openapcInputPath</name>
+            <description>the path where to find the inferred affiliation relations from OpenAPC</description>
+        </property>
+        <property>
+            <name>dataciteInputPath</name>
+            <description>the path where to find the inferred affiliation relations from Datacite</description>
+        </property>
        <property>
            <name>outputPath</name>
            <description>the path where to store the actionset</description>
@ -102,6 +110,9 @@
            </spark-opts>
            <arg>--crossrefInputPath</arg><arg>${crossrefInputPath}</arg>
            <arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
+            <arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg>
+            <arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg>
+
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json
@ -16,5 +16,10 @@
    "paramLongName": "outputPath",
    "paramDescription": "the path of the new ActionSet",
    "paramRequired": true
-  }
+  },  {
+  "paramName": "fd",
+  "paramLongName": "distributeDoi",
+  "paramDescription": "the path of the new ActionSet",
+  "paramRequired": false
+}
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/fosnodoi/as_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/fosnodoi/as_parameters.json
@ -0,0 +1,20 @@
+[
+  {
+    "paramName": "sp",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the zipped opencitations file",
+    "paramRequired": true
+  },
+  {
+    "paramName": "op",
+    "paramLongName": "outputPath",
+    "paramDescription": "the working path",
+    "paramRequired": true
+  },
+  {
+    "paramName": "issm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "the hdfs name node",
+    "paramRequired": false
+  }
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/fosnodoi/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/fosnodoi/oozie_app/config-default.xml
@ -0,0 +1,30 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>hiveMetastoreUris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>hiveJdbcUrl</name>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+    </property>
+    <property>
+        <name>hiveDbName</name>
+        <value>openaire</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/fosnodoi/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/fosnodoi/oozie_app/workflow.xml
@ -0,0 +1,153 @@
+
+<workflow-app name="FOS no doi" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>fosPath</name>
+            <description>the input path of the resources to be extended</description>
+        </property>
+        <property>
+            <name>outputPath</name>
+            <description>the path where to store the actionset</description>
+        </property>
+        <property>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
+        </property>
+        <property>
+            <name>oozieActionShareLibForSpark2</name>
+            <description>oozie action sharelib for spark 2.*</description>
+        </property>
+        <property>
+            <name>spark2ExtraListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+            <description>spark 2.* extra listeners classname</description>
+        </property>
+        <property>
+            <name>spark2SqlQueryExecutionListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+            <description>spark 2.* sql query execution listeners classname</description>
+        </property>
+        <property>
+            <name>spark2YarnHistoryServerAddress</name>
+            <description>spark 2.* yarn history server address</description>
+        </property>
+        <property>
+            <name>spark2EventLogDir</name>
+            <description>spark 2.* event log dir location</description>
+        </property>
+    </parameters>
+
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.job.queuename</name>
+                <value>${queueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapred.job.queue.name</name>
+                <value>${oozieLauncherQueueName}</value>
+            </property>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+
+        </configuration>
+    </global>
+    <start to="getFOS"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+
+
+    <action name="getFOS">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Gets Data from FOS csv file</name>
+            <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetFOSSparkJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${fosPath}</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/input/fos</arg>
+            <arg>--delimiter</arg><arg>${delimiter}</arg>
+        </spark>
+        <ok to="prepareFos"/>
+        <error to="Kill"/>
+    </action>
+
+
+    <action name="prepareFos">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the results from FOS</name>
+            <class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareFOSSparkJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/input/fos</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/prepared</arg>
+            <arg>--distributeDoi</arg><arg>false</arg>
+        </spark>
+        <ok to="produceActionSet"/>
+        <error to="Kill"/>
+    </action>
+
+
+
+    <action name="produceActionSet">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Save the action set grouping results with the same id</name>
+            <class>eu.dnetlib.dhp.actionmanager.fosnodoi.CreateActionSetSparkJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/prepared/fos</arg>
+            <arg>--outputPath</arg><arg>${outputPath}</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json
@ -1,13 +1,13 @@
 [
  {
-    "paramName": "if",
-    "paramLongName": "inputFile",
+    "paramName": "ip",
+    "paramLongName": "inputPath",
    "paramDescription": "the zipped opencitations file",
    "paramRequired": true
  },
  {
-    "paramName": "wp",
-    "paramLongName": "workingPath",
+    "paramName": "op",
+    "paramLongName": "outputPath",
    "paramDescription": "the working path",
    "paramRequired": true
  },
@ -16,11 +16,5 @@
    "paramLongName": "hdfsNameNode",
    "paramDescription": "the hdfs name node",
    "paramRequired": true
-  },
-  {
-    "paramName": "p",
-    "paramLongName": "prefix",
-    "paramDescription": "COCI or POCI",
-    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json
@ -1,7 +1,7 @@
 [
  {
-    "paramName": "wp",
-    "paramLongName": "workingPath",
+    "paramName": "ip",
+    "paramLongName": "inputPath",
    "paramDescription": "the zipped opencitations file",
    "paramRequired": true
  },
@ -24,15 +24,9 @@
    "paramLongName": "outputPath",
    "paramDescription": "the hdfs name node",
    "paramRequired": true
-  },
-  {
-    "paramName": "if",
-    "paramLongName": "inputFile",
-    "paramDescription": "the hdfs name node",
-    "paramRequired": true
-  }, {
-  "paramName": "f",
-  "paramLongName": "format",
+  },  {
+  "paramName": "nn",
+  "paramLongName": "hdfsNameNode",
  "paramDescription": "the hdfs name node",
  "paramRequired": true
 }
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml
@ -27,7 +27,9 @@
            <case to="download">${wf:conf('resumeFrom') eq 'DownloadDump'}</case>
            <case to="extract">${wf:conf('resumeFrom') eq 'ExtractContent'}</case>
            <case to="read">${wf:conf('resumeFrom') eq 'ReadContent'}</case>
-            <default to="create_actionset"/> <!-- first action to be done when downloadDump is to be performed -->
+            <case to="remap">${wf:conf('resumeFrom') eq 'MapContent'}</case>
+            <case to="create_actionset">${wf:conf('resumeFrom') eq 'CreateAS'}</case>
+            <default to="deleteoutputpath"/> <!-- first action to be done when downloadDump is to be performed -->
        </switch>
    </decision>

@ -35,6 +37,15 @@
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

+    <action name="deleteoutputpath">
+        <fs>
+            <delete path='${inputPath}'/>
+            <mkdir path='${inputPath}'/>
+        </fs>
+        <ok to="download"/>
+        <error to="Kill"/>
+    </action>
+
    <action name="download">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
@ -47,7 +58,28 @@
            </configuration>
            <exec>download.sh</exec>
            <argument>${filelist}</argument>
-            <argument>${workingPath}/${prefix}/Original</argument>
+            <argument>${inputPath}/Original</argument>
+            <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
+            <file>download.sh</file>
+            <capture-output/>
+        </shell>
+        <ok to="download_correspondence"/>
+        <error to="Kill"/>
+    </action>
+<!--    downloads the correspondence from the omid and the pid (doi, pmid etc)-->
+    <action name="download_correspondence">
+        <shell xmlns="uri:oozie:shell-action:0.2">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <configuration>
+                <property>
+                    <name>mapred.job.queue.name</name>
+                    <value>${queueName}</value>
+                </property>
+            </configuration>
+            <exec>download_corr.sh</exec>
+            <argument>${filecorrespondence}</argument>
+            <argument>${inputPath}/correspondence</argument>
            <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
            <file>download.sh</file>
            <capture-output/>
@ -60,9 +92,19 @@
        <java>
            <main-class>eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs</main-class>
            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
-            <arg>--inputFile</arg><arg>${inputFile}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}/${prefix}</arg>
-            <arg>--prefix</arg><arg>${prefix}</arg>
+            <arg>--inputPath</arg><arg>${inputPath}/Original</arg>
+            <arg>--outputPath</arg><arg>${inputPath}/Extracted</arg>
+        </java>
+        <ok to="read"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="extract_correspondence">
+        <java>
+            <main-class>eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs</main-class>
+            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
+            <arg>--inputPath</arg><arg>${inputPath}/correspondence</arg>
+            <arg>--outputPath</arg><arg>${inputPath}/correspondence_extracted</arg>
        </java>
        <ok to="read"/>
        <error to="Kill"/>
@ -85,11 +127,35 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}/${prefix}/${prefix}</arg>
-            <arg>--outputPath</arg><arg>${workingPath}/${prefix}/${prefix}_JSON/</arg>
+            <arg>--inputPath</arg><arg>${inputPath}/Extracted</arg>
+            <arg>--outputPath</arg><arg>${inputPath}/JSON</arg>
            <arg>--delimiter</arg><arg>${delimiter}</arg>
-            <arg>--inputFile</arg><arg>${inputFileCoci}</arg>
-            <arg>--format</arg><arg>${prefix}</arg>
+            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
+        </spark>
+        <ok to="remap"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="remap">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the AS for OC</name>
+            <class>eu.dnetlib.dhp.actionmanager.opencitations.MapOCIdsInPids</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}</arg>
+            <arg>--outputPath</arg><arg>${outputPathExtraction}</arg>
+            <arg>--nameNode</arg><arg>${nameNode}</arg>
        </spark>
        <ok to="create_actionset"/>
        <error to="Kill"/>
@ -112,7 +178,7 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
-            <arg>--inputPath</arg><arg>${workingPath}</arg>
+            <arg>--inputPath</arg><arg>${outputPathExtraction}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json
@ -0,0 +1,25 @@
+[
+  {
+    "paramName": "ip",
+    "paramLongName": "inputPath",
+    "paramDescription": "the zipped opencitations file",
+    "paramRequired": true
+  },
+  {
+    "paramName": "op",
+    "paramLongName": "outputPath",
+    "paramDescription": "the working path",
+    "paramRequired": true
+  },
+  {
+    "paramName": "issm",
+    "paramLongName": "isSparkSessionManged",
+    "paramDescription": "the hdfs name node",
+    "paramRequired": false
+  },{
+  "paramName": "nn",
+  "paramLongName": "nameNode",
+  "paramDescription": "the hdfs name node",
+  "paramRequired": true
+}
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/as_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/as_parameters.json
@ -0,0 +1,20 @@
+[
+  {
+    "paramName": "ip",
+    "paramLongName": "inputPath",
+    "paramDescription": "the zipped opencitations file",
+    "paramRequired": true
+  },
+  {
+    "paramName": "op",
+    "paramLongName": "outputPath",
+    "paramDescription": "the working path",
+    "paramRequired": true
+  },
+  {
+    "paramName": "issm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "the hdfs name node",
+    "paramRequired": false
+  }
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/input_read_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/input_read_parameters.json
@ -0,0 +1,30 @@
+[
+
+  {
+    "paramName": "issm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "the hdfs name node",
+    "paramRequired": false
+  },
+  {
+    "paramName": "d",
+    "paramLongName": "delimiter",
+    "paramDescription": "the hdfs name node",
+    "paramRequired": false
+  },
+  {
+    "paramName": "op",
+    "paramLongName": "outputPath",
+    "paramDescription": "the hdfs name node",
+    "paramRequired": true
+  },
+  {
+    "paramName": "if",
+    "paramLongName": "inputFile",
+    "paramDescription": "the hdfs name node",
+    "paramRequired": true
+  }
+]
+
+
+
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/config-default.xml
@ -0,0 +1,58 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+    <property>
+        <name>hive_metastore_uris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>spark2YarnHistoryServerAddress</name>
+        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
+    </property>
+    <property>
+        <name>spark2ExtraListeners</name>
+        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+    </property>
+    <property>
+        <name>spark2SqlQueryExecutionListeners</name>
+        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>sparkExecutorNumber</name>
+        <value>4</value>
+    </property>
+    <property>
+        <name>spark2EventLogDir</name>
+        <value>/user/spark/spark2ApplicationHistory</value>
+    </property>
+    <property>
+        <name>sparkDriverMemory</name>
+        <value>15G</value>
+    </property>
+    <property>
+        <name>sparkExecutorMemory</name>
+        <value>6G</value>
+    </property>
+    <property>
+        <name>sparkExecutorCores</name>
+        <value>1</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/download.sh
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/download.sh
@ -0,0 +1,2 @@
+#!/bin/bash
+curl -L $1  | hdfs dfs -put - $2
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/transformativeagreement/oozie_app/workflow.xml
@ -0,0 +1,82 @@
+<workflow-app name="Transfomative Agreement Integration" xmlns="uri:oozie:workflow:0.5">
+
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.job.queuename</name>
+                <value>${queueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapred.job.queue.name</name>
+                <value>${oozieLauncherQueueName}</value>
+            </property>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+
+        </configuration>
+    </global>
+
+    <start to="resume_from"/>
+
+    <decision name="resume_from">
+        <switch>
+            <case to="download">${wf:conf('resumeFrom') eq 'DownloadDump'}</case>
+            <default to="create_actionset"/> <!-- first action to be done when downloadDump is to be performed -->
+        </switch>
+    </decision>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="download">
+        <shell xmlns="uri:oozie:shell-action:0.2">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <configuration>
+                <property>
+                    <name>mapred.job.queue.name</name>
+                    <value>${queueName}</value>
+                </property>
+            </configuration>
+            <exec>download.sh</exec>
+            <argument>${inputFile}</argument>
+            <argument>${workingDir}/transformativeagreement/transformativeAgreement.json</argument>
+            <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
+            <file>download.sh</file>
+            <capture-output/>
+        </shell>
+        <ok to="create_actionset"/>
+        <error to="Kill"/>
+    </action>
+
+
+    <action name="create_actionset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the AS for the Transformative Agreement</name>
+            <class>eu.dnetlib.dhp.actionmanager.transformativeagreement.CreateActionSetSparkJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${workingDir}/transformativeagreement/</arg>
+            <arg>--outputPath</arg><arg>${outputPath}</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json
@ -28,5 +28,11 @@
    "paramLongName": "workingPath",
    "paramDescription": "the workingPath where to save the content of the usage_stats table",
    "paramRequired": true
+  },
+  {
+    "paramName": "dp",
+    "paramLongName": "datasourcePath",
+    "paramDescription": "the workingPath where to save the content of the usage_stats table",
+    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml
@ -90,6 +90,7 @@
            <arg>--outputPath</arg><arg>${outputPath}</arg>
            <arg>--usagestatsdb</arg><arg>${usagestatsdb}</arg>
            <arg>--workingPath</arg><arg>${workingDir}</arg>
+            <arg>--datasourcePath</arg><arg>${datasourcePath}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/base.sql
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/base.sql
@ -0,0 +1,114 @@
+BEGIN;
+
+INSERT INTO dsm_services(
+	_dnet_resource_identifier_, 
+	id,
+	officialname,
+	englishname,
+	namespaceprefix,
+	websiteurl,
+	logourl,
+	platform,
+	contactemail,
+	collectedfrom,
+	provenanceaction,
+	_typology_to_remove_,
+	eosc_type,
+	eosc_datasource_type,
+	research_entity_types,
+	thematic
+) VALUES (
+	'openaire____::base_search',
+	'openaire____::base_search',
+	'Bielefeld Academic Search Engine (BASE)',
+	'Bielefeld Academic Search Engine (BASE)',
+	'base_search_',
+	'https://www.base-search.net',
+	'https://www.base-search.net/about/download/logo_224x57_white.gif',
+	'BASE',
+	'openaire-helpdesk@uni-bielefeld.de',
+	'infrastruct_::openaire',
+	'user:insert',
+	'aggregator::pubsrepository::unknown',
+	'Data Source',
+	'Aggregator',
+	ARRAY['Research Products'],
+	false
+);
+
+INSERT INTO dsm_service_organization(
+	_dnet_resource_identifier_,
+	organization,
+	service
+) VALUES (
+	'fairsharing_::org::214@@openaire____::base_search',
+	'fairsharing_::org::214',
+	'openaire____::base_search'
+);
+
+INSERT INTO dsm_api(
+	_dnet_resource_identifier_,
+	id,
+	service,
+	protocol,
+	baseurl,
+	metadata_identifier_path
+) VALUES (
+	'api_________::openaire____::base_search::dump',
+	'api_________::openaire____::base_search::dump',
+	'openaire____::base_search',
+	'baseDump',
+	'/user/michele.artini/base-import/base_oaipmh_dump-current.tar',
+	'//*[local-name()=''header'']/*[local-name()=''identifier'']'
+);
+
+
+INSERT INTO dsm_apiparams(
+	_dnet_resource_identifier_, 
+	api, 
+	param, 
+	value
+) VALUES (
+	'api_________::openaire____::base_search::dump@@dbUrl',
+	'api_________::openaire____::base_search::dump',
+	'dbUrl',
+	'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus'
+);
+
+INSERT INTO dsm_apiparams(
+	_dnet_resource_identifier_, 
+	api, 
+	param, 
+	value
+) VALUES (
+	'api_________::openaire____::base_search::dump@@dbUser',
+	'api_________::openaire____::base_search::dump',
+	'dbUser',
+	'dnet'
+);
+
+INSERT INTO dsm_apiparams(
+	_dnet_resource_identifier_, 
+	api, 
+	param, 
+	value
+) VALUES (
+	'api_________::openaire____::base_search::dump@@dbPassword',
+	'api_________::openaire____::base_search::dump',
+	'dbPassword',
+	'***'
+);
+
+INSERT INTO dsm_apiparams(
+	_dnet_resource_identifier_, 
+	api, 
+	param, 
+	value
+) VALUES (
+	'api_________::openaire____::base_search::dump@@acceptedNormTypes',
+	'api_________::openaire____::base_search::dump',
+	'acceptedNormTypes',
+	'1,11,111,121,14,15,18,181,182,183,1A,6,7'
+);
+
+COMMIT;
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql
@ -0,0 +1,9 @@
+select s.id as id 
+from dsm_services s 
+where collectedfrom = 'openaire____::opendoar' 
+and jurisdiction = 'Institutional'
+and s.id in (
+	select service from dsm_api where coalesce(compatibility_override, compatibility) = 'driver' or coalesce(compatibility_override, compatibility) = 'UNKNOWN'
+) and s.id not in (
+	select service from dsm_api where coalesce(compatibility_override, compatibility) like '%openaire%'
+);
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-aggregation-status.sql
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-aggregation-status.sql
@ -0,0 +1,11 @@
+select 
+	s.id           as id, 
+	s.jurisdiction as jurisdiction, 
+	array_remove(array_agg(a.id || ' (compliance: ' || coalesce(a.compatibility_override, a.compatibility, 'UNKNOWN') || ')@@@' || coalesce(a.last_collection_total, 0)), NULL) as aggregations
+from 
+	dsm_services s 
+	join dsm_api a on (s.id = a.service) 
+where 
+	collectedfrom = 'openaire____::opendoar'
+group by 
+	s.id;
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/xml/base-types.vocabulary.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/xml/base-types.vocabulary.xml
@ -0,0 +1,180 @@
+<RESOURCE_PROFILE>
+	<HEADER>
+		<RESOURCE_IDENTIFIER value="c67911d6-9988-4a3b-b965-7d39bdd4a31d_Vm9jYWJ1bGFyeURTUmVzb3VyY2VzL1ZvY2FidWxhcnlEU1Jlc291cmNlVHlwZQ==" />
+		<RESOURCE_TYPE value="VocabularyDSResourceType" />
+		<RESOURCE_KIND value="VocabularyDSResources" />
+		<RESOURCE_URI value="" />
+		<DATE_OF_CREATION value="2024-02-13T11:15:48+00:00" />
+	</HEADER>
+	<BODY>
+		<CONFIGURATION>
+			<VOCABULARY_NAME code="base:normalized_types">base:normalized_types</VOCABULARY_NAME>
+			<VOCABULARY_DESCRIPTION>base:normalized_types</VOCABULARY_DESCRIPTION>
+			<TERMS>
+				<TERM native_name="Text" code="Text" english_name="Text" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="1" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Book" code="Book" english_name="Book" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="11" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Book part" code="Book part" english_name="Book part" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="111" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Journal/Newspaper" code="Journal/Newspaper" english_name="Journal/Newspaper" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="12" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Article contribution" code="Article contribution" english_name="Article contribution" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="121" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Other non-article" code="Other non-article" english_name="Other non-article" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="122" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Conference object" code="Conference object" english_name="Conference object" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="13" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Report" code="Report" english_name="Report" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="14" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Review" code="Review" english_name="Review" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="15" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Course material" code="Course material" english_name="Course material" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="16" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Lecture" code="Lecture" english_name="Lecture" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="17" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Thesis" code="Thesis" english_name="Thesis" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="18" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Bachelor's thesis" code="Bachelor's thesis" english_name="Bachelor's thesis" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="181" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Master's thesis" code="Master's thesis" english_name="Master's thesis" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="182" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Doctoral and postdoctoral thesis" code="Doctoral and postdoctoral thesis" english_name="Doctoral and postdoctoral thesis" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="183" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Manuscript" code="Manuscript" english_name="Manuscript" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="19" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Patent" code="Patent" english_name="Patent" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="1A" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Musical notation" code="Musical notation" english_name="Musical notation" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="2" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Map" code="Map" english_name="Map" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="3" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Audio" code="Audio" english_name="Audio" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="4" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Image/Video" code="Image/Video" english_name="Image/Video" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="5" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Still image" code="Still image" english_name="Still image" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="51" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Moving image/Video" code="Moving image/Video" english_name="Moving image/Video" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="52" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Software" code="Software" english_name="Software" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="6" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Dataset" code="Dataset" english_name="Dataset" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="7" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+				<TERM native_name="Unknown" code="Unknown" english_name="Unknown" encoding="BASE">
+					<SYNONYMS>
+						<SYNONYM term="F" encoding="BASE" />
+					</SYNONYMS>
+					<RELATIONS />
+				</TERM>
+
+			</TERMS>
+		</CONFIGURATION>
+		<STATUS>
+			<LAST_UPDATE value="2013-11-18T10:46:36Z" />
+		</STATUS>
+		<SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
+	</BODY>
+</RESOURCE_PROFILE>
+
+                
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/xml/base2oaf.transformationRule.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/xml/base2oaf.transformationRule.xml
@ -0,0 +1,444 @@
+<RESOURCE_PROFILE>
+	<HEADER>
+		<RESOURCE_IDENTIFIER value="" />
+		<RESOURCE_TYPE value="TransformationRuleDSResourceType" />
+		<RESOURCE_KIND value="TransformationRuleDSResources" />
+		<RESOURCE_URI value="" />
+		<DATE_OF_CREATION value="2024-03-05T11:23:00+00:00" />
+	</HEADER>
+	<BODY>
+		<CONFIGURATION>
+			<SOURCE_METADATA_FORMAT interpretation="cleaned" layout="store" name="dc" />
+			<SINK_METADATA_FORMAT name="oaf_hbase" />
+			<IMPORTED />
+			<SCRIPT>
+				<TITLE>xslt_base2oaf_hadoop</TITLE>
+				<CODE>
+					<xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
+						xmlns:base_dc="http://oai.base-search.net/base_dc/"
+						xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+						xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:oaf="http://namespace.openaire.eu/oaf"
+						xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/"
+						exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0">
+						<xsl:param name="varOfficialName" />
+						<xsl:param name="varDataSourceId" />
+						<xsl:param name="varFP7" select="'corda_______::'" />
+						<xsl:param name="varH2020" select="'corda__h2020::'" />
+						<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
+						<xsl:param name="index" select="0" />
+						<xsl:param name="transDate" select="current-dateTime()" />
+
+						<xsl:template name="terminate">
+							<xsl:message terminate="yes">
+								record is not compliant, transformation is interrupted.
+							</xsl:message>
+						</xsl:template>
+
+						<xsl:template match="/">
+							<record>
+								<xsl:apply-templates select="//*[local-name() = 'header']" />
+
+
+<!-- TO EVALUATE
+base_dc:authod_id
+base_dc:authod_id/base_dc:creator_id
+base_dc:authod_id/base_dc:creator_name
+
+example:
+
+<dc:creator>ALBU, Svetlana</dc:creator>
+
+<base_dc:authod_id>
+	<base_dc:creator_name>ALBU, Svetlana</base_dc:creator_name>
+    <base_dc:creator_id>https://orcid.org/0000-0002-8648-950X</base_dc:creator_id>
+</base_dc:authod_id>
+-->
+
+<!-- NOT USED 
+base_dc:global_id (I used oai:identifier)
+base_dc:collection/text()
+
+base_dc:continent
+base_dc:country
+base_dc:year (I used dc:date)
+dc:coverage
+dc:language (I used base_dc:lang)
+base_dc:link (I used dc:identifier)
+-->
+
+								<metadata>
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:title" />
+										<xsl:with-param name="targetElement" select="'dc:title'" />
+									</xsl:call-template>
+
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:creator/replace(., '^(.*)\|.*$', '$1')" />
+										<xsl:with-param name="targetElement" select="'dc:creator'" />
+									</xsl:call-template>
+
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:contributor" />
+										<xsl:with-param name="targetElement" select="'dc:contributor'" />
+									</xsl:call-template>
+									
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:description" />
+										<xsl:with-param name="targetElement" select="'dc:description'" />
+									</xsl:call-template>
+									
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:subject" />
+										<xsl:with-param name="targetElement" select="'dc:subject'" />
+									</xsl:call-template>
+									
+									<!-- TODO: I'm not sure if this is the correct encoding -->
+									<xsl:for-each select="//base_dc:classcode|//base_dc:autoclasscode">
+										<dc:subject><xsl:value-of select="concat(@type, ':', .)" /></dc:subject>
+									</xsl:for-each>
+									<!-- END TODO -->
+									
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:publisher" />
+										<xsl:with-param name="targetElement" select="'dc:publisher'" />
+									</xsl:call-template>
+									
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:format" />
+										<xsl:with-param name="targetElement" select="'dc:format'" />
+									</xsl:call-template>
+									
+									
+									<xsl:for-each select="//base_dc:typenorm">
+										<dc:type>
+											<xsl:value-of select="vocabulary:clean(., 'base:normalized_types')" />
+										</dc:type>
+									</xsl:for-each>
+									
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:type" />
+										<xsl:with-param name="targetElement" select="'dc:type'" />
+									</xsl:call-template>
+									
+									
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:source" />
+										<xsl:with-param name="targetElement" select="'dc:source'" />
+									</xsl:call-template>
+									
+									<dc:language>
+										<xsl:value-of select="vocabulary:clean( //base_dc:lang, 'dnet:languages')" />
+									</dc:language>
+									
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:rights" />
+										<xsl:with-param name="targetElement" select="'dc:rights'" />
+									</xsl:call-template>
+									
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:relation" />
+										<xsl:with-param name="targetElement" select="'dc:relation'" />
+									</xsl:call-template>
+									
+									<xsl:if test="not(//dc:identifier[starts-with(., 'http')])">
+										<xsl:call-template name="terminate" />
+									</xsl:if>
+									
+									<xsl:call-template name="allElements">
+										<xsl:with-param name="sourceElement" select="//dc:identifier[starts-with(., 'http')]" />
+										<xsl:with-param name="targetElement" select="'dc:identifier'" />
+									</xsl:call-template>
+									
+									<xsl:for-each select="//dc:relation">
+										<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
+											<oaf:projectid>
+												<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
+											</oaf:projectid>
+										</xsl:if>
+										<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
+											<oaf:projectid>
+												<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
+											</oaf:projectid>
+										</xsl:if>
+									</xsl:for-each>
+
+									<xsl:choose>
+										<!-- I used an inline mapping because the field typenorm could be repeated and I have to specify a list of priority  -->
+										
+										<!-- Book part -->
+										<xsl:when test="//base_dc:typenorm = '111'">
+											<dr:CobjCategory type="publication">0013</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Book -->
+										<xsl:when test="//base_dc:typenorm = '11'">
+											<dr:CobjCategory type="publication">0002</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Article contribution -->
+										<xsl:when test="//base_dc:typenorm = '121'">
+											<dr:CobjCategory type="publication">0001</dr:CobjCategory>
+										</xsl:when>
+										
+																				
+										<!-- Journal/Newspaper -->
+										<xsl:when test="//base_dc:typenorm = '12'">
+											<dr:CobjCategory type="publication">0043</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Report -->
+										<xsl:when test="//base_dc:typenorm = '14'">
+											<dr:CobjCategory type="publication">0017</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Review -->
+										<xsl:when test="//base_dc:typenorm = '15'">
+											<dr:CobjCategory type="publication">0015</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Lecture -->
+										<xsl:when test="//base_dc:typenorm = '17'">
+											<dr:CobjCategory type="publication">0010</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Bachelor's thesis -->
+										<xsl:when test="//base_dc:typenorm = '181'">
+											<dr:CobjCategory type="publication">0008</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Master's thesis -->
+										<xsl:when test="//base_dc:typenorm = '182'">
+											<dr:CobjCategory type="publication">0007</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Doctoral and postdoctoral thesis -->
+										<xsl:when test="//base_dc:typenorm = '183'">
+											<dr:CobjCategory type="publication">0006</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Thesis -->
+										<xsl:when test="//base_dc:typenorm = '18'">
+											<dr:CobjCategory type="publication">0044</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Patent -->
+										<xsl:when test="//base_dc:typenorm = '1A'">
+											<dr:CobjCategory type="publication">0019</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Text -->
+										<xsl:when test="//base_dc:typenorm = '1'">
+											<dr:CobjCategory type="publication">0001</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Software -->
+										<xsl:when test="//base_dc:typenorm = '6'">
+											<dr:CobjCategory type="software">0029</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Dataset -->										
+										<xsl:when test="//base_dc:typenorm = '7'">
+											<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
+										</xsl:when>										
+																				
+										<!-- Still image -->
+										<xsl:when test="//base_dc:typenorm = '51'">
+											<dr:CobjCategory type="other">0025</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Moving image/Video -->										
+										<xsl:when test="//base_dc:typenorm = '52'">
+											<dr:CobjCategory type="other">0024</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Image/Video -->
+										<xsl:when test="//base_dc:typenorm = '5'">
+											<dr:CobjCategory type="other">0033</dr:CobjCategory>
+										</xsl:when>
+
+										<!-- Audio -->
+										<xsl:when test="//base_dc:typenorm = '4'">
+											<dr:CobjCategory type="other">0030</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Musical notation -->
+										<xsl:when test="//base_dc:typenorm = '2'">
+											<dr:CobjCategory type="other">0020</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Map -->
+										<xsl:when test="//base_dc:typenorm = '3'">
+											<dr:CobjCategory type="other">0020</dr:CobjCategory>
+										</xsl:when>										
+										
+										<!-- Other non-article -->
+										<xsl:when test="//base_dc:typenorm = '122'">
+											<dr:CobjCategory type="publication">0038</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Course material -->
+										<xsl:when test="//base_dc:typenorm = '16'">
+											<dr:CobjCategory type="publication">0038</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Manuscript -->
+										<xsl:when test="//base_dc:typenorm = '19'">
+											<dr:CobjCategory type="publication">0038</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Conference object -->
+										<xsl:when test="//base_dc:typenorm = '13'">
+											<dr:CobjCategory type="publication">0004</dr:CobjCategory>
+										</xsl:when>
+
+										<!-- Unknown -->
+										<xsl:when test="//base_dc:typenorm = 'F'">
+											<dr:CobjCategory type="other">0000</dr:CobjCategory>
+										</xsl:when>
+										<xsl:otherwise>
+											<dr:CobjCategory type="other">0000</dr:CobjCategory>
+										</xsl:otherwise>
+									</xsl:choose>
+									
+									
+									<oaf:accessrights>
+										<xsl:choose>
+											<xsl:when test="//base_dc:oa[.='0']">CLOSED</xsl:when>
+											<xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when>
+											<xsl:when test="//base_dc:oa[.='2']">UNKNOWN</xsl:when>
+											<xsl:when test="//base_dc:rightsnorm">
+												<xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')" />
+											</xsl:when>
+											<xsl:when test="//dc:rights">
+												<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
+											</xsl:when>
+											<xsl:otherwise>UNKNOWN</xsl:otherwise>
+										</xsl:choose>
+									</oaf:accessrights>
+									
+									<xsl:for-each select="//base_dc:doi">
+										<oaf:identifier identifierType="doi">
+											<xsl:value-of select="." />
+										</oaf:identifier>
+									</xsl:for-each>
+
+									<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and (not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
+										<oaf:identifier identifierType="url">
+											<xsl:value-of select="." />
+										</oaf:identifier>
+									</xsl:for-each>
+
+									<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
+										<oaf:identifier identifierType="handle">
+											<xsl:value-of select="." />
+										</oaf:identifier>
+									</xsl:for-each>									
+
+									<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
+										<oaf:identifier identifierType='urn'>
+											<xsl:value-of select="." />
+										</oaf:identifier>
+									</xsl:for-each>
+									
+									<oaf:identifier identifierType="oai-original">
+										<xsl:value-of
+											select="//oai:header/oai:identifier" />
+									</oaf:identifier>
+									
+									<oaf:hostedBy>
+										<xsl:attribute name="name">
+											<xsl:value-of select="//base_dc:collname" />
+										</xsl:attribute>
+										<xsl:attribute name="id">
+											<xsl:value-of select="concat('opendoar____::', //base_dc:collection/@opendoar_id)" />
+										</xsl:attribute>
+									</oaf:hostedBy>
+									
+									<oaf:collectedFrom>
+										<xsl:attribute name="name">
+											<xsl:value-of select="$varOfficialName" />
+										</xsl:attribute>
+										<xsl:attribute name="id">
+											<xsl:value-of select="$varDataSourceId" />
+										</xsl:attribute>
+									</oaf:collectedFrom>
+									
+									<oaf:dateAccepted>
+										<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
+									</oaf:dateAccepted>
+									
+									<xsl:if test="//base_dc:oa[.='1']">
+										<xsl:for-each select="//dc:relation[starts-with(., 'http')]">
+											<oaf:fulltext>
+												<xsl:value-of select="normalize-space(.)" />
+											</oaf:fulltext>
+										</xsl:for-each>
+									</xsl:if>
+									
+									<xsl:for-each select="//base_dc:collection/@ror_id">
+										<oaf:relation relType="resultOrganization"
+										 	subRelType="affiliation"
+										 	relClass="hasAuthorInstitution"
+											targetType="organization">
+											<xsl:choose>
+												<xsl:when test="contains(.,'https://ror.org/')">
+													<xsl:value-of select="concat('ror_________::', normalize-space(.))" />
+												</xsl:when>
+												<xsl:otherwise>
+													<xsl:value-of select="concat('ror_________::https://ror.org/', normalize-space(.))" />
+												</xsl:otherwise>
+											</xsl:choose>
+										</oaf:relation>
+									</xsl:for-each>
+									
+									<oaf:datainfo>
+										<oaf:inferred>false</oaf:inferred>
+										<oaf:deletedbyinference>false</oaf:deletedbyinference>
+										<oaf:trust>0.89</oaf:trust>
+										<oaf:inferenceprovenance/>
+										<oaf:provenanceaction classid="sysimport:crosswalk:aggregator"
+											classname="sysimport:crosswalk:aggregator"
+											schemeid="dnet:provenanceActions"
+											schemename="dnet:provenanceActions"/>
+									</oaf:datainfo>
+									
+								</metadata>
+								<xsl:copy-of select="//*[local-name() = 'about']" />
+							</record>
+						</xsl:template>
+
+						<xsl:template name="allElements">
+							<xsl:param name="sourceElement" />
+							<xsl:param name="targetElement" />
+							<xsl:for-each select="$sourceElement">
+								<xsl:element name="{$targetElement}">
+									<xsl:value-of select="normalize-space(.)" />
+								</xsl:element>
+							</xsl:for-each>
+						</xsl:template>
+
+						<xsl:template match="//*[local-name() = 'header']">
+							<xsl:if test="//oai:header/@status='deleted'">
+								<xsl:call-template name="terminate" />
+							</xsl:if>
+							<xsl:copy>
+								<xsl:apply-templates select="node()|@*" />
+								<xsl:element name="dr:dateOfTransformation">
+									<xsl:value-of select="$transDate" />
+								</xsl:element>
+							</xsl:copy>
+						</xsl:template>
+
+						<xsl:template match="node()|@*">
+							<xsl:copy>
+								<xsl:apply-templates select="node()|@*" />
+							</xsl:copy>
+						</xsl:template>
+					</xsl:stylesheet>
+				</CODE>
+			</SCRIPT>
+		</CONFIGURATION>
+		<STATUS />
+		<SECURITY_PARAMETERS />
+	</BODY>
+</RESOURCE_PROFILE>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/xml/base2odf.transformationRule.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/xml/base2odf.transformationRule.xml
@ -0,0 +1,472 @@
+<RESOURCE_PROFILE>
+	<HEADER>
+		<RESOURCE_IDENTIFIER value="2ad0cdd9-c96c-484c-8b0e-ed56d86891fe_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU=" />
+		<RESOURCE_TYPE value="TransformationRuleDSResourceType" />
+		<RESOURCE_KIND value="TransformationRuleDSResources" />
+		<RESOURCE_URI value="" />
+		<DATE_OF_CREATION value="2024-03-05T11:23:00+00:00" />
+	</HEADER>
+	<BODY>
+		<CONFIGURATION>
+			<SOURCE_METADATA_FORMAT interpretation="cleaned" layout="store" name="dc" />
+			<SINK_METADATA_FORMAT name="odf_hbase" />
+			<IMPORTED />
+			<SCRIPT>
+				<TITLE>xslt_base2odf_hadoop</TITLE>
+				<CODE>
+					<xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO" xmlns:base_dc="http://oai.base-search.net/base_dc/"
+						xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+						xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:oaf="http://namespace.openaire.eu/oaf"
+						xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/"
+						exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0">
+						<xsl:param name="varOfficialName" />
+						<xsl:param name="varDataSourceId" />
+						<xsl:param name="varFP7" select="'corda_______::'" />
+						<xsl:param name="varH2020" select="'corda__h2020::'" />
+						<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
+						<xsl:param name="index" select="0" />
+						<xsl:param name="transDate" select="current-dateTime()" />
+
+						<xsl:template name="terminate">
+							<xsl:message terminate="yes">
+								record is not compliant, transformation is interrupted.
+							</xsl:message>
+						</xsl:template>
+
+						<xsl:template match="/">
+							<record>
+								<xsl:apply-templates select="//*[local-name() = 'header']" />
+
+
+								<!-- NOT USED 
+									base_dc:global_id (I used oai:identifier) 
+									base_dc:collection/text() 
+									base_dc:continent 
+									base_dc:country  
+									dc:coverage
+									dc:source
+									dc:relation
+									dc:type (I used //base_dc:typenorm)
+									dc:language (I used base_dc:lang) 
+									base_dc:link (I used dc:identifier)
+								 -->
+
+								<metadata>
+									<datacite:resource>
+
+										<xsl:for-each select="//base_dc:doi">
+											<datacite:identifier identifierType="DOI">
+												<xsl:value-of select="." />
+											</datacite:identifier>
+										</xsl:for-each>										
+
+										<datacite:alternateIdentifiers>
+											<xsl:for-each
+												select="distinct-values(//dc:identifier[starts-with(., 'http') and (not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
+												<datacite:identifier alternateIdentifierType="url">
+													<xsl:value-of select="." />
+												</datacite:identifier>
+											</xsl:for-each>
+
+											<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
+												<datacite:identifier alternateIdentifierType="handle">
+													<xsl:value-of select="." />
+												</datacite:identifier>
+											</xsl:for-each>
+
+											<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
+												<datacite:identifier alternateIdentifierType='urn'>
+													<xsl:value-of select="." />
+												</datacite:identifier>
+											</xsl:for-each>
+
+											<datacite:identifier alternateIdentifierType="oai-original">
+												<xsl:value-of
+													select="//oai:header/oai:identifier" />
+											</datacite:identifier>
+											
+										</datacite:alternateIdentifiers>
+
+										<datacite:relatedIdentifiers />
+
+
+										<xsl:for-each select="//base_dc:typenorm">
+											<datacite:resourceType><xsl:value-of select="vocabulary:clean(., 'base:normalized_types')" /></datacite:resourceType>
+										</xsl:for-each>
+
+										<datacite:titles>
+											<xsl:for-each select="//dc:title">
+												<datacite:title>
+													<xsl:value-of select="normalize-space(.)" />
+												</datacite:title>
+											</xsl:for-each>
+										</datacite:titles>
+
+										<datacite:creators>
+											<xsl:for-each select="//dc:creator">
+												<xsl:variable name="author" select="normalize-space(.)" />
+												<datacite:creator>
+													<datacite:creatorName>
+														<xsl:value-of select="$author" />
+													</datacite:creatorName>
+													<xsl:for-each select="//base_dc:authod_id[normalize-space(./base_dc:creator_name) = $author]/base_dc:creator_id ">
+														<xsl:if test="contains(.,'https://orcid.org/')">
+															<nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">
+																<xsl:value-of select="substring-after(., 'https://orcid.org/')" />
+															</nameIdentifier>
+														</xsl:if>
+													</xsl:for-each>
+												</datacite:creator>
+											</xsl:for-each>
+										</datacite:creators>
+
+										<datacite:contributors>
+											<xsl:for-each select="//dc:contributor">
+												<datacite:contributor>
+													<datacite:contributorName>
+														<xsl:value-of select="normalize-space(.)" />
+													</datacite:contributorName>
+												</datacite:contributor>
+											</xsl:for-each>
+										</datacite:contributors>
+
+										<datacite:descriptions>
+											<xsl:for-each select="//dc:description">
+												<datacite:description descriptionType="Abstract">
+													<xsl:value-of select="normalize-space(.)" />
+												</datacite:description>
+											</xsl:for-each>
+										</datacite:descriptions>
+
+										<datacite:subjects>
+											<xsl:for-each select="//dc:subject">
+												<datacite:subject>
+													<xsl:value-of select="normalize-space(.)" />
+												</datacite:subject>
+											</xsl:for-each>
+											
+											<xsl:for-each select="//base_dc:classcode|//base_dc:autoclasscode">
+												<datacite:subject subjectScheme="{@type}" classificationCode="{normalize-space(.)}">
+													<!-- TODO the value should be obtained by the Code -->
+													<xsl:value-of select="normalize-space(.)" />
+												</datacite:subject>
+											</xsl:for-each>
+										</datacite:subjects>
+										
+										<xsl:for-each select="//dc:publisher">
+											<datacite:publisher>
+												<xsl:value-of select="normalize-space(.)" />
+											</datacite:publisher>
+										</xsl:for-each>
+										
+										<xsl:for-each select="//base_dc:year">
+											<datacite:publicationYear>
+												<xsl:value-of select="normalize-space(.)" />
+											</datacite:publicationYear>
+										</xsl:for-each>
+																				
+										<datacite:formats>
+											<xsl:for-each select="//dc:format">
+												<datacite:format>
+													<xsl:value-of select="normalize-space(.)" />
+												</datacite:format>
+											</xsl:for-each>
+										</datacite:formats>
+										
+										<datacite:language>
+											<xsl:value-of select="vocabulary:clean( //base_dc:lang, 'dnet:languages')" />
+										</datacite:language>
+
+										<oaf:accessrights>
+											<xsl:if test="//base_dc:oa[.='0']">
+												<datacite:rights rightsURI="http://purl.org/coar/access_right/c_16ec">restricted access</datacite:rights>
+											</xsl:if>
+											<xsl:if test="//base_dc:oa[.='1']">
+												<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
+											</xsl:if>
+											<xsl:for-each select="//dc:rights|//base_dc:rightsnorm">
+												<datacite:rights><xsl:value-of select="vocabulary:clean(., 'dnet:access_modes')" /></datacite:rights>	
+											</xsl:for-each>
+										</oaf:accessrights>
+
+									</datacite:resource>
+
+									<xsl:for-each select="//dc:relation">
+										<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
+											<oaf:projectid>
+												<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
+											</oaf:projectid>
+										</xsl:if>
+										<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
+											<oaf:projectid>
+												<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
+											</oaf:projectid>
+										</xsl:if>
+									</xsl:for-each>
+
+									<xsl:choose>
+										<!-- I used an inline mapping because the field typenorm could be repeated and I have to specify a list of priority  -->
+										
+										<!-- Book part -->
+										<xsl:when test="//base_dc:typenorm = '111'">
+											<dr:CobjCategory type="publication">0013</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Book -->
+										<xsl:when test="//base_dc:typenorm = '11'">
+											<dr:CobjCategory type="publication">0002</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Article contribution -->
+										<xsl:when test="//base_dc:typenorm = '121'">
+											<dr:CobjCategory type="publication">0001</dr:CobjCategory>
+										</xsl:when>
+										
+																				
+										<!-- Journal/Newspaper -->
+										<xsl:when test="//base_dc:typenorm = '12'">
+											<dr:CobjCategory type="publication">0043</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Report -->
+										<xsl:when test="//base_dc:typenorm = '14'">
+											<dr:CobjCategory type="publication">0017</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Review -->
+										<xsl:when test="//base_dc:typenorm = '15'">
+											<dr:CobjCategory type="publication">0015</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Lecture -->
+										<xsl:when test="//base_dc:typenorm = '17'">
+											<dr:CobjCategory type="publication">0010</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Bachelor's thesis -->
+										<xsl:when test="//base_dc:typenorm = '181'">
+											<dr:CobjCategory type="publication">0008</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Master's thesis -->
+										<xsl:when test="//base_dc:typenorm = '182'">
+											<dr:CobjCategory type="publication">0007</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Doctoral and postdoctoral thesis -->
+										<xsl:when test="//base_dc:typenorm = '183'">
+											<dr:CobjCategory type="publication">0006</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Thesis -->
+										<xsl:when test="//base_dc:typenorm = '18'">
+											<dr:CobjCategory type="publication">0044</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Patent -->
+										<xsl:when test="//base_dc:typenorm = '1A'">
+											<dr:CobjCategory type="publication">0019</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Text -->
+										<xsl:when test="//base_dc:typenorm = '1'">
+											<dr:CobjCategory type="publication">0001</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Software -->
+										<xsl:when test="//base_dc:typenorm = '6'">
+											<dr:CobjCategory type="software">0029</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Dataset -->										
+										<xsl:when test="//base_dc:typenorm = '7'">
+											<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
+										</xsl:when>										
+																				
+										<!-- Still image -->
+										<xsl:when test="//base_dc:typenorm = '51'">
+											<dr:CobjCategory type="other">0025</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Moving image/Video -->										
+										<xsl:when test="//base_dc:typenorm = '52'">
+											<dr:CobjCategory type="other">0024</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Image/Video -->
+										<xsl:when test="//base_dc:typenorm = '5'">
+											<dr:CobjCategory type="other">0033</dr:CobjCategory>
+										</xsl:when>
+
+										<!-- Audio -->
+										<xsl:when test="//base_dc:typenorm = '4'">
+											<dr:CobjCategory type="other">0030</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Musical notation -->
+										<xsl:when test="//base_dc:typenorm = '2'">
+											<dr:CobjCategory type="other">0020</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Map -->
+										<xsl:when test="//base_dc:typenorm = '3'">
+											<dr:CobjCategory type="other">0020</dr:CobjCategory>
+										</xsl:when>										
+										
+										<!-- Other non-article -->
+										<xsl:when test="//base_dc:typenorm = '122'">
+											<dr:CobjCategory type="publication">0038</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Course material -->
+										<xsl:when test="//base_dc:typenorm = '16'">
+											<dr:CobjCategory type="publication">0038</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Manuscript -->
+										<xsl:when test="//base_dc:typenorm = '19'">
+											<dr:CobjCategory type="publication">0038</dr:CobjCategory>
+										</xsl:when>
+										
+										<!-- Conference object -->
+										<xsl:when test="//base_dc:typenorm = '13'">
+											<dr:CobjCategory type="publication">0004</dr:CobjCategory>
+										</xsl:when>
+
+										<!-- Unknown -->
+										<xsl:when test="//base_dc:typenorm = 'F'">
+											<dr:CobjCategory type="other">0000</dr:CobjCategory>
+										</xsl:when>
+										<xsl:otherwise>
+											<dr:CobjCategory type="other">0000</dr:CobjCategory>
+										</xsl:otherwise>
+									</xsl:choose>
+									
+									<oaf:accessrights>
+										<xsl:choose>
+											<xsl:when test="//base_dc:oa[.='0']">CLOSED</xsl:when>
+											<xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when>
+											<xsl:when test="//base_dc:oa[.='2']">UNKNOWN</xsl:when>
+											<xsl:when test="//base_dc:rightsnorm">
+												<xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')" />
+											</xsl:when>
+											<xsl:when test="//dc:rights">
+												<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
+											</xsl:when>
+											<xsl:otherwise>UNKNOWN</xsl:otherwise>
+										</xsl:choose>
+									</oaf:accessrights>
+
+									<xsl:for-each select="//base_dc:doi">
+										<oaf:identifier identifierType="doi">
+											<xsl:value-of select="." />
+										</oaf:identifier>
+									</xsl:for-each>
+
+									<xsl:for-each
+										select="distinct-values(//dc:identifier[starts-with(., 'http') and ( not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
+										<oaf:identifier identifierType="url">
+											<xsl:value-of select="." />
+										</oaf:identifier>
+									</xsl:for-each>
+
+									<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
+										<oaf:identifier identifierType="handle">
+											<xsl:value-of select="." />
+										</oaf:identifier>
+									</xsl:for-each>
+
+									<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
+										<oaf:identifier identifierType='urn'>
+											<xsl:value-of select="." />
+										</oaf:identifier>
+									</xsl:for-each>
+
+									<oaf:identifier identifierType="oai-original">
+										<xsl:value-of
+											select="//oai:header/oai:identifier" />
+									</oaf:identifier>
+
+									<oaf:hostedBy>
+										<xsl:attribute name="name">
+											<xsl:value-of select="//base_dc:collname" />
+										</xsl:attribute>
+										<xsl:attribute name="id">
+											<xsl:value-of select="concat('opendoar____::', //base_dc:collection/@opendoar_id)" />
+										</xsl:attribute>
+									</oaf:hostedBy>
+
+									<oaf:collectedFrom>
+										<xsl:attribute name="name">
+											<xsl:value-of select="$varOfficialName" />
+										</xsl:attribute>
+										<xsl:attribute name="id">
+											<xsl:value-of select="$varDataSourceId" />
+										</xsl:attribute>
+									</oaf:collectedFrom>
+
+									<oaf:dateAccepted>
+										<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
+									</oaf:dateAccepted>
+
+									<xsl:if test="//base_dc:oa[.='1']">
+										<xsl:for-each select="//dc:relation[starts-with(., 'http')]">
+											<oaf:fulltext>
+												<xsl:value-of select="normalize-space(.)" />
+											</oaf:fulltext>
+										</xsl:for-each>
+									</xsl:if>
+
+									<xsl:for-each select="//base_dc:collection/@ror_id">
+										<oaf:relation relType="resultOrganization" subRelType="affiliation" relClass="hasAuthorInstitution" targetType="organization">
+											<xsl:choose>
+												<xsl:when test="contains(.,'https://ror.org/')">
+													<xsl:value-of select="concat('ror_________::', normalize-space(.))" />
+												</xsl:when>
+												<xsl:otherwise>
+													<xsl:value-of select="concat('ror_________::https://ror.org/', normalize-space(.))" />
+												</xsl:otherwise>
+											</xsl:choose>
+										</oaf:relation>
+									</xsl:for-each>
+									
+									<oaf:datainfo>
+										<oaf:inferred>false</oaf:inferred>
+										<oaf:deletedbyinference>false</oaf:deletedbyinference>
+										<oaf:trust>0.89</oaf:trust>
+										<oaf:inferenceprovenance/>
+										<oaf:provenanceaction classid="sysimport:crosswalk:aggregator"
+											classname="sysimport:crosswalk:aggregator"
+											schemeid="dnet:provenanceActions"
+											schemename="dnet:provenanceActions"/>
+									</oaf:datainfo>
+								</metadata>
+								<xsl:copy-of select="//*[local-name() = 'about']" />
+							</record>
+						</xsl:template>
+
+						<xsl:template match="//*[local-name() = 'header']">
+							<xsl:if test="//oai:header/@status='deleted'">
+								<xsl:call-template name="terminate" />
+							</xsl:if>
+							<xsl:copy>
+								<xsl:apply-templates select="node()|@*" />
+								<xsl:element name="dr:dateOfTransformation">
+									<xsl:value-of select="$transDate" />
+								</xsl:element>
+							</xsl:copy>
+						</xsl:template>
+
+						<xsl:template match="node()|@*">
+							<xsl:copy>
+								<xsl:apply-templates select="node()|@*" />
+							</xsl:copy>
+						</xsl:template>
+					</xsl:stylesheet>
+				</CODE>
+			</SCRIPT>
+		</CONFIGURATION>
+		<STATUS />
+		<SECURITY_PARAMETERS />
+	</BODY>
+</RESOURCE_PROFILE>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json
@ -1054,4 +1054,5 @@
  "datacite_name": "EBRAINS",
  "official_name": "EBRAINS"
 }
+
 }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
@ -78,10 +78,6 @@ public class PrepareAffiliationRelationsTest {
 			.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
 			.getPath();

-		String pubmedAffiliationRelationsPath = getClass()
-			.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
-			.getPath();
-
 		String outputPath = workingDir.toString() + "/actionSet";

 		PrepareAffiliationRelations
@ -89,7 +85,9 @@ public class PrepareAffiliationRelationsTest {
 				new String[] {
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-crossrefInputPath", crossrefAffiliationRelationPath,
-					"-pubmedInputPath", pubmedAffiliationRelationsPath,
+					"-pubmedInputPath", crossrefAffiliationRelationPath,
+					"-openapcInputPath", crossrefAffiliationRelationPath,
+					"-dataciteInputPath", crossrefAffiliationRelationPath,
 					"-outputPath", outputPath
 				});

@ -106,7 +104,7 @@ public class PrepareAffiliationRelationsTest {
 //            );
 //        }
 		// count the number of relations
-		assertEquals(40, tmp.count());
+		assertEquals(80, tmp.count());

 		Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
 		dataset.createOrReplaceTempView("result");
@ -117,7 +115,7 @@ public class PrepareAffiliationRelationsTest {
 		// verify that we have equal number of bi-directional relations
 		Assertions
 			.assertEquals(
-				20, execVerification
+				40, execVerification
 					.filter(
 						"relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
 					.collectAsList()
@ -125,7 +123,7 @@ public class PrepareAffiliationRelationsTest {

 		Assertions
 			.assertEquals(
-				20, execVerification
+				40, execVerification
 					.filter(
 						"relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
 					.collectAsList()
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/fosnodoi/GetFosTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/fosnodoi/GetFosTest.java
@ -0,0 +1,104 @@
+
+package eu.dnetlib.dhp.actionmanager.fosnodoi;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetFOSSparkJob;
+import eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareTest;
+import eu.dnetlib.dhp.actionmanager.createunresolvedentities.ProduceTest;
+import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
+
+/**
+ * @author miriam.baglioni
+ * @Date 13/02/23
+ */
+public class GetFosTest {
+
+	private static final Logger log = LoggerFactory.getLogger(ProduceTest.class);
+
+	private static Path workingDir;
+	private static SparkSession spark;
+	private static LocalFileSystem fs;
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	@BeforeAll
+	public static void beforeAll() throws IOException {
+		workingDir = Files.createTempDirectory(PrepareTest.class.getSimpleName());
+
+		fs = FileSystem.getLocal(new Configuration());
+		log.info("using work dir {}", workingDir);
+
+		SparkConf conf = new SparkConf();
+		conf.setAppName(ProduceTest.class.getSimpleName());
+
+		conf.setMaster("local[*]");
+		conf.set("spark.driver.host", "localhost");
+		conf.set("hive.metastore.local", "true");
+		conf.set("spark.ui.enabled", "false");
+		conf.set("spark.sql.warehouse.dir", workingDir.toString());
+		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+		spark = SparkSession
+			.builder()
+			.appName(PrepareTest.class.getSimpleName())
+			.config(conf)
+			.getOrCreate();
+	}
+
+	@AfterAll
+	public static void afterAll() throws IOException {
+		FileUtils.deleteDirectory(workingDir.toFile());
+		spark.stop();
+	}
+
+	@Test
+	@Disabled
+	void test3() throws Exception {
+		final String sourcePath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/fosnodoi/fosnodoi.csv")
+			.getPath();
+
+		final String outputPath = workingDir.toString() + "/fos.json";
+		GetFOSSparkJob
+			.main(
+				new String[] {
+					"--isSparkSessionManaged", Boolean.FALSE.toString(),
+					"--sourcePath", sourcePath,
+
+					"-outputPath", outputPath,
+					"-delimiter", ","
+
+				});
+
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		JavaRDD<FOSDataModel> tmp = sc
+			.textFile(outputPath)
+			.map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));
+
+		tmp.foreach(t -> Assertions.assertTrue(t.getOaid() != null));
+		tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null));
+		tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null));
+		tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null));
+
+		tmp.foreach(t -> System.out.println(new ObjectMapper().writeValueAsString(t)));
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/fosnodoi/PrepareTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/fosnodoi/PrepareTest.java
@ -0,0 +1,99 @@
+
+package eu.dnetlib.dhp.actionmanager.fosnodoi;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareFOSSparkJob;
+import eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareSDGSparkJob;
+import eu.dnetlib.dhp.actionmanager.createunresolvedentities.ProduceTest;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class PrepareTest {
+
+	private static final Logger log = LoggerFactory.getLogger(ProduceTest.class);
+
+	private static Path workingDir;
+	private static SparkSession spark;
+	private static LocalFileSystem fs;
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	@BeforeAll
+	public static void beforeAll() throws IOException {
+		workingDir = Files.createTempDirectory(PrepareTest.class.getSimpleName());
+
+		fs = FileSystem.getLocal(new Configuration());
+		log.info("using work dir {}", workingDir);
+
+		SparkConf conf = new SparkConf();
+		conf.setAppName(ProduceTest.class.getSimpleName());
+
+		conf.setMaster("local[*]");
+		conf.set("spark.driver.host", "localhost");
+		conf.set("hive.metastore.local", "true");
+		conf.set("spark.ui.enabled", "false");
+		conf.set("spark.sql.warehouse.dir", workingDir.toString());
+		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+		spark = SparkSession
+			.builder()
+			.appName(PrepareTest.class.getSimpleName())
+			.config(conf)
+			.getOrCreate();
+	}
+
+	@AfterAll
+	public static void afterAll() throws IOException {
+		FileUtils.deleteDirectory(workingDir.toFile());
+		spark.stop();
+	}
+
+	@Test
+	void fosPrepareTest() throws Exception {
+		final String sourcePath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/fosnodoi/fosnodoi.json")
+			.getPath();
+
+		PrepareFOSSparkJob
+			.main(
+				new String[] {
+					"--isSparkSessionManaged", Boolean.FALSE.toString(),
+					"--sourcePath", sourcePath,
+
+					"-outputPath", workingDir.toString() + "/work",
+					"-distributeDoi", Boolean.FALSE.toString()
+
+				});
+
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		JavaRDD<Result> tmp = sc
+			.textFile(workingDir.toString() + "/work/fos")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+
+		tmp.foreach(t -> System.out.println(new ObjectMapper().writeValueAsString(t)));
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java
@ -76,7 +76,7 @@ public class CreateOpenCitationsASTest {

 		String inputPath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
+				"/eu/dnetlib/dhp/actionmanager/opencitations/COCI/inputremap/jsonforas")
 			.getPath();

 		CreateActionSetSparkJob
@ -84,8 +84,6 @@ public class CreateOpenCitationsASTest {
 				new String[] {
 					"-isSparkSessionManaged",
 					Boolean.FALSE.toString(),
-					"-shouldDuplicateRels",
-					Boolean.TRUE.toString(),
 					"-inputPath",
 					inputPath,
 					"-outputPath",
@ -99,9 +97,10 @@ public class CreateOpenCitationsASTest {
 			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
 			.map(aa -> ((Relation) aa.getPayload()));

-		assertEquals(31, tmp.count());
+		Assertions.assertEquals(27, tmp.count());
+		tmp.foreach(r -> Assertions.assertEquals(1, r.getCollectedfrom().size()));

-		// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
+		tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));

 	}

--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java
@ -0,0 +1,90 @@
+
+package eu.dnetlib.dhp.actionmanager.opencitations;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
+
+/**
+ * @author miriam.baglioni
+ * @Date 07/03/24
+ */
+public class RemapTest {
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	private static SparkSession spark;
+
+	private static Path workingDir;
+	private static final Logger log = LoggerFactory
+		.getLogger(RemapTest.class);
+
+	@BeforeAll
+	public static void beforeAll() throws IOException {
+		workingDir = Files
+			.createTempDirectory(RemapTest.class.getSimpleName());
+		log.info("using work dir {}", workingDir);
+
+		SparkConf conf = new SparkConf();
+		conf.setAppName(RemapTest.class.getSimpleName());
+
+		conf.setMaster("local[*]");
+		conf.set("spark.driver.host", "localhost");
+		conf.set("hive.metastore.local", "true");
+		conf.set("spark.ui.enabled", "false");
+		conf.set("spark.sql.warehouse.dir", workingDir.toString());
+		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+		spark = SparkSession
+			.builder()
+			.appName(RemapTest.class.getSimpleName())
+			.config(conf)
+			.getOrCreate();
+	}
+
+	@AfterAll
+	public static void afterAll() throws IOException {
+		FileUtils.deleteDirectory(workingDir.toFile());
+		spark.stop();
+	}
+
+	@Test
+	void testRemap() throws Exception {
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/opencitations/COCI/inputremap")
+			.getPath();
+
+		MapOCIdsInPids
+			.main(
+				new String[] {
+					"-isSparkSessionManged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-outputPath",
+					workingDir.toString() + "/out/",
+					"-nameNode", "input1;input2;input3;input4;input5"
+				});
+
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateTAActionSetTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateTAActionSetTest.java
@ -0,0 +1,324 @@
+
+package eu.dnetlib.dhp.actionmanager.transformativeagreement;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob;
+import eu.dnetlib.dhp.actionmanager.opencitations.CreateOpenCitationsASTest;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
+import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
+
+/**
+ * @author miriam.baglioni
+ * @Date 13/02/24
+ */
+public class CreateTAActionSetTest {
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	private static SparkSession spark;
+
+	private static Path workingDir;
+	private static final Logger log = LoggerFactory
+		.getLogger(CreateOpenCitationsASTest.class);
+
+	@BeforeAll
+	public static void beforeAll() throws IOException {
+		workingDir = Files
+			.createTempDirectory(CreateTAActionSetTest.class.getSimpleName());
+		log.info("using work dir {}", workingDir);
+
+		SparkConf conf = new SparkConf();
+		conf.setAppName(CreateTAActionSetTest.class.getSimpleName());
+
+		conf.setMaster("local[*]");
+		conf.set("spark.driver.host", "localhost");
+		conf.set("hive.metastore.local", "true");
+		conf.set("spark.ui.enabled", "false");
+		conf.set("spark.sql.warehouse.dir", workingDir.toString());
+		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+		spark = SparkSession
+			.builder()
+			.appName(CreateTAActionSetTest.class.getSimpleName())
+			.config(conf)
+			.getOrCreate();
+	}
+
+	@AfterAll
+	public static void afterAll() throws IOException {
+		FileUtils.deleteDirectory(workingDir.toFile());
+		spark.stop();
+	}
+
+	@Test
+	void createActionSet() throws Exception {
+
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/transformativeagreement/facts.json")
+			.getPath();
+
+		eu.dnetlib.dhp.actionmanager.transformativeagreement.CreateActionSetSparkJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-outputPath",
+					workingDir.toString() + "/actionSet1"
+				});
+
+	}
+
+	@Test
+	void testNumberofRelations2() throws Exception {
+
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
+			.getPath();
+
+		eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-outputPath",
+					workingDir.toString() + "/actionSet2"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Relation> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet2", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Relation) aa.getPayload()));
+
+		assertEquals(23, tmp.count());
+
+		// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
+
+	}
+
+	@Test
+	void testRelationsCollectedFrom() throws Exception {
+
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
+			.getPath();
+
+		eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-outputPath",
+					workingDir.toString() + "/actionSet3"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Relation> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet3", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Relation) aa.getPayload()));
+
+		tmp.foreach(r -> {
+			assertEquals(ModelConstants.OPENOCITATIONS_NAME, r.getCollectedfrom().get(0).getValue());
+			assertEquals(ModelConstants.OPENOCITATIONS_ID, r.getCollectedfrom().get(0).getKey());
+		});
+
+	}
+
+	@Test
+	void testRelationsDataInfo() throws Exception {
+
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
+			.getPath();
+
+		eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-outputPath",
+					workingDir.toString() + "/actionSet4"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Relation> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet4", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Relation) aa.getPayload()));
+
+		tmp.foreach(r -> {
+			assertEquals(false, r.getDataInfo().getInferred());
+			assertEquals(false, r.getDataInfo().getDeletedbyinference());
+			assertEquals("0.91", r.getDataInfo().getTrust());
+			assertEquals(
+				eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob.OPENCITATIONS_CLASSID,
+				r.getDataInfo().getProvenanceaction().getClassid());
+			assertEquals(
+				eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob.OPENCITATIONS_CLASSNAME,
+				r.getDataInfo().getProvenanceaction().getClassname());
+			assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, r.getDataInfo().getProvenanceaction().getSchemeid());
+			assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, r.getDataInfo().getProvenanceaction().getSchemename());
+		});
+
+	}
+
+	@Test
+	void testRelationsSemantics() throws Exception {
+
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
+			.getPath();
+
+		eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-outputPath",
+					workingDir.toString() + "/actionSet5"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Relation> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet5", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Relation) aa.getPayload()));
+
+		tmp.foreach(r -> {
+			assertEquals("citation", r.getSubRelType());
+			assertEquals("resultResult", r.getRelType());
+		});
+		assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count());
+		assertEquals(0, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count());
+
+	}
+
+	@Test
+	void testRelationsSourceTargetPrefix() throws Exception {
+
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
+			.getPath();
+
+		eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-outputPath",
+					workingDir.toString() + "/actionSet6"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Relation> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet6", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Relation) aa.getPayload()));
+
+		tmp.foreach(r -> {
+			assertEquals("50|doi_________::", r.getSource().substring(0, 17));
+			assertEquals("50|doi_________::", r.getTarget().substring(0, 17));
+		});
+
+	}
+
+	@Test
+	void testRelationsSourceTargetCouple() throws Exception {
+		final String doi1 = "50|doi_________::"
+			+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-015-3684-x"));
+		final String doi2 = "50|doi_________::"
+			+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/j.1551-2916.2008.02408.x"));
+		final String doi3 = "50|doi_________::"
+			+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-014-2114-9"));
+		final String doi4 = "50|doi_________::"
+			+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/j.ceramint.2013.09.069"));
+		final String doi5 = "50|doi_________::"
+			+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-009-9913-4"));
+		final String doi6 = "50|doi_________::"
+			+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/0038-1098(72)90370-5"));
+
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
+			.getPath();
+
+		CreateActionSetSparkJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-outputPath",
+					workingDir.toString() + "/actionSet7"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Relation> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet7", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Relation) aa.getPayload()));
+
+		JavaRDD<Relation> check = tmp.filter(r -> r.getSource().equals(doi1) || r.getTarget().equals(doi1));
+
+		assertEquals(5, check.count());
+
+//		check.foreach(r -> {
+//			if (r.getSource().equals(doi2) || r.getSource().equals(doi3) || r.getSource().equals(doi4) ||
+//				r.getSource().equals(doi5) || r.getSource().equals(doi6)) {
+//				assertEquals(ModelConstants.IS_CITED_BY, r.getRelClass());
+//				assertEquals(doi1, r.getTarget());
+//			}
+//		});
+
+		assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count());
+		check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(ModelConstants.CITES, r.getRelClass()));
+
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java
@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.Measure;
 import eu.dnetlib.dhp.schema.oaf.OafEntity;
 import eu.dnetlib.dhp.schema.oaf.Result;

@ -66,10 +67,380 @@ public class SparkAtomicActionCountJobTest {
 		spark.stop();
 	}

+	@Test
+	void testUsageStatsDb2() {
+		String usageScoresPath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/usagestats/test2")
+			.getPath();
+
+		SparkAtomicActionUsageJob.writeActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet");
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<AtomicAction> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
+			.map(usm -> OBJECT_MAPPER.readValue(usm._2.getBytes(), AtomicAction.class));
+		// .map(aa -> (Result) aa.getPayload());
+
+		Assertions.assertEquals(7, tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("50|")).count());
+		Assertions.assertEquals(9, tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|")).count());
+		Assertions.assertEquals(9, tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|")).count());
+
+		tmp.foreach(r -> Assertions.assertEquals(2, ((OafEntity) r.getPayload()).getMeasures().size()));
+		tmp
+			.foreach(
+				r -> ((OafEntity) r.getPayload())
+					.getMeasures()
+					.stream()
+					.forEach(
+						m -> m
+							.getUnit()
+							.stream()
+							.forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference()))));
+		tmp
+			.foreach(
+				r -> ((OafEntity) r.getPayload())
+					.getMeasures()
+					.stream()
+					.forEach(
+						m -> m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred()))));
+		tmp
+			.foreach(
+				r -> ((OafEntity) r.getPayload())
+					.getMeasures()
+					.stream()
+					.forEach(
+						m -> m
+							.getUnit()
+							.stream()
+							.forEach(u -> Assertions.assertFalse(u.getDataInfo().getInvisible()))));
+
+		tmp
+			.foreach(
+				r -> ((OafEntity) r.getPayload())
+					.getMeasures()
+					.stream()
+					.forEach(
+						m -> m
+							.getUnit()
+							.stream()
+							.forEach(
+								u -> Assertions
+									.assertEquals(
+										"measure:usage_counts",
+										u.getDataInfo().getProvenanceaction().getClassid()))));
+		tmp
+			.foreach(
+				r -> ((OafEntity) r.getPayload())
+					.getMeasures()
+					.stream()
+					.forEach(
+						m -> m
+							.getUnit()
+							.stream()
+							.forEach(
+								u -> Assertions
+									.assertEquals(
+										"Inferred by OpenAIRE",
+										u.getDataInfo().getProvenanceaction().getClassname()))));
+
+		tmp
+			.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|"))
+			.foreach(
+				r -> ((OafEntity) r.getPayload())
+					.getMeasures()
+					.stream()
+					.forEach(
+						m -> m
+							.getUnit()
+							.stream()
+							.forEach(
+								u -> Assertions
+									.assertEquals(
+										"count",
+										u.getKey()))));
+
+		Assertions
+			.assertEquals(
+				1,
+				tmp
+					.filter(
+						r -> ((OafEntity) r.getPayload())
+							.getId()
+							.equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
+					.count());
+
+		OafEntity entity = (OafEntity) tmp
+			.filter(
+				aa -> ((OafEntity) aa.getPayload()).getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
+			.first()
+			.getPayload();
+
+		entity
+			.getMeasures()
+			.stream()
+			.forEach(
+				m -> Assertions.assertEquals(3, m.getUnit().size()));
+
+		Measure downloads = entity
+			.getMeasures()
+			.stream()
+			.filter(m -> m.getId().equals("downloads"))
+			.findFirst()
+			.get();
+
+		Assertions
+			.assertEquals(
+				String.valueOf(0),
+				downloads.getUnit().stream().filter(u -> u.getKey().equals("10|fake1")).findFirst().get().getValue());
+		Assertions
+			.assertEquals(
+				String.valueOf(0),
+				downloads.getUnit().stream().filter(u -> u.getKey().equals("10|fake2")).findFirst().get().getValue());
+		Assertions
+			.assertEquals(
+				String.valueOf(1),
+				downloads.getUnit().stream().filter(u -> u.getKey().equals("10|fake3")).findFirst().get().getValue());
+
+		Measure views = entity
+			.getMeasures()
+			.stream()
+			.filter(m -> m.getId().equals("views"))
+			.findFirst()
+			.get();
+
+		Assertions
+			.assertEquals(
+				String.valueOf(5),
+				views.getUnit().stream().filter(u -> u.getKey().equals("10|fake1")).findFirst().get().getValue());
+		Assertions
+			.assertEquals(
+				String.valueOf(1),
+				views.getUnit().stream().filter(u -> u.getKey().equals("10|fake2")).findFirst().get().getValue());
+		Assertions
+			.assertEquals(
+				String.valueOf(3),
+				views.getUnit().stream().filter(u -> u.getKey().equals("10|fake3")).findFirst().get().getValue());
+
+		tmp
+			.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|"))
+			.foreach(
+				r -> ((OafEntity) r.getPayload())
+					.getMeasures()
+					.stream()
+					.forEach(
+						m -> m
+							.getUnit()
+							.stream()
+							.forEach(
+								u -> Assertions
+									.assertEquals(
+										"count",
+										u.getKey()))));
+
+		Assertions
+			.assertEquals(
+				"0",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("downloads"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+		Assertions
+			.assertEquals(
+				"5",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("views"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+
+		Assertions
+			.assertEquals(
+				"0",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("downloads"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+		Assertions
+			.assertEquals(
+				"1",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("views"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+
+		Assertions
+			.assertEquals(
+				"2",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("downloads"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+		Assertions
+			.assertEquals(
+				"6",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("views"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+
+		Assertions
+			.assertEquals(
+				"0",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("downloads"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+		Assertions
+			.assertEquals(
+				"5",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("views"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+
+		Assertions
+			.assertEquals(
+				"0",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("downloads"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+		Assertions
+			.assertEquals(
+				"1",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("views"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+
+		Assertions
+			.assertEquals(
+				"2",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("downloads"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+		Assertions
+			.assertEquals(
+				"6",
+				tmp
+					.map(r -> ((OafEntity) r.getPayload()))
+					.filter(r -> r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f"))
+					.collect()
+					.get(0)
+					.getMeasures()
+					.stream()
+					.filter(m -> m.getId().equals("views"))
+					.collect(Collectors.toList())
+					.get(0)
+					.getUnit()
+					.get(0)
+					.getValue());
+	}
+
 	@Test
 	void testMatch() {
 		String usageScoresPath = getClass()
-			.getResource("/eu/dnetlib/dhp/actionmanager/usagestats")
+			.getResource("/eu/dnetlib/dhp/actionmanager/usagestats/test1")
 			.getPath();

 		SparkAtomicActionUsageJob.writeActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet");
@ -144,6 +515,39 @@ public class SparkAtomicActionCountJobTest {
 										u.getDataInfo().getProvenanceaction().getClassname()))));

 		tmp
+			.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|"))
+			.foreach(
+				r -> ((OafEntity) r.getPayload())
+					.getMeasures()
+					.stream()
+					.forEach(
+						m -> m
+							.getUnit()
+							.stream()
+							.forEach(
+								u -> Assertions
+									.assertEquals(
+										"count",
+										u.getKey()))));
+
+		tmp
+			.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("50|"))
+			.foreach(
+				r -> ((OafEntity) r.getPayload())
+					.getMeasures()
+					.stream()
+					.forEach(
+						m -> m
+							.getUnit()
+							.stream()
+							.forEach(
+								u -> Assertions
+									.assertEquals(
+										"10|fake1",
+										u.getKey()))));
+
+		tmp
+			.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|"))
 			.foreach(
 				r -> ((OafEntity) r.getPayload())
 					.getMeasures()
@ -465,5 +869,4 @@ public class SparkAtomicActionCountJobTest {
 					.get(0)
 					.getValue());
 	}
-
 }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectionInfo.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectionInfo.java
@ -0,0 +1,38 @@
+
+package eu.dnetlib.dhp.collection.plugin.base;
+
+import java.io.Serializable;
+
+public class BaseCollectionInfo implements Serializable {
+
+	private static final long serialVersionUID = 5766333937429419647L;
+
+	private String id;
+	private String opendoarId;
+	private String rorId;
+
+	public String getId() {
+		return this.id;
+	}
+
+	public void setId(final String id) {
+		this.id = id;
+	}
+
+	public String getOpendoarId() {
+		return this.opendoarId;
+	}
+
+	public void setOpendoarId(final String opendoarId) {
+		this.opendoarId = opendoarId;
+	}
+
+	public String getRorId() {
+		return this.rorId;
+	}
+
+	public void setRorId(final String rorId) {
+		this.rorId = rorId;
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java
@ -0,0 +1,184 @@
+
+package eu.dnetlib.dhp.collection.plugin.base;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.dom4j.Attribute;
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.DocumentHelper;
+import org.dom4j.Element;
+import org.dom4j.Node;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+
+@Disabled
+public class BaseCollectorIteratorTest {
+
+	@Test
+	void testImportFile() throws Exception {
+
+		long count = 0;
+
+		final BaseCollectorIterator iterator = new BaseCollectorIterator("base-sample.tar", new AggregatorReport());
+
+		final Map<String, Map<String, String>> collections = new HashMap<>();
+		final Map<String, AtomicInteger> fields = new HashMap<>();
+		final Set<String> types = new HashSet<>();
+
+		while (iterator.hasNext()) {
+
+			final Document record = DocumentHelper.parseText(iterator.next());
+
+			count++;
+
+			if ((count % 1000) == 0) {
+				System.out.println("# Read records: " + count);
+			}
+
+			// System.out.println(record.asXML());
+
+			for (final Object o : record.selectNodes("//*|//@*")) {
+				final String path = ((Node) o).getPath();
+
+				if (fields.containsKey(path)) {
+					fields.get(path).incrementAndGet();
+				} else {
+					fields.put(path, new AtomicInteger(1));
+				}
+
+				if (o instanceof Element) {
+					final Element n = (Element) o;
+
+					if ("collection".equals(n.getName())) {
+						final String collName = n.getText().trim();
+						if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
+							final Map<String, String> collAttrs = new HashMap<>();
+							for (final Object ao : n.attributes()) {
+								collAttrs.put(((Attribute) ao).getName(), ((Attribute) ao).getValue());
+							}
+							collections.put(collName, collAttrs);
+						}
+					} else if ("type".equals(n.getName())) {
+						types.add(n.getText().trim());
+					}
+
+				}
+			}
+
+		}
+
+		final ObjectMapper mapper = new ObjectMapper();
+		for (final Entry<String, Map<String, String>> e : collections.entrySet()) {
+			System.out.println(e.getKey() + ": " + mapper.writeValueAsString(e.getValue()));
+
+		}
+
+		for (final Entry<String, AtomicInteger> e : fields.entrySet()) {
+			System.out.println(e.getKey() + ": " + e.getValue().get());
+
+		}
+
+		System.out.println("TYPES: ");
+		for (final String s : types) {
+			System.out.println(s);
+
+		}
+
+		assertEquals(30000, count);
+	}
+
+	@Test
+	public void testParquet() throws Exception {
+
+		final String xml = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
+
+		final SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();
+
+		final List<BaseRecordInfo> ls = new ArrayList<>();
+
+		for (int i = 0; i < 10; i++) {
+			ls.add(extractInfo(xml));
+		}
+
+		final JavaRDD<BaseRecordInfo> rdd = JavaSparkContext
+			.fromSparkContext(spark.sparkContext())
+			.parallelize(ls);
+
+		final Dataset<BaseRecordInfo> df = spark
+			.createDataset(rdd.rdd(), Encoders.bean(BaseRecordInfo.class));
+
+		df.printSchema();
+
+		df.show(false);
+	}
+
+	private BaseRecordInfo extractInfo(final String s) {
+		try {
+			final Document record = DocumentHelper.parseText(s);
+
+			final BaseRecordInfo info = new BaseRecordInfo();
+
+			final Set<String> paths = new LinkedHashSet<>();
+			final Set<String> types = new LinkedHashSet<>();
+			final List<BaseCollectionInfo> colls = new ArrayList<>();
+
+			for (final Object o : record.selectNodes("//*|//@*")) {
+				paths.add(((Node) o).getPath());
+
+				if (o instanceof Element) {
+					final Element n = (Element) o;
+
+					final String nodeName = n.getName();
+
+					if ("collection".equals(nodeName)) {
+						final String collName = n.getText().trim();
+
+						if (StringUtils.isNotBlank(collName)) {
+							final BaseCollectionInfo coll = new BaseCollectionInfo();
+							coll.setId(collName);
+							coll.setOpendoarId(n.valueOf("@opendoar_id").trim());
+							coll.setRorId(n.valueOf("@ror_id").trim());
+							colls.add(coll);
+						}
+					} else if ("type".equals(nodeName)) {
+						types.add("TYPE: " + n.getText().trim());
+					} else if ("typenorm".equals(nodeName)) {
+						types.add("TYPE_NORM: " + n.getText().trim());
+					}
+				}
+			}
+
+			info.setId(record.valueOf("//*[local-name() = 'header']/*[local-name() = 'identifier']").trim());
+			info.getTypes().addAll(types);
+			info.getPaths().addAll(paths);
+			info.setCollections(colls);
+
+			return info;
+		} catch (final DocumentException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPluginTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPluginTest.java
@ -0,0 +1,32 @@
+
+package eu.dnetlib.dhp.collection.plugin.base;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+
+class BaseCollectorPluginTest {
+
+	@Test
+	void testFilterXml() throws Exception {
+		final String xml = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
+
+		final Set<String> validIds = new HashSet<>(Arrays.asList("opendoar____::1234", "opendoar____::4567"));
+		final Set<String> validTypes = new HashSet<>(Arrays.asList("1", "121"));
+		final Set<String> validTypes2 = new HashSet<>(Arrays.asList("1", "11"));
+
+		assertTrue(BaseCollectorPlugin.filterXml(xml, validIds, validTypes));
+		assertTrue(BaseCollectorPlugin.filterXml(xml, validIds, new HashSet<>()));
+
+		assertFalse(BaseCollectorPlugin.filterXml(xml, new HashSet<>(), validTypes));
+		assertFalse(BaseCollectorPlugin.filterXml(xml, validIds, validTypes2));
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseRecordInfo.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseRecordInfo.java
@ -0,0 +1,49 @@
+
+package eu.dnetlib.dhp.collection.plugin.base;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+public class BaseRecordInfo implements Serializable {
+
+	private static final long serialVersionUID = -8848232018350074593L;
+
+	private String id;
+	private List<BaseCollectionInfo> collections = new ArrayList<>();
+	private List<String> paths = new ArrayList<>();
+	private List<String> types = new ArrayList<>();
+
+	public String getId() {
+		return this.id;
+	}
+
+	public void setId(final String id) {
+		this.id = id;
+	}
+
+	public List<String> getPaths() {
+		return this.paths;
+	}
+
+	public void setPaths(final List<String> paths) {
+		this.paths = paths;
+	}
+
+	public List<String> getTypes() {
+		return this.types;
+	}
+
+	public void setTypes(final List<String> types) {
+		this.types = types;
+	}
+
+	public List<BaseCollectionInfo> getCollections() {
+		return this.collections;
+	}
+
+	public void setCollections(final List<BaseCollectionInfo> collections) {
+		this.collections = collections;
+	}
+
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Sandro La Bruzzo	843dc95340	resolved conflict	2024-04-11 17:38:16 +02:00
Miriam Baglioni	c8a88b2187	[DataciteHostedByMap] added entry for EBRAINS	2024-04-04 09:14:58 +02:00
Claudio Atzori	26b97aa5ed	Merge pull request '[BETA] fixed the result_country definition and updated the stats DB copy procedure' (#416 ) from antonis.lempesis/dnet-hadoop:beta into beta Reviewed-on: #416	2024-04-03 12:36:03 +02:00
Lampros Smyrnaios	b7c8acc563	- Update the code which acquires the "IMPALA_HDFS_NODE", to test the "tmp"-dir, instead of the base-dir and introduce retries, to overcome potential file-system failures. This change was suggested by "Sebastian Tymkow" and "Grzegorz Bakalarski". - Fix typos.	2024-04-03 13:15:37 +03:00
Michele Artini	71d6e02886	Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta	2024-04-03 09:50:41 +02:00
Michele Artini	02c9a311c8	base datainfo with trust=0.89	2024-04-03 09:50:21 +02:00
Miriam Baglioni	42846d3b91	[OpenCitation] add compression option when writing the sequence file	2024-04-03 09:25:00 +02:00
Miriam Baglioni	4f0a044245	Merge pull request 'Add action set creation for Datacite affiliations' (#413 ) from 9647_datacite_affiliations into beta Reviewed-on: #413	2024-04-02 17:33:38 +02:00
Miriam Baglioni	4bb504e693	Merge pull request '[UsageCount] fixed error' (#415 ) from UsageStatsRecordDS into beta Reviewed-on: #415	2024-04-02 17:06:12 +02:00
Serafeim Chatzopoulos	cbe13a5c61	Fix datacite input path in properties file	2024-04-02 18:00:35 +03:00
Miriam Baglioni	9c9a9562ae	[UsageCount] fixed error	2024-04-02 16:56:37 +02:00
Miriam Baglioni	2c4440951f	Merge pull request '[UsageCount] add check in case the datasource is not matched against those present in the graph' (#414 ) from UsageStatsRecordDS into beta Reviewed-on: #414	2024-04-02 16:30:39 +02:00
Miriam Baglioni	b42bdd5fb3	[UsageCount] add check in case the datasource is not matched against those present in the graph	2024-04-02 16:28:27 +02:00
Miriam Baglioni	64cbd8abe9	Merge pull request '[UsageCount] Usage count per result split by datasource' (#318 ) from UsageStatsRecordDS into beta Reviewed-on: #318	2024-04-02 10:21:39 +02:00
Antonis Lempesis	df6e3bda04	added new orgs in monitor	2024-04-01 22:45:29 +03:00
Antonis Lempesis	573b081f1d	added new orgs in monitor	2024-04-01 22:24:46 +03:00
Serafeim Chatzopoulos	0eb0701b26	Add action set creation for Datacite affiliations	2024-04-01 17:23:26 +03:00
Antonis Lempesis	0bf2a7a359	fixed the result_country definition	2024-04-01 15:23:22 +03:00
Claudio Atzori	24227ab598	Merge pull request '[BETA] fixed typo in indicator query' (#411 ) from antonis.lempesis/dnet-hadoop:beta into beta Reviewed-on: #411	2024-03-27 13:56:43 +01:00
Antonis Lempesis	9ff44eed96	fixed typo in indicator query added more institutions	2024-03-27 14:39:01 +02:00
Claudio Atzori	cff6040424	Merge pull request '[BETA] added missing EOS, Generate tables with parquet-files, instead of csv in the contexts.sh script' (#409 ) from antonis.lempesis/dnet-hadoop:beta into beta Reviewed-on: #409	2024-03-27 12:04:04 +01:00
Antonis Lempesis	1fee4124e0	added missing EOS	2024-03-27 12:58:25 +02:00
Claudio Atzori	9e700a8b0d	Merge pull request 'adding context information to projects and datasources' (#407 ) from taggingProjects into beta Reviewed-on: #407	2024-03-26 14:53:38 +01:00
Claudio Atzori	75551ad4ec	code formatting	2024-03-26 14:53:16 +01:00
Miriam Baglioni	94b931f7bd	[BulkTagging - tag datasource and projects]merging with branch beta	2024-03-26 14:25:19 +01:00
Miriam Baglioni	3b209261f2	[BulkTagging - tag datasource and projects]merging with branch beta	2024-03-26 14:21:27 +01:00
Lampros Smyrnaios	036ba03fcd	Generate tables with parquet-files, instead of csv, in "dhp-stats-update/.../contexts.sh" script.	2024-03-26 13:29:04 +02:00
Claudio Atzori	730eaffc85	Merge pull request 'correctly selecting the active hdfs node for the impala cluster' (#405 ) from antonis.lempesis/dnet-hadoop:beta into beta Reviewed-on: #405	2024-03-26 12:07:46 +01:00
Lampros Smyrnaios	bc8c97182d	Automatically select the ACTIVE HDFS NODE for Impala cluster, in all "copyDataToImpalaCluster.sh" scripts.	2024-03-26 13:01:12 +02:00
Lampros Smyrnaios	92cc27e7eb	Use the ACTIVE HDFS NODE for Impala cluster, in "copyDataToImpalaCluster.sh" script.	2024-03-26 12:34:11 +02:00
Claudio Atzori	ef52128c55	included new stats* workflows in parent pom list of modules, code formatting	2024-03-26 10:42:10 +01:00
Claudio Atzori	bfba71a95c	further follow up changes from integrating the mergeutils branch	2024-03-26 09:01:18 +01:00
Claudio Atzori	d72e7b7487	Merge pull request 'Changes to indicators and funders definition' (#372 ) from antonis.lempesis/dnet-hadoop:beta into beta Reviewed-on: #372	2024-03-26 08:46:20 +01:00
Claudio Atzori	414acd4ef4	Merge pull request 'refactoring the Oaf records merge utilities into dhp-common' (#404 ) from mergeutils into beta Reviewed-on: #404	2024-03-25 16:16:07 +01:00
Claudio Atzori	ecff0b4825	merge from beta	2024-03-25 16:15:52 +01:00
Claudio Atzori	25c2025223	Merge pull request 'mapped oaf:country from results' (#403 ) from oaf_country_beta into beta Reviewed-on: #403	2024-03-25 16:13:31 +01:00
Claudio Atzori	538b180fe0	Merge branch 'beta' into oaf_country_beta	2024-03-25 16:13:20 +01:00
Claudio Atzori	eae88c0fe3	Merge pull request 'Solr JSON payload' (#399 ) from index_records into beta Reviewed-on: #399	2024-03-25 16:12:59 +01:00
Claudio Atzori	82fc609c4f	Merge branch 'beta' into index_records	2024-03-25 16:12:49 +01:00
Claudio Atzori	4b978ffa2d	align dhp-schema.version with the beta branch	2024-03-25 16:12:36 +01:00
Claudio Atzori	fa4b3e6d2b	Merge pull request 'Open Citation integration' (#401 ) from ocnew into beta Reviewed-on: #401	2024-03-25 16:10:40 +01:00
Claudio Atzori	74e5d05577	Merge branch 'beta' into ocnew	2024-03-25 16:10:31 +01:00
Claudio Atzori	6c3b692f60	integrated minor change from beta branch	2024-03-25 16:10:23 +01:00
Claudio Atzori	e9eb590f87	Merge pull request 'FOS ActionSet for the classification of results without a doi' (#397 ) from FOSNew into beta Reviewed-on: #397	2024-03-25 16:07:47 +01:00
Claudio Atzori	9a5b134ddf	Merge branch 'beta' into FOSNew	2024-03-25 16:07:37 +01:00
Claudio Atzori	069803f34a	Merge pull request 'Added exception throwing in Hadoop transformation when TR is not syntactically valid' (#387 ) from exception_on_invalid_transofmation_rule into beta Reviewed-on: #387	2024-03-25 16:05:43 +01:00
Claudio Atzori	71c1f81b54	Merge branch 'beta' into exception_on_invalid_transofmation_rule	2024-03-25 16:05:11 +01:00
Claudio Atzori	c3c9bdb59c	Merge pull request 'bulkTaggingPathMapExtention' (#381 ) from bulkTaggingPathMapExtention into beta Reviewed-on: #381	2024-03-25 16:02:01 +01:00
Claudio Atzori	91b61687fa	Merge branch 'beta' into bulkTaggingPathMapExtention	2024-03-25 15:50:18 +01:00
Claudio Atzori	63067d4b24	align dhp-schema.version with the beta branch	2024-03-25 15:50:05 +01:00
Claudio Atzori	e0c315b07b	Merge pull request 'Extract Information from Transformative Agreement' (#371 ) from transformativeagreement into beta Reviewed-on: #371	2024-03-25 15:42:36 +01:00
Claudio Atzori	54936b7f42	Merge branch 'beta' into transformativeagreement	2024-03-25 15:42:22 +01:00
Claudio Atzori	9fc70a9451	implemented default merge procedure applied to result.instance	2024-03-25 15:39:14 +01:00
Michele Artini	e1149eb5c4	xslt rules and tests	2024-03-25 15:01:42 +01:00
Michele Artini	3f174ad90f	Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta	2024-03-25 12:16:02 +01:00
Michele Artini	6ffb1faf09	fixed a problem with multiple nodes	2024-03-25 12:15:51 +01:00
Giambattista Bloisi	3f22c101d9	Merge pull request 'Enrich authors with ORCID info using new matching algorithm' (#398 ) from new_orcid_enhancement into beta Reviewed-on: #398	2024-03-22 17:29:20 +01:00
Claudio Atzori	c8683eb13c	Merge branch 'beta' into mergeutils	2024-03-22 16:36:13 +01:00
Claudio Atzori	aaa73f89d1	refactoring the Oaf records merge utilities into dhp-common	2024-03-22 16:34:03 +01:00
Giambattista Bloisi	0ff7faad72	Fix conditions that prevented ORCID Enrichment	2024-03-22 16:24:49 +01:00
Michele Artini	7faa115ba0	Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta	2024-03-22 11:08:59 +01:00
Michele Artini	f9c74c98fa	fixed an identifier xpath	2024-03-22 11:08:45 +01:00
Claudio Atzori	7ae7e8aa06	Merge pull request 'Unify merge logic of entities in MergeUtils.class' (#370 ) from mergeutils into beta Reviewed-on: #370	2024-03-22 10:53:14 +01:00
Antonis Lempesis	4c40c96e30	code cleanup	2024-03-22 10:16:49 +02:00
Antonis Lempesis	459167ac2f	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into beta	2024-03-21 12:44:58 +02:00
Antonis Lempesis	07f634a46d	code cleanup	2024-03-21 12:44:30 +02:00
Antonis Lempesis	9521625a07	code cleanup	2024-03-21 11:45:08 +02:00
Antonis Lempesis	67a5aa0a38	Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into beta	2024-03-19 11:24:54 +02:00
dimitrispie	a3a570e9a0	Commit monitor-updates-wf	2024-03-19 09:42:21 +02:00
Giambattista Bloisi	664a381d31	Unify merge logic of entities in MergeUtils.class	2024-03-18 16:04:49 +01:00
Michele Artini	cb29b9773c	xslt rules	2024-03-18 15:31:34 +01:00
Michele Artini	85b844d57e	updated BASE filter param	2024-03-15 15:03:27 +01:00
Michele Artini	455f2e1e07	apply commits from master	2024-03-15 14:56:39 +01:00
Michele Artini	30167aa882	mapped oaf:country from results	2024-03-15 11:24:16 +01:00
Michele Artini	88fef367b9	new plugin to collect from a dump of BASE	2024-03-15 10:47:52 +01:00
Claudio Atzori	078169b922	cleanup	2024-03-15 09:56:04 +01:00
Claudio Atzori	af154d4456	implemented changes from #9497 : sort abstracts by string length, included author fullnames in the related results, expanded instance details within each children/result XML element	2024-03-14 16:21:23 +01:00
Claudio Atzori	7863c92466	expanded paper abstract in the result/children XML element (ticket #9497 )	2024-03-13 16:25:31 +01:00
Claudio Atzori	eb5887cb9a	including related organization url in the XML record serialization (ticket #9498 )	2024-03-13 14:46:00 +01:00
Miriam Baglioni	5a32bb9578	[OC New] last fix	2024-03-13 09:36:18 +01:00
Miriam Baglioni	48c052215c	[OC New] last fix	2024-03-12 23:12:32 +01:00
Claudio Atzori	db66555ebb	WIP: updated provision workflow to create a JSON based representation of the payload	2024-03-12 09:56:09 +01:00
Antonis Lempesis	f74c7e8689	selecting distinct peer_reviewed	2024-03-12 02:13:04 +02:00
Giambattista Bloisi	9092075760	Enrich authors with ORCID info using new matching algorithm	2024-03-11 13:23:59 +01:00
Claudio Atzori	d4871b31e8	WIP: extended provision workflow to create the JSON based payload	2024-03-08 11:43:20 +01:00
Antonis Lempesis	3c79720342	fixed the irish result subset	2024-03-07 14:08:57 +02:00
Antonis Lempesis	5ae4b4286c	Merge branch 'beta' of https://code-repo.d3science.org/antonis.lempesis/dnet-hadoop into beta	2024-03-07 12:15:19 +02:00
Miriam Baglioni	5180b6ec8a	[FOSNEW] removed test class	2024-03-07 10:47:13 +01:00
Miriam Baglioni	7827a2d66b	[OCNEW] added creation of the actionset for the results classified with FoS based ont he OpenAIRE identifier	2024-03-07 10:36:30 +01:00
Antonis Lempesis	316d585c8a	using distinct apcs per publication to avoid huge sums	2024-03-07 02:07:59 +02:00
Miriam Baglioni	fd34372c40	[OCNEW] first implementation	2024-03-06 13:42:00 +01:00
Claudio Atzori	6fcf872daa	Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into index_records	2024-02-28 10:27:28 +01:00
Claudio Atzori	3f07390a58	WIP	2024-02-28 10:10:10 +01:00
Miriam Baglioni	72bae7af76	[Transformative Agreement] removed the relations from the ActionSet waiting to have the gree light from Ioanna	2024-02-19 16:20:12 +01:00
Miriam Baglioni	43da7e1191	[Tagging Projects and Datasource] changed the way the pathMap parameter is passed. It was too long and was truncated	2024-02-19 16:12:59 +01:00
Serafeim Chatzopoulos	f0dc12634b	Add Action Set creation for affiliations inferred from the OpenAPC data	2024-02-18 18:02:09 +02:00
Miriam Baglioni	8dae10b442	-	2024-02-14 14:57:08 +01:00
Miriam Baglioni	83bb97be83	[Tagging Projects and Datasource] added test to check datasource tagging. Fixed issue	2024-02-14 11:23:47 +01:00
Miriam Baglioni	6e1f383e4a	[Tagging Projects and Datasource] first extention of bulktagging to add the context to projects and datasource	2024-02-13 16:37:14 +01:00
Miriam Baglioni	3f7d262a4e	mergin with branch beta	2024-02-13 14:05:58 +01:00
Miriam Baglioni	eca021f4d6	[Transformative Agreement] add results with information abount the agreement and the country of the organization paid for it	2024-02-13 12:21:07 +01:00
Miriam Baglioni	bdb6bbb365	mergin with branch beta	2024-02-12 15:50:43 +01:00
Antonis Lempesis	dd4c27f4f3	added 2 new institutions in monitor	2024-02-08 12:57:57 +02:00
Antonis Lempesis	a512ead447	changed orcid ids to all capital	2024-01-30 16:54:47 +02:00
Miriam Baglioni	07a373a0bd	[bulkTagging] removing checks while performing the substring action so that it will fire an Exception if the paramneters are wrongly set	2024-01-30 13:51:11 +01:00
Miriam Baglioni	ead08b0dd4	mergin with branch beta	2024-01-30 12:19:10 +01:00
Antonis Lempesis	bb10a22290	merged changes from dnet-hadoop	2024-01-29 21:51:47 +02:00
Miriam Baglioni	a418dacb47	[UsageCount] code extention to include also the name of the datasource	2024-01-29 18:12:33 +01:00
Miriam Baglioni	e9131f4e4a	mergin with branch beta	2024-01-29 16:27:18 +01:00
Sandro La Bruzzo	9aebca77a0	Added exception throwing in Hadoop transformation when TR is not syntactically valid	2024-01-29 14:41:02 +01:00
Antonis Lempesis	c548796463	Changed step16-createIndicatorsTables to use a spark oozie action instead of hive	2024-01-26 02:04:48 +02:00
Antonis Lempesis	fd43b0e84a	max mem of joins (hive.mapjoin.followby.gby.localtask.max.memory.usage) now 80%, up from 55%.	2024-01-25 15:06:34 +01:00
Miriam Baglioni	f7d06dc661	compilation after merging	2024-01-23 11:43:08 +01:00
Miriam Baglioni	6e58d79623	mergin with branch beta	2024-01-23 11:36:47 +01:00
Miriam Baglioni	e0ec800d7e	[BulkTagging] extend the definition of the pathMap to include also actions that should be performed of the value extracted from the result befor applying the constraint	2024-01-23 11:34:53 +01:00
Antonis Lempesis	e024718f73	creating result_instances even when no pids exist for the instance	2024-01-10 22:25:50 +01:00
dimitrispie	b920307bdd	Changes to indicators	2024-01-09 00:47:09 +02:00
dimitrispie	8b2cbb611e	Changes to beta db names	2024-01-09 00:40:56 +02:00
Antonis Lempesis	2e4cab026c	fixed the result_country definition	2024-01-08 16:01:26 +02:00
dimitrispie	6b823100ae	Update buildIrishMonitorDB.sql New indicators added	2024-01-07 22:54:39 +02:00
dimitrispie	75bfde043c	Historical Snapshots Workflow Create historical snapshots db with parameters: hist_db_name=openaire_beta_historical_snapshots_xxx hist_db_name_prev=openaire_beta_historical_snapshots_xxx (previous run of wf) stats_db_name=openaire_beta_stats_xxx stats_irish_db_name=openaire_beta_stats_monitor_ie_xxx monitor_db_name=openaire_beta_stats_monitor_xxx monitor_db_prod_name=openaire_beta_stats_monitor monitor_irish_db_name=openaire_beta_stats_monitor_ie_xxx monitor_irish_db_prod_name=openaire_beta_stats_monitor_ie hist_db_prod_name=openaire_beta_historical_snapshots hist_db_shadow_name=openaire_beta_historical_snapshots_shadow hist_date=122023 hive_timeout=150000 hadoop_user_name=xxx resumeFrom=CreateDB	2024-01-04 15:11:04 +02:00
dimitrispie	ffdd03d2f4	Monitor Irish Stats WF Parameters (with examples): stats_db_name=openaire_beta_stats_20231208 monitor_irish_db_name=openaire_beta_stats_monitor_ie_20231208b monitor_irish_db_prod_name=openaire_beta_stats_monitor_ie graph_db_name=openaire_beta_20231208 monitor_irish_db_shadow_name=openaire_beta_stats_monitor_ie_shadow hive_timeout=150000 hadoop_user_name=dnet.beta resumeFrom=Step1-buildIrishMonitorDB	2023-12-22 11:05:24 +02:00
dimitrispie	40b98d8182	Changes to indicators and funders definition - Changes result_refereed definition - Added result_country indicator - Added indi_pub_green_with_license indicator - Added country from jurisdiction to funders	2023-12-22 10:29:20 +02:00
Miriam Baglioni	624f5f3f21	[Transformative Agreement] added check to verify the APC were paid byu the IReL funder	2023-12-18 15:28:19 +01:00
Miriam Baglioni	354e02e6a9	[Transformative Agreement] removed not needed class. Read directly the json and no need to pass from the csv	2023-12-18 15:20:27 +01:00
Miriam Baglioni	b00771c7cc	[Transformative Agreement] added code to extract relations from the transformative agreement file for the IE products got from OpenAPC	2023-12-18 15:12:44 +01:00
Miriam Baglioni	4c9bc4c3a5	refactoring	2023-06-30 19:05:15 +02:00
Miriam Baglioni	55ea485783	[UsageCount] split the count for result at the level of the datasource. for each indicator one unit is specified for each datasource contrinuting to that indicator value. The datasource key is the value of the key element in the unit for the measure, while the count for that datasource is in the value	2023-06-30 18:39:30 +02:00
				`@ -0,0 +1 @@`
				`{"id":"50\|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}`