merged

2020-05-19 09:43:12 +02:00 · 2020-05-19 09:43:12 +02:00 · ca722d4d18
parent 7362bc3e9d 8c95b50f26
commit ca722d4d18
77 changed files with 1880 additions and 803 deletions
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
@ -13,6 +13,7 @@ public class ModelConstants {
 	public static final String DNET_DATA_CITE_DATE = "dnet:dataCite_date";
 	public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource";
 	public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
+	public static final String DNET_COUNTRY_TYPE = "dnet:countries";

 	public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository";
 	public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry";
@ -49,6 +50,13 @@ public class ModelConstants {
 	public static final String HAS_PARTICIPANT = "hasParticipant";
 	public static final String IS_PARTICIPANT = "isParticipant";

+	public static final String RESULT_ORGANIZATION = "resultOrganization";
+	public static final String AFFILIATION = "affiliation";
+	public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf";
+	public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution";
+
+	public static final String MERGES = "merges";
+
 	public static final String UNKNOWN = "UNKNOWN";
 	public static final String NOT_AVAILABLE = "not available";

--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
@ -1,10 +1,15 @@

 package eu.dnetlib.dhp.schema.common;

+import static com.google.common.base.Preconditions.checkArgument;
+
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.function.Function;

+import org.apache.commons.lang3.StringUtils;
+
 import com.google.common.collect.Maps;

 import eu.dnetlib.dhp.schema.oaf.*;
@ -379,6 +384,21 @@ public class ModelSupport {
 				entityMapping.get(EntityType.valueOf(targetType)).name());
 	}

+	/**
+	 * Builds the qualified identifier {@code <dbName>.<tableName>}.
+	 *
+	 * @param dbName name of the database, must not be blank
+	 * @param tableName name of the table, must not be blank
+	 * @return the two names joined by a dot
+	 * @throws IllegalArgumentException when either name is blank
+	 */
+	public static <T extends Oaf> String tableIdentifier(String dbName, String tableName) {
+		final boolean dbNameGiven = StringUtils.isNotBlank(dbName);
+		checkArgument(dbNameGiven, "DB name cannot be empty");
+
+		final boolean tableNameGiven = StringUtils.isNotBlank(tableName);
+		checkArgument(tableNameGiven, "table name cannot be empty");
+
+		return dbName + "." + tableName;
+	}
+
+	/**
+	 * Builds the qualified identifier of the table associated to a model class; the table name is the
+	 * lower-cased simple name of {@code clazz}.
+	 *
+	 * @param dbName name of the database, must not be blank
+	 * @param clazz model class the table name is derived from, must not be null
+	 * @return {@code <dbName>.<lower-cased simple class name>}
+	 * @throws IllegalArgumentException when {@code clazz} is null or {@code dbName} is blank
+	 */
+	public static <T extends Oaf> String tableIdentifier(String dbName, Class<T> clazz) {
+
+		checkArgument(Objects.nonNull(clazz), "clazz is needed to derive the table name, thus cannot be null");
+
+		// Locale.ROOT keeps the derived table name independent of the JVM default locale
+		// (e.g. under a Turkish locale 'I' would otherwise be lower-cased to a dotless 'ı').
+		return tableIdentifier(dbName, clazz.getSimpleName().toLowerCase(java.util.Locale.ROOT));
+	}
+
 	public static <T extends Oaf> Function<T, String> idFn() {
 		return x -> {
 			if (isSubClass(x, Relation.class)) {
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
@ -523,7 +523,9 @@ public class ProtoConverter implements Serializable {
 	}

 	private static Context mapContext(ResultProtos.Result.Context context) {
-
+		if (context == null || StringUtils.isBlank(context.getId())) {
+			return null;
+		}
 		final Context entity = new Context();
 		entity.setId(context.getId());
 		entity
@ -537,6 +539,10 @@ public class ProtoConverter implements Serializable {
 	}

 	public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
+		// Nothing to map when the entry is absent or when both its key and its value are blank.
+		// '&&' (instead of the non-short-circuit '&') and explicit grouping make the intent unambiguous.
+		if (kv == null || (StringUtils.isBlank(kv.getKey()) && StringUtils.isBlank(kv.getValue()))) {
+			return null;
+		}
+
 		final KeyValue keyValue = new KeyValue();
 		keyValue.setKey(kv.getKey());
 		keyValue.setValue(kv.getValue());
@ -575,6 +581,10 @@ public class ProtoConverter implements Serializable {
 	}

 	public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
+		// The short-circuit '||' is required here: with the bitwise '|' both operands are always
+		// evaluated, so sp.getValue() would throw a NullPointerException when sp is null.
+		if (sp == null || StringUtils.isBlank(sp.getValue())) {
+			return null;
+		}
+
 		final StructuredProperty structuredProperty = new StructuredProperty();
 		structuredProperty.setValue(sp.getValue());
 		structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
@ -611,6 +621,10 @@ public class ProtoConverter implements Serializable {
 	}

 	public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
+		if (s == null || StringUtils.isBlank(s.getValue())) {
+			return null;
+		}
+
 		final Field<String> stringField = new Field<>();
 		stringField.setValue(s.getValue());
 		stringField.setDataInfo(mapDataInfo(s.getDataInfo()));
@ -618,19 +632,16 @@ public class ProtoConverter implements Serializable {
 	}

+	/**
+	 * Converts a protobuf boolean field into a {@code Field<Boolean>}, copying its value and the
+	 * data info mapped through {@code mapDataInfo}.
+	 *
+	 * @param b the protobuf field, may be {@code null}
+	 * @return the converted field, or {@code null} when {@code b} is {@code null}
+	 */
 	public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
+		if (b == null) {
+			return null;
+		}
+
 		final Field<Boolean> booleanField = new Field<>();
 		booleanField.setValue(b.getValue());
 		booleanField.setDataInfo(mapDataInfo(b.getDataInfo()));
 		return booleanField;
 	}

-	public static Field<Integer> mapIntField(FieldTypeProtos.IntField b) {
-		final Field<Integer> entity = new Field<>();
-		entity.setValue(b.getValue());
-		entity.setDataInfo(mapDataInfo(b.getDataInfo()));
-		return entity;
-	}
-
 	public static Journal mapJournal(FieldTypeProtos.Journal j) {
 		final Journal journal = new Journal();
 		journal.setConferencedate(j.getConferencedate());
--- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java
+++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java
@ -18,6 +18,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.oaf.Relation;

 public class PrepareMergedRelationJob {
@ -56,6 +57,7 @@ public class PrepareMergedRelationJob {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
+				removeOutputDir(spark, outputPath);
 				selectMergesRelations(
 					spark,
 					inputPath,
@ -84,4 +86,9 @@ public class PrepareMergedRelationJob {
 				(MapFunction<String, Relation>) value -> OBJECT_MAPPER.readValue(value, Relation.class),
 				Encoders.bean(Relation.class));
 	}
+
+	private static void removeOutputDir(SparkSession spark, String path) {
+		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+	}
+
 }
--- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java
+++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java
@ -18,6 +18,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import scala.Tuple2;

@ -62,6 +63,7 @@ public class SparkRemoveBlacklistedRelationJob {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
+				removeOutputDir(spark, outputPath);
 				removeBlacklistedRelations(
 					spark,
 					blacklistPath,
@ -69,7 +71,6 @@ public class SparkRemoveBlacklistedRelationJob {
 					outputPath,
 					mergesPath);
 			});
-
 	}

 	private static void removeBlacklistedRelations(SparkSession spark, String blacklistPath, String inputPath,
@ -78,8 +79,6 @@ public class SparkRemoveBlacklistedRelationJob {
 		Dataset<Relation> inputRelation = readRelations(spark, inputPath);
 		Dataset<Relation> mergesRelation = readRelations(spark, mergesPath);

-		log.info("InputRelationCount: {}", inputRelation.count());
-
 		Dataset<Relation> dedupSource = blackListed
 			.joinWith(
 				mergesRelation, blackListed.col("source").equalTo(mergesRelation.col("target")),
@ -102,11 +101,6 @@ public class SparkRemoveBlacklistedRelationJob {
 				return c._1();
 			}, Encoders.bean(Relation.class));

-		dedupBL
-			.write()
-			.mode(SaveMode.Overwrite)
-			.json(blacklistPath + "/deduped");
-
 		inputRelation
 			.joinWith(
 				dedupBL, (inputRelation
@ -144,4 +138,8 @@ public class SparkRemoveBlacklistedRelationJob {
 				Encoders.bean(Relation.class));
 	}

+	private static void removeOutputDir(SparkSession spark, String path) {
+		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+	}
+
 }
--- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml
@ -22,6 +22,25 @@
        </property>
    </parameters>

+    <!-- Workflow-wide settings: job tracker, name node, queue names and the sharelib for spark actions.
+         Declared once here instead of being repeated inside each action. -->
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.job.queuename</name>
+                <value>${queueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapred.job.queue.name</name>
+                <value>${oozieLauncherQueueName}</value>
+            </property>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
    <start to="reset_outputpath"/>

    <kill name="Kill">
@ -49,8 +68,6 @@

    <action name="copy_publication">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/publication</arg>
            <arg>${nameNode}/${outputPath}/publication</arg>
        </distcp>
@ -60,8 +77,6 @@

    <action name="copy_dataset">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/dataset</arg>
            <arg>${nameNode}/${outputPath}/dataset</arg>
        </distcp>
@ -71,8 +86,6 @@

    <action name="copy_orp">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
            <arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
        </distcp>
@ -82,8 +95,6 @@

    <action name="copy_software">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/software</arg>
            <arg>${nameNode}/${outputPath}/software</arg>
        </distcp>
@ -93,8 +104,6 @@
    
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
@ -104,8 +113,6 @@

    <action name="copy_project">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
@ -115,8 +122,6 @@

    <action name="copy_datasource">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
@ -128,8 +133,6 @@

    <action name="read_blacklist">
        <java>
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.dhp.blacklist.ReadBlacklistFromDB</main-class>
            <arg>--hdfsPath</arg><arg>${workingDir}/blacklist</arg>
            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
@ -156,6 +159,7 @@
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--outputPath</arg><arg>${workingDir}/mergesRelation</arg>
@ -180,6 +184,7 @@
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--outputPath</arg><arg>${outputPath}/relation</arg>
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java
@ -29,31 +29,32 @@ public class EventFactory {
 		"yyyy-MM-dd"
 	};

-	public static Event newBrokerEvent(final Result source, final Result target, final UpdateInfo<?> updateInfo) {
+	/**
+	 * Creates the broker event describing the given update.
+	 * <p>
+	 * The event id is calculated from the topic path, the first original id of the target record and the
+	 * highlight value rendered as string; creation date is the current time and the expiry date is derived
+	 * from it.
+	 * <p>
+	 * NOTE(review): the first original id is read with {@code get(0)} without checking that the list is
+	 * non-empty — confirm that targets always carry at least one original id.
+	 *
+	 * @param updateInfo the update to publish, giving access to source and target records
+	 * @return the new event, never an instant message
+	 */
+	public static Event newBrokerEvent(final UpdateInfo<?> updateInfo) {

 		final long now = new Date().getTime();

 		final Event res = new Event();

-		final Map<String, Object> map = createMapFromResult(target, source, updateInfo);
+		final Map<String, Object> map = createMapFromResult(updateInfo);

-		final String payload = createPayload(target, updateInfo);
+		final String payload = createPayload(updateInfo);

 		final String eventId = calculateEventId(
-			updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString());
+			updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0),
+			updateInfo.getHighlightValueAsString());

 		res.setEventId(eventId);
 		res.setProducerId(PRODUCER_ID);
 		res.setPayload(payload);
 		res.setMap(map);
-		res.setTopic(updateInfo.getTopic());
+		res.setTopic(updateInfo.getTopicPath());
 		res.setCreationDate(now);
 		res.setExpiryDate(calculateExpiryDate(now));
 		res.setInstantMessage(false);
 		return res;
 	}

-	private static String createPayload(final Result result, final UpdateInfo<?> updateInfo) {
+	private static String createPayload(final UpdateInfo<?> updateInfo) {
 		final OpenAireEventPayload payload = new OpenAireEventPayload();
 		// TODO

@ -62,32 +63,34 @@ public class EventFactory {
 		return payload.toJSON();
 	}

-	private static Map<String, Object> createMapFromResult(final Result oaf, final Result source,
-		final UpdateInfo<?> updateInfo) {
+	private static Map<String, Object> createMapFromResult(final UpdateInfo<?> updateInfo) {
 		final Map<String, Object> map = new HashMap<>();

-		final List<KeyValue> collectedFrom = oaf.getCollectedfrom();
+		final Result source = updateInfo.getSource();
+		final Result target = updateInfo.getTarget();
+
+		final List<KeyValue> collectedFrom = target.getCollectedfrom();
 		if (collectedFrom.size() == 1) {
 			map.put("target_datasource_id", collectedFrom.get(0).getKey());
 			map.put("target_datasource_name", collectedFrom.get(0).getValue());
 		}

-		final List<String> ids = oaf.getOriginalId();
+		final List<String> ids = target.getOriginalId();
 		if (ids.size() > 0) {
 			map.put("target_publication_id", ids.get(0));
 		}

-		final List<StructuredProperty> titles = oaf.getTitle();
+		final List<StructuredProperty> titles = target.getTitle();
 		if (titles.size() > 0) {
 			map.put("target_publication_title", titles.get(0));
 		}

-		final long date = parseDateTolong(oaf.getDateofacceptance().getValue());
+		final long date = parseDateTolong(target.getDateofacceptance().getValue());
 		if (date > 0) {
 			map.put("target_dateofacceptance", date);
 		}

-		final List<StructuredProperty> subjects = oaf.getSubject();
+		final List<StructuredProperty> subjects = target.getSubject();
 		if (subjects.size() > 0) {
 			map
 				.put(
@ -95,7 +98,7 @@ public class EventFactory {
 					subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
 		}

-		final List<Author> authors = oaf.getAuthor();
+		final List<Author> authors = target.getAuthor();
 		if (authors.size() > 0) {
 			map
 				.put(
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java
@ -0,0 +1,52 @@
+
+package eu.dnetlib.dhp.broker.model;
+
+/**
+ * Topics of the events produced by the broker; each constant carries its slash-separated path.
+ */
+public enum Topic {
+
+	// ENRICHMENT MISSING
+	ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"),
+	ENRICH_MISSING_ABSTRACT("ENRICH/MISSING/ABSTRACT"),
+	ENRICH_MISSING_PUBLICATION_DATE("ENRICH/MISSING/PUBLICATION_DATE"),
+	ENRICH_MISSING_PID("ENRICH/MISSING/PID"),
+	ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"),
+	ENRICH_MISSING_SOFTWARE("ENRICH/MISSING/SOFTWARE"),
+	ENRICH_MISSING_SUBJECT_MESHEUROPMC("ENRICH/MISSING/SUBJECT/MESHEUROPMC"),
+	ENRICH_MISSING_SUBJECT_ARXIV("ENRICH/MISSING/SUBJECT/ARXIV"),
+	ENRICH_MISSING_SUBJECT_JEL("ENRICH/MISSING/SUBJECT/JEL"),
+	ENRICH_MISSING_SUBJECT_DDC("ENRICH/MISSING/SUBJECT/DDC"),
+	ENRICH_MISSING_SUBJECT_ACM("ENRICH/MISSING/SUBJECT/ACM"),
+	ENRICH_MISSING_SUBJECT_RVK("ENRICH/MISSING/SUBJECT/RVK"),
+	ENRICH_MISSING_AUTHOR_ORCID("ENRICH/MISSING/AUTHOR/ORCID"),
+
+	// ENRICHMENT MORE
+	ENRICH_MORE_PID("ENRICH/MORE/PID"),
+	ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"),
+	ENRICH_MORE_ABSTRACT("ENRICH/MORE/ABSTRACT"),
+	ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"),
+	ENRICH_MORE_PROJECT("ENRICH/MORE/PROJECT"),
+	ENRICH_MORE_SUBJECT_MESHEUROPMC("ENRICH/MORE/SUBJECT/MESHEUROPMC"),
+	ENRICH_MORE_SUBJECT_ARXIV("ENRICH/MORE/SUBJECT/ARXIV"),
+	ENRICH_MORE_SUBJECT_JEL("ENRICH/MORE/SUBJECT/JEL"),
+	ENRICH_MORE_SUBJECT_DDC("ENRICH/MORE/SUBJECT/DDC"),
+	ENRICH_MORE_SUBJECT_ACM("ENRICH/MORE/SUBJECT/ACM"),
+	ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"),
+
+	// ADDITION
+	ADD_BY_PROJECT("ADD/BY_PROJECT");
+
+	// enum constants are shared singletons: the path must never change after construction
+	private final String path;
+
+	Topic(final String path) {
+		this.path = path;
+	}
+
+	/**
+	 * @return the slash-separated path of this topic
+	 */
+	public String getPath() {
+		return this.path;
+	}
+
+	/**
+	 * Looks up the topic having exactly the given path.
+	 *
+	 * @param path the path to search for
+	 * @return the matching topic, or {@code null} when no topic has that path
+	 */
+	public static Topic fromPath(final String path) {
+		for (final Topic t : Topic.values()) {
+			if (t.getPath().equals(path)) {
+				return t;
+			}
+		}
+		return null;
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java
@ -14,21 +14,20 @@ import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import com.fasterxml.jackson.databind.ObjectMapper;
-
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.broker.model.Event;
 import eu.dnetlib.dhp.broker.model.EventFactory;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAbstract;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAuthorOrcid;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMissingOpenAccess;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPid;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMissingProject;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPublicationDate;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMissingSubject;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMoreOpenAccess;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid;
-import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid;
+import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject;
+import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
 import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.oaf.Result;
@ -37,7 +36,16 @@ public class GenerateEventsApplication {

 	private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class);

-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+	private static final UpdateMatcher<?> enrichMissingAbstract = new EnrichMissingAbstract();
+	private static final UpdateMatcher<?> enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid();
+	private static final UpdateMatcher<?> enrichMissingOpenAccess = new EnrichMissingOpenAccess();
+	private static final UpdateMatcher<?> enrichMissingPid = new EnrichMissingPid();
+	private static final UpdateMatcher<?> enrichMissingProject = new EnrichMissingProject();
+	private static final UpdateMatcher<?> enrichMissingPublicationDate = new EnrichMissingPublicationDate();
+	private static final UpdateMatcher<?> enrichMissingSubject = new EnrichMissingSubject();
+	private static final UpdateMatcher<?> enrichMoreOpenAccess = new EnrichMoreOpenAccess();
+	private static final UpdateMatcher<?> enrichMorePid = new EnrichMorePid();
+	private static final UpdateMatcher<?> enrichMoreSubject = new EnrichMoreSubject();

 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -76,37 +84,22 @@ public class GenerateEventsApplication {
 	}

+	/**
+	 * Generates the events for a group of records: each record is considered in turn as the target and
+	 * every matcher searches the whole group for updates applicable to it; the collected updates are
+	 * finally converted to events by {@link EventFactory#newBrokerEvent}.
+	 *
+	 * @param children the records of the group
+	 * @return the events derived from all the updates found
+	 */
 	private List<Event> generateEvents(final Result... children) {
-		final List<Event> list = new ArrayList<>();
+		final List<UpdateInfo<?>> list = new ArrayList<>();

-		for (final Result source : children) {
-			for (final Result target : children) {
-				if (source != target) {
-					list
-						.addAll(
-							findUpdates(source, target)
-								.stream()
-								.map(info -> EventFactory.newBrokerEvent(source, target, info))
-								.collect(Collectors.toList()));
-				}
-			}
+		for (final Result target : children) {
+			list.addAll(enrichMissingAbstract.searchUpdatesForRecord(target, children));
+			list.addAll(enrichMissingAuthorOrcid.searchUpdatesForRecord(target, children));
+			list.addAll(enrichMissingOpenAccess.searchUpdatesForRecord(target, children));
+			list.addAll(enrichMissingPid.searchUpdatesForRecord(target, children));
+			list.addAll(enrichMissingProject.searchUpdatesForRecord(target, children));
+			list.addAll(enrichMissingPublicationDate.searchUpdatesForRecord(target, children));
+			list.addAll(enrichMissingSubject.searchUpdatesForRecord(target, children));
+			list.addAll(enrichMoreOpenAccess.searchUpdatesForRecord(target, children));
+			list.addAll(enrichMorePid.searchUpdatesForRecord(target, children));
+			list.addAll(enrichMoreSubject.searchUpdatesForRecord(target, children));
 		}

-		return list;
-	}
-
-	private List<UpdateInfo<?>> findUpdates(final Result source, final Result target) {
-		final List<UpdateInfo<?>> list = new ArrayList<>();
-		list.addAll(EnrichMissingAbstract.findUpdates(source, target));
-		list.addAll(EnrichMissingAuthorOrcid.findUpdates(source, target));
-		list.addAll(EnrichMissingOpenAccess.findUpdates(source, target));
-		list.addAll(EnrichMissingPid.findUpdates(source, target));
-		list.addAll(EnrichMissingProject.findUpdates(source, target));
-		list.addAll(EnrichMissingPublicationDate.findUpdates(source, target));
-		list.addAll(EnrichMissingSubject.findUpdates(source, target));
-		list.addAll(EnrichMoreOpenAccess.findUpdates(source, target));
-		list.addAll(EnrichMorePid.findUpdates(source, target));
-		list.addAll(EnrichMoreSubject.findUpdates(source, target));
-		return list;
+		return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
 	}

 }
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java
@ -0,0 +1,36 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Matcher for the {@link Topic#ENRICH_MISSING_ABSTRACT} topic: proposes the abstract of a source record
+ * for a target record that has none.
+ */
+public class EnrichMissingAbstract extends UpdateMatcher<String> {
+
+	public EnrichMissingAbstract() {
+		super(false);
+	}
+
+	/**
+	 * Yields a single update carrying the value of the first description of {@code source} when the
+	 * description of {@code target} is missing and the one of {@code source} is not; an empty list otherwise.
+	 */
+	@Override
+	protected List<UpdateInfo<String>> findUpdates(final Result source, final Result target) {
+		if (!isMissing(target.getDescription()) || isMissing(source.getDescription())) {
+			return new ArrayList<>();
+		}
+
+		final String sourceAbstract = source.getDescription().get(0).getValue();
+		return Arrays.asList(generateUpdateInfo(sourceAbstract, source, target));
+	}
+
+	/**
+	 * Wraps the abstract in an update that adds it to the abstracts of the payload and uses the
+	 * abstract itself as highlight string.
+	 */
+	@Override
+	public UpdateInfo<String> generateUpdateInfo(final String highlightValue, final Result source,
+		final Result target) {
+		return new UpdateInfo<>(
+			Topic.ENRICH_MISSING_ABSTRACT,
+			highlightValue, source, target,
+			(payload, abstractText) -> payload.getAbstracts().add(abstractText),
+			value -> value);
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java
@ -0,0 +1,34 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Matcher for the {@link Topic#ENRICH_MISSING_AUTHOR_ORCID} topic; the highlighted value is a pair
+ * whose left element is rendered before the ORCID and whose right element is rendered as the ORCID.
+ */
+public class EnrichMissingAuthorOrcid extends UpdateMatcher<Pair<String, String>> {
+
+	public EnrichMissingAuthorOrcid() {
+		super(true);
+	}
+
+	/**
+	 * Not implemented yet: no update is ever produced.
+	 */
+	@Override
+	protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
+		// TODO matching logic still to be written
+		return Arrays.asList();
+	}
+
+	/**
+	 * Wraps the pair in an update that appends {@code "<left> - ORCID: <right>"} to the creators of the
+	 * payload and uses {@code "<left>::<right>"} as highlight string.
+	 */
+	@Override
+	public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
+		final Result source, final Result target) {
+		return new UpdateInfo<>(
+			Topic.ENRICH_MISSING_AUTHOR_ORCID,
+			highlightValue, source, target,
+			(payload, author) -> payload.getCreators().add(author.getLeft() + " - ORCID: " + author.getRight()),
+			author -> author.getLeft() + "::" + author.getRight());
+	}
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java
@ -0,0 +1,55 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.broker.objects.Instance;
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
+import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Matcher for the {@link Topic#ENRICH_MISSING_OA_VERSION} topic: proposes the open access instances of
+ * a source record for a target record that has no open access instance.
+ */
+public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
+
+	public EnrichMissingOpenAccess() {
+		super(true);
+	}
+
+	/**
+	 * Yields one update for each broker instance derived from the open access instances of
+	 * {@code source}, unless {@code target} already has an open access instance.
+	 * Instances without access right or without class id are treated as not open access.
+	 */
+	@Override
+	protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
+		// constant-first equals and the null filter avoid a NullPointerException on incomplete instances
+		final boolean targetHasOpenAccess = target
+			.getInstance()
+			.stream()
+			.filter(i -> i.getAccessright() != null)
+			.map(i -> i.getAccessright().getClassid())
+			.anyMatch(BrokerConstants.OPEN_ACCESS::equals);
+
+		if (targetHasOpenAccess) {
+			return Arrays.asList();
+		}
+
+		return source
+			.getInstance()
+			.stream()
+			.filter(i -> i.getAccessright() != null)
+			.filter(i -> BrokerConstants.OPEN_ACCESS.equals(i.getAccessright().getClassid()))
+			.map(ConversionUtils::oafInstanceToBrokerInstances)
+			.flatMap(s -> s)
+			.map(i -> generateUpdateInfo(i, source, target))
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Wraps the instance in an update that adds it to the instances of the payload and uses its URL
+	 * as highlight string.
+	 */
+	@Override
+	public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue,
+		final Result source,
+		final Result target) {
+		return new UpdateInfo<>(
+			Topic.ENRICH_MISSING_OA_VERSION,
+			highlightValue, source, target,
+			(p, i) -> p.getInstances().add(i),
+			Instance::getUrl);
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java
@ -0,0 +1,45 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.broker.objects.Pid;
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Matcher for the {@link Topic#ENRICH_MISSING_PID} topic: proposes the pids of a source record for a
+ * target record that has none.
+ */
+public class EnrichMissingPid extends UpdateMatcher<Pid> {
+
+	public EnrichMissingPid() {
+		super(true);
+	}
+
+	/**
+	 * Yields one update for each pid of {@code source}, converted to the broker model, when
+	 * {@code target} has no pid; an empty list otherwise.
+	 */
+	@Override
+	protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) {
+		if (!target.getPid().isEmpty()) {
+			return Arrays.asList();
+		}
+
+		final List<UpdateInfo<Pid>> updates = source
+			.getPid()
+			.stream()
+			.map(ConversionUtils::oafPidToBrokerPid)
+			.map(brokerPid -> generateUpdateInfo(brokerPid, source, target))
+			.collect(Collectors.toList());
+		return updates;
+	}
+
+	/**
+	 * Wraps the pid in an update that adds it to the pids of the payload and uses
+	 * {@code "<type>::<value>"} as highlight string.
+	 */
+	@Override
+	public UpdateInfo<Pid> generateUpdateInfo(final Pid highlightValue, final Result source, final Result target) {
+		return new UpdateInfo<>(
+			Topic.ENRICH_MISSING_PID,
+			highlightValue, source, target,
+			(payload, newPid) -> payload.getPids().add(newPid),
+			newPid -> newPid.getType() + "::" + newPid.getValue());
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java
@ -0,0 +1,35 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.Project;
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Matcher for the {@link Topic#ENRICH_MISSING_PROJECT} topic.
+ */
+public class EnrichMissingProject extends UpdateMatcher<Project> {
+
+	public EnrichMissingProject() {
+		super(true);
+	}
+
+	/**
+	 * Not implemented yet: no update is ever produced.
+	 */
+	@Override
+	protected List<UpdateInfo<Project>> findUpdates(final Result source, final Result target) {
+		// TODO matching logic still to be written
+		return Arrays.asList();
+	}
+
+	/**
+	 * Wraps the project in an update that adds it to the projects of the payload and uses
+	 * {@code "<funder>::<fundingProgram>::<code>"} as highlight string.
+	 */
+	@Override
+	public UpdateInfo<Project> generateUpdateInfo(final Project highlightValue,
+		final Result source,
+		final Result target) {
+		return new UpdateInfo<>(
+			Topic.ENRICH_MISSING_PROJECT,
+			highlightValue, source, target,
+			(p, prj) -> p.getProjects().add(prj),
+			// every component is separated by "::", otherwise different (program, code) pairs
+			// could be rendered as the same string
+			prj -> prj.getFunder() + "::" + prj.getFundingProgram() + "::" + prj.getCode());
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java
@ -0,0 +1,33 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Matcher for the {@code ENRICH_MISSING_PUBLICATION_DATE} topic.
+ * The search itself is not implemented yet: {@link #findUpdates(Result, Result)} always returns an empty list.
+ */
+public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
+
+	/** Creates the matcher with the multiple-update flag disabled. */
+	public EnrichMissingPublicationDate() {
+		super(false);
+	}
+
+	/**
+	 * Placeholder implementation.
+	 *
+	 * @param source the record that would provide the date
+	 * @param target the record that would receive it
+	 * @return always an empty list
+	 */
+	@Override
+	protected List<UpdateInfo<String>> findUpdates(final Result source, final Result target) {
+		// TODO: not implemented yet, no publication date update is ever proposed
+		return Arrays.asList();
+	}
+
+	/**
+	 * Wraps a date string into an {@link UpdateInfo} bound to the {@code ENRICH_MISSING_PUBLICATION_DATE} topic.
+	 *
+	 * @param highlightValue the date carried by the update
+	 * @param source the record handed to the update as its source
+	 * @param target the record handed to the update as its target
+	 * @return an update that sets the publication date of the publication; its string form is the date itself
+	 */
+	@Override
+	public UpdateInfo<String> generateUpdateInfo(final String highlightValue, final Result source,
+		final Result target) {
+		return new UpdateInfo<>(
+			Topic.ENRICH_MISSING_PUBLICATION_DATE,
+			highlightValue, source, target,
+			(p, date) -> p.setPublicationdate(date),
+			s -> s);
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java
@ -0,0 +1,53 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+/**
+ * Matcher that proposes subjects of the source record whose classification (qualifier class id)
+ * is not present among the subjects of the target record.
+ */
+public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
+
+	/** Creates the matcher with the multiple-update flag enabled. */
+	public EnrichMissingSubject() {
+		super(true);
+	}
+
+	/**
+	 * Collects the subjects of {@code source} whose classification does not occur in {@code target}.
+	 *
+	 * @param source the record providing the candidate subjects
+	 * @param target the record whose subject classifications are already known
+	 * @return one update per source subject having a classification unknown to the target
+	 */
+	@Override
+	protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
+		final Set<String> existingTypes = target
+			.getSubject()
+			.stream()
+			.map(StructuredProperty::getQualifier)
+			.map(Qualifier::getClassid)
+			.collect(Collectors.toSet());
+
+		// the candidates are the SUBJECTS of the source record (the PID list was read here by mistake)
+		return source
+			.getSubject()
+			.stream()
+			.filter(subject -> !existingTypes.contains(subject.getQualifier().getClassid()))
+			.map(ConversionUtils::oafSubjectToPair)
+			.map(i -> generateUpdateInfo(i, source, target))
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Wraps a (classification, value) pair into an {@link UpdateInfo}; the topic path is
+	 * {@code ENRICH/MISSING/SUBJECT/} followed by the classification.
+	 *
+	 * @param highlightValue pair holding the classification on the left and the subject value on the right
+	 * @param source the record handed to the update as its source
+	 * @param target the record handed to the update as its target
+	 * @return an update that appends the subject value to the publication's subjects
+	 */
+	@Override
+	public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
+		final Result source,
+		final Result target) {
+
+		return new UpdateInfo<>(
+			Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()),
+			highlightValue, source, target,
+			(p, pair) -> p.getSubjects().add(pair.getRight()),
+			pair -> pair.getLeft() + "::" + pair.getRight());
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java
@ -0,0 +1,53 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.broker.objects.Instance;
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
+import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Matcher that proposes open-access instances of the source record whose URL is not already
+ * exposed by an open-access instance of the target record.
+ */
+public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
+
+	/** Creates the matcher with the multiple-update flag enabled. */
+	public EnrichMoreOpenAccess() {
+		super(true);
+	}
+
+	/** Tells whether the access right class id of the given instance equals {@link BrokerConstants#OPEN_ACCESS}. */
+	private static boolean isOpenAccess(final eu.dnetlib.dhp.schema.oaf.Instance instance) {
+		return instance.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS);
+	}
+
+	/**
+	 * Collects the open-access instances of {@code source}, one per URL, whose URL is unknown
+	 * to the open-access instances of {@code target}.
+	 *
+	 * @param source the record providing the candidate instances
+	 * @param target the record whose open-access URLs are already known
+	 * @return one update per new open-access URL
+	 */
+	@Override
+	protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
+		// URLs already available in open access on the target record
+		final Set<String> knownUrls = target
+			.getInstance()
+			.stream()
+			.filter(EnrichMoreOpenAccess::isOpenAccess)
+			.flatMap(oaInstance -> oaInstance.getUrl().stream())
+			.collect(Collectors.toSet());
+
+		return source
+			.getInstance()
+			.stream()
+			.filter(EnrichMoreOpenAccess::isOpenAccess)
+			.flatMap(ConversionUtils::oafInstanceToBrokerInstances)
+			.filter(candidate -> !knownUrls.contains(candidate.getUrl()))
+			.map(candidate -> generateUpdateInfo(candidate, source, target))
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Wraps an instance into an {@link UpdateInfo} bound to the {@code ENRICH_MORE_OA_VERSION} topic.
+	 *
+	 * @param highlightValue the instance carried by the update
+	 * @param source the record handed to the update as its source
+	 * @param target the record handed to the update as its target
+	 * @return an update that appends the instance to the publication's instances; its string form is the URL
+	 */
+	@Override
+	public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue,
+		final Result source,
+		final Result target) {
+		return new UpdateInfo<>(
+			Topic.ENRICH_MORE_OA_VERSION,
+			highlightValue, source, target,
+			(publication, instance) -> publication.getInstances().add(instance),
+			Instance::getUrl);
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java
@ -0,0 +1,46 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.broker.objects.Pid;
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Matcher that proposes PIDs of the source record that are not present, as (type, value) couple,
+ * among the PIDs of the target record.
+ */
+public class EnrichMorePid extends UpdateMatcher<Pid> {
+
+	/** Creates the matcher with the multiple-update flag enabled. */
+	public EnrichMorePid() {
+		super(true);
+	}
+
+	/** Builds the {@code classid::value} key used to compare PIDs. */
+	private static String keyOf(final eu.dnetlib.dhp.schema.oaf.StructuredProperty pid) {
+		return pid.getQualifier().getClassid() + "::" + pid.getValue();
+	}
+
+	/**
+	 * Collects the PIDs of {@code source} whose key does not occur among the PIDs of {@code target}.
+	 *
+	 * @param source the record providing the candidate PIDs
+	 * @param target the record whose PIDs are already known
+	 * @return one update per PID unknown to the target
+	 */
+	@Override
+	protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) {
+		final Set<String> knownKeys = target
+			.getPid()
+			.stream()
+			.map(EnrichMorePid::keyOf)
+			.collect(Collectors.toSet());
+
+		return source
+			.getPid()
+			.stream()
+			.filter(candidate -> !knownKeys.contains(keyOf(candidate)))
+			.map(ConversionUtils::oafPidToBrokerPid)
+			.map(brokerPid -> generateUpdateInfo(brokerPid, source, target))
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Wraps a PID into an {@link UpdateInfo} bound to the {@code ENRICH_MORE_PID} topic.
+	 *
+	 * @param highlightValue the PID carried by the update
+	 * @param source the record handed to the update as its source
+	 * @param target the record handed to the update as its target
+	 * @return an update that appends the PID to the publication's PIDs; its string form is {@code type::value}
+	 */
+	@Override
+	public UpdateInfo<Pid> generateUpdateInfo(final Pid highlightValue, final Result source, final Result target) {
+		return new UpdateInfo<>(
+			Topic.ENRICH_MORE_PID,
+			highlightValue, source, target,
+			(publication, brokerPid) -> publication.getPids().add(brokerPid),
+			brokerPid -> brokerPid.getType() + "::" + brokerPid.getValue());
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java
@ -0,0 +1,50 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Matcher that proposes subjects of the source record that are not present, as
+ * (classification, value) couple, among the subjects of the target record.
+ */
+public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
+
+	/** Creates the matcher with the multiple-update flag enabled. */
+	public EnrichMoreSubject() {
+		super(true);
+	}
+
+	/**
+	 * Collects the subjects of {@code source} whose {@code classid::value} key does not occur
+	 * among the subjects of {@code target}.
+	 *
+	 * @param source the record providing the candidate subjects
+	 * @param target the record whose subjects are already known
+	 * @return one update per subject unknown to the target
+	 */
+	@Override
+	protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
+		final Set<String> existingSubjects = target
+			.getSubject()
+			.stream()
+			.map(subject -> subject.getQualifier().getClassid() + "::" + subject.getValue())
+			.collect(Collectors.toSet());
+
+		// the candidates are the SUBJECTS of the source record (the PID list was read here by mistake)
+		return source
+			.getSubject()
+			.stream()
+			.filter(
+				subject -> !existingSubjects
+					.contains(subject.getQualifier().getClassid() + "::" + subject.getValue()))
+			.map(ConversionUtils::oafSubjectToPair)
+			.map(i -> generateUpdateInfo(i, source, target))
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Wraps a (classification, value) pair into an {@link UpdateInfo}; the topic path is
+	 * {@code ENRICH/MORE/SUBJECT/} followed by the classification.
+	 *
+	 * @param highlightValue pair holding the classification on the left and the subject value on the right
+	 * @param source the record handed to the update as its source
+	 * @param target the record handed to the update as its target
+	 * @return an update that appends the subject value to the publication's subjects
+	 */
+	@Override
+	public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
+		final Result source,
+		final Result target) {
+
+		return new UpdateInfo<>(
+			Topic.fromPath("ENRICH/MORE/SUBJECT/" + highlightValue.getLeft()),
+			highlightValue, source, target,
+			(p, pair) -> p.getSubjects().add(pair.getRight()),
+			pair -> pair.getLeft() + "::" + pair.getRight());
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java
@ -0,0 +1,64 @@
+
+package eu.dnetlib.dhp.broker.oa.matchers;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Field;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Base class of the matchers that compare a record with other records and produce the
+ * {@link UpdateInfo}s suggesting how the record could be enriched.
+ *
+ * @param <T> type of the highlighted value carried by the produced updates
+ */
+public abstract class UpdateMatcher<T> {
+
+	/** When false, at most one update (the most trusted one) is returned for a record. */
+	private final boolean multipleUpdate;
+
+	/**
+	 * @param multipleUpdate true if several updates may be returned for the same record,
+	 *                       false if only the most trusted one must be kept
+	 */
+	public UpdateMatcher(final boolean multipleUpdate) {
+		this.multipleUpdate = multipleUpdate;
+	}
+
+	/**
+	 * Searches the updates applicable to {@code res} by comparing it with each of the other records.
+	 * Updates having the same highlight (compared through the MD5 of its string form) are
+	 * deduplicated keeping the one with the highest trust.
+	 *
+	 * @param res the record to enrich
+	 * @param others the records used as sources; the {@code res} instance itself is skipped
+	 * @return all the deduplicated updates when multiple updates are allowed, otherwise a
+	 *         single-element list holding the most trusted update (or an empty collection
+	 *         when nothing was found)
+	 */
+	public Collection<UpdateInfo<T>> searchUpdatesForRecord(final Result res, final Result... others) {
+
+		final Map<String, UpdateInfo<T>> infoMap = new HashMap<>();
+
+		for (final Result source : others) {
+			// identity check: a record is never compared with itself
+			if (source != res) {
+				for (final UpdateInfo<T> info : findUpdates(source, res)) {
+					final String key = DigestUtils.md5Hex(info.getHighlightValueAsString());
+					final UpdateInfo<T> current = infoMap.get(key);
+					// register the update when it is new or more trusted than the registered one
+					// (the original branches were inverted and the map was never populated)
+					if (current == null || current.getTrust() < info.getTrust()) {
+						infoMap.put(key, info);
+					}
+				}
+			}
+		}
+
+		final Collection<UpdateInfo<T>> values = infoMap.values();
+
+		if (values.isEmpty() || multipleUpdate) {
+			return values;
+		} else {
+			// values is not empty here, so the Optional returned by max() always holds a value
+			final UpdateInfo<T> v = values
+				.stream()
+				.max((o1, o2) -> Float.compare(o1.getTrust(), o2.getTrust()))
+				.get();
+			return Arrays.asList(v);
+		}
+	}
+
+	/**
+	 * Finds the updates that {@code source} can provide to {@code target}.
+	 *
+	 * @param source the record providing the information
+	 * @param target the record to enrich
+	 * @return the candidate updates
+	 */
+	protected abstract List<UpdateInfo<T>> findUpdates(Result source, Result target);
+
+	/**
+	 * Wraps a highlight value into the {@link UpdateInfo} specific of the matcher.
+	 *
+	 * @param highlightValue the value carried by the update
+	 * @param source the record providing the information
+	 * @param target the record to enrich
+	 * @return the update info
+	 */
+	protected abstract UpdateInfo<T> generateUpdateInfo(final T highlightValue, final Result source,
+		final Result target);
+
+	/**
+	 * Tells whether a list of string fields must be considered missing.
+	 *
+	 * @param list the fields to check
+	 * @return true if the list is null, empty, or the value of its first element is blank
+	 */
+	protected static boolean isMissing(final List<Field<String>> list) {
+		return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue());
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java
@ -0,0 +1,7 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+/** Constants shared by the broker event generation code. */
+public class BrokerConstants {
+
+	/** Value compared with the access right class id of an instance to recognize open-access instances. */
+	public final static String OPEN_ACCESS = "OPEN";
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
@ -0,0 +1,36 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.stream.Stream;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import eu.dnetlib.broker.objects.Instance;
+import eu.dnetlib.broker.objects.Pid;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class ConversionUtils {
+
+	public static Stream<Instance> oafInstanceToBrokerInstances(final eu.dnetlib.dhp.schema.oaf.Instance i) {
+		return i.getUrl().stream().map(url -> {
+			final Instance r = new Instance();
+			r.setUrl(url);
+			r.setInstancetype(i.getInstancetype().getClassid());
+			r.setLicense(BrokerConstants.OPEN_ACCESS);
+			r.setHostedby(i.getHostedby().getValue());
+			return r;
+		});
+	}
+
+	public static Pid oafPidToBrokerPid(final StructuredProperty sp) {
+		final Pid pid = new Pid();
+		pid.setValue(sp.getValue());
+		pid.setType(sp.getQualifier().getClassid());
+		return pid;
+	}
+
+	public static final Pair<String, String> oafSubjectToPair(final StructuredProperty sp) {
+		return Pair.of(sp.getQualifier().getClassid(), sp.getValue());
+	}
+
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java
@ -1,31 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMissingAbstract extends UpdateInfo<String> {
-
-	public static List<EnrichMissingAbstract> findUpdates(final Result source, final Result target) {
-		// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
-		return Arrays.asList();
-	}
-
-	private EnrichMissingAbstract(final String highlightValue, final float trust) {
-		super("ENRICH/MISSING/ABSTRACT", highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		payload.getHighlight().getAbstracts().add(getHighlightValue());
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java
@ -1,31 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMissingAuthorOrcid extends UpdateInfo<String> {
-
-	public static List<EnrichMissingAuthorOrcid> findUpdates(final Result source, final Result target) {
-		// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
-		return Arrays.asList();
-	}
-
-	private EnrichMissingAuthorOrcid(final String highlightValue, final float trust) {
-		super("ENRICH/MISSING/AUTHOR/ORCID", highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		// TODO
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java
@ -1,32 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.Instance;
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMissingOpenAccess extends UpdateInfo<Instance> {
-
-	public static List<EnrichMissingOpenAccess> findUpdates(final Result source, final Result target) {
-		// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
-		return Arrays.asList();
-	}
-
-	private EnrichMissingOpenAccess(final Instance highlightValue, final float trust) {
-		super("ENRICH/MISSING/OPENACCESS_VERSION", highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		payload.getHighlight().getInstances().add(getHighlightValue());
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue().getUrl();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java
@ -1,32 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.broker.objects.Pid;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMissingPid extends UpdateInfo<Pid> {
-
-	public static List<EnrichMissingPid> findUpdates(final Result source, final Result target) {
-		// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
-		return Arrays.asList();
-	}
-
-	private EnrichMissingPid(final Pid highlightValue, final float trust) {
-		super("ENRICH/MISSING/PID", highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		payload.getHighlight().getPids().add(getHighlightValue());
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue().getType() + "::" + getHighlightValue().getValue();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java
@ -1,33 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.broker.objects.Project;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMissingProject extends UpdateInfo<Project> {
-
-	public static List<EnrichMissingProject> findUpdates(final Result source, final Result target) {
-		// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
-		return Arrays.asList();
-	}
-
-	private EnrichMissingProject(final Project highlightValue, final float trust) {
-		super("ENRICH/MISSING/PROJECT", highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		payload.getHighlight().getProjects().add(getHighlightValue());
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram()
-			+ getHighlightValue().getCode();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java
@ -1,31 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMissingPublicationDate extends UpdateInfo<String> {
-
-	public static List<EnrichMissingPublicationDate> findUpdates(final Result source, final Result target) {
-		// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
-		return Arrays.asList();
-	}
-
-	private EnrichMissingPublicationDate(final String highlightValue, final float trust) {
-		super("ENRICH/MISSING/PUBLICATION_DATE", highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		payload.getHighlight().setPublicationdate(getHighlightValue());
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java
@ -1,36 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMissingSubject extends UpdateInfo<String> {
-
-	public static List<EnrichMissingSubject> findUpdates(final Result source, final Result target) {
-		// MESHEUROPMC
-		// ARXIV
-		// JEL
-		// DDC
-		// ACM
-
-		return Arrays.asList();
-	}
-
-	private EnrichMissingSubject(final String subjectClassification, final String highlightValue, final float trust) {
-		super("ENRICH/MISSING/SUBJECT/" + subjectClassification, highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		payload.getHighlight().getSubjects().add(getHighlightValue());
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java
@ -1,32 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.Instance;
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMoreOpenAccess extends UpdateInfo<Instance> {
-
-	public static List<EnrichMoreOpenAccess> findUpdates(final Result source, final Result target) {
-		// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
-		return Arrays.asList();
-	}
-
-	private EnrichMoreOpenAccess(final Instance highlightValue, final float trust) {
-		super("ENRICH/MORE/OPENACCESS_VERSION", highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		payload.getHighlight().getInstances().add(getHighlightValue());
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue().getUrl();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java
@ -1,32 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.broker.objects.Pid;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMorePid extends UpdateInfo<Pid> {
-
-	public static List<EnrichMorePid> findUpdates(final Result source, final Result target) {
-		// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
-		return Arrays.asList();
-	}
-
-	private EnrichMorePid(final Pid highlightValue, final float trust) {
-		super("ENRICH/MORE/PID", highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		payload.getHighlight().getPids().add(getHighlightValue());
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue().getType() + "::" + getHighlightValue().getValue();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java
@ -1,36 +0,0 @@
-
-package eu.dnetlib.dhp.broker.oa.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-import eu.dnetlib.broker.objects.OpenAireEventPayload;
-import eu.dnetlib.dhp.schema.oaf.Result;
-
-public class EnrichMoreSubject extends UpdateInfo<String> {
-
-	public static List<EnrichMoreSubject> findUpdates(final Result source, final Result target) {
-		// MESHEUROPMC
-		// ARXIV
-		// JEL
-		// DDC
-		// ACM
-
-		return Arrays.asList();
-	}
-
-	private EnrichMoreSubject(final String subjectClassification, final String highlightValue, final float trust) {
-		super("ENRICH/MORE/SUBJECT/" + subjectClassification, highlightValue, trust);
-	}
-
-	@Override
-	public void compileHighlight(final OpenAireEventPayload payload) {
-		payload.getHighlight().getSubjects().add(getHighlightValue());
-	}
-
-	@Override
-	public String getHighlightValueAsString() {
-		return getHighlightValue();
-	}
-
-}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java
@ -1,36 +1,77 @@

 package eu.dnetlib.dhp.broker.oa.util;

+import java.util.function.BiConsumer;
+import java.util.function.Function;
+
 import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.broker.objects.Publication;
+import eu.dnetlib.dhp.broker.model.Topic;
+import eu.dnetlib.dhp.schema.oaf.Result;

-public abstract class UpdateInfo<T> {
+public final class UpdateInfo<T> {

-	private final String topic;
+	private final Topic topic;

 	private final T highlightValue;

+	private final Result source;
+
+	private final Result target;
+
+	private final BiConsumer<Publication, T> compileHighlight;
+
+	private final Function<T, String> highlightToString;
+
 	private final float trust;

-	protected UpdateInfo(final String topic, final T highlightValue, final float trust) {
+	public UpdateInfo(final Topic topic, final T highlightValue, final Result source, final Result target,
+		final BiConsumer<Publication, T> compileHighlight,
+		final Function<T, String> highlightToString) {
 		this.topic = topic;
 		this.highlightValue = highlightValue;
-		this.trust = trust;
+		this.source = source;
+		this.target = target;
+		this.compileHighlight = compileHighlight;
+		this.highlightToString = highlightToString;
+		this.trust = calculateTrust(source, target);
 	}

 	public T getHighlightValue() {
 		return highlightValue;
 	}

+	public Result getSource() {
+		return source;
+	}
+
+	public Result getTarget() {
+		return target;
+	}
+
+	private float calculateTrust(final Result source, final Result target) {
+		// TODO
+		return 0.9f;
+	}
+
+	protected Topic getTopic() {
+		return topic;
+	}
+
+	public String getTopicPath() {
+		return topic.getPath();
+	}
+
 	public float getTrust() {
 		return trust;
 	}

-	public String getTopic() {
-		return topic;
+	public void compileHighlight(final OpenAireEventPayload payload) {
+		compileHighlight.accept(payload.getHighlight(), getHighlightValue());
 	}

-	abstract public void compileHighlight(OpenAireEventPayload payload);
-
-	abstract public String getHighlightValueAsString();
+	public String getHighlightValueAsString() {
+		return highlightToString.apply(getHighlightValue());
+	}

 }
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java
@ -129,6 +129,9 @@ public class DedupUtility {
 						.max(Comparator.comparing(Tuple2::_1));
 					if (simAuhtor.isPresent() && simAuhtor.get()._1() > THRESHOLD) {
 						Author r = simAuhtor.get()._2();
+						if (r.getPid() == null) {
+							r.setPid(new ArrayList<>());
+						}
 						r.getPid().add(a._1());
 					}
 				});
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
@ -0,0 +1,104 @@
+package eu.dnetlib.doiboost
+
+import eu.dnetlib.dhp.schema.oaf.{DataInfo, Dataset, Field, KeyValue, Qualifier, Result, StructuredProperty}
+import eu.dnetlib.dhp.utils.DHPUtils
+
+/** Constants and factory helpers shared by the DOIBoost mappings to build OAF objects. */
+object DoiBoostMappingUtil {
+
+  // Static strings
+  val MAG = "microsoft"
+  val ORCID = "ORCID"
+  val CROSSREF = "Crossref"
+  val UNPAYWALL = "UnpayWall"
+  val GRID_AC = "grid.ac"
+  val WIKPEDIA = "wikpedia"
+  val doiBoostNSPREFIX = "doiboost____"
+  val OPENAIRE_PREFIX = "openaire____"
+  val SEPARATOR = "::"
+  val DNET_LANGUAGES = "dnet:languages"
+  val PID_TYPES = "dnet:pid_types"
+
+  /** Creates a DataInfo with all the flags disabled, trust "0.9" and the "sysimport:actionset" provenance. */
+  def generateDataInfo(): DataInfo = {
+    val dataInfo = new DataInfo
+    dataInfo.setDeletedbyinference(false)
+    dataInfo.setInferred(false)
+    dataInfo.setInvisible(false)
+    dataInfo.setTrust("0.9")
+    dataInfo.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions"))
+    dataInfo
+  }
+
+  /** Creates a StructuredProperty with the given value and a qualifier built from classId and schemeId. */
+  def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
+    val property = new StructuredProperty
+    property.setQualifier(createQualifier(classId, schemeId))
+    property.setValue(value)
+    property
+  }
+
+  /** Same as the three-argument variant, additionally attaching the given DataInfo. */
+  def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
+    val property = createSP(value, classId, schemeId)
+    property.setDataInfo(dataInfo)
+    property
+  }
+
+  /** Builds a KeyValue whose value is `name` and whose key is derived from the MD5 of `keySeed`. */
+  private def collectedFrom(name: String, keySeed: String): KeyValue = {
+    val keyValue = new KeyValue
+    keyValue.setValue(name)
+    keyValue.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + DHPUtils.md5(keySeed))
+    keyValue
+  }
+
+  /** Creates the collected-from entry for Crossref. */
+  def createCrossrefCollectedFrom(): KeyValue = collectedFrom(CROSSREF, "crossref")
+
+  /**
+   * Generates the identifier of a result from its DOI: the MD5 of the lower-cased DOI,
+   * prefixed by "60|" for a Dataset and by "50|" for any other result, followed by the
+   * doiboost namespace prefix and the separator.
+   */
+  def generateIdentifier(oaf: Result, doi: String): String = {
+    val hashedDoi = DHPUtils.md5(doi.toLowerCase)
+    val entityPrefix = if (oaf.isInstanceOf[Dataset]) "60" else "50"
+    s"${entityPrefix}|${doiBoostNSPREFIX}${SEPARATOR}${hashedDoi}"
+  }
+
+  /** Creates the collected-from entry for MAG. */
+  def createMAGCollectedFrom(): KeyValue = collectedFrom(MAG, MAG)
+
+  /** Creates a Qualifier from class id, class name, scheme id and scheme name. */
+  def createQualifier(clsName: String, clsValue: String, schName: String, schValue: String): Qualifier = {
+    val qualifier = new Qualifier
+    qualifier.setClassid(clsName)
+    qualifier.setClassname(clsValue)
+    qualifier.setSchemeid(schName)
+    qualifier.setSchemename(schValue)
+    qualifier
+  }
+
+  /** Creates a Qualifier using `cls` as both class id and name, and `sch` as both scheme id and name. */
+  def createQualifier(cls: String, sch: String): Qualifier =
+    createQualifier(cls, cls, sch, sch)
+
+  /** Wraps a value into a Field. */
+  def asField[T](value: T): Field[T] = {
+    val field = new Field[T]
+    field.setValue(value)
+    field
+  }
+
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -14,6 +14,7 @@ import org.slf4j.{Logger, LoggerFactory}
 import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.util.matching.Regex
+import eu.dnetlib.doiboost.DoiBoostMappingUtil._

 case class mappingAffiliation(name: String) {}

@ -25,18 +26,7 @@ case class mappingFunder(name: String, DOI: Option[String], award: Option[List[S
 case object Crossref2Oaf {
  val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)

-  //STATIC STRING
-  val MAG = "MAG"
-  val ORCID = "ORCID"
-  val CROSSREF = "Crossref"
-  val UNPAYWALL = "UnpayWall"
-  val GRID_AC = "grid.ac"
-  val WIKPEDIA = "wikpedia"
-  val doiBoostNSPREFIX = "doiboost____"
-  val OPENAIRE_PREFIX = "openaire____"
-  val SEPARATOR = "::"
-  val DNET_LANGUAGES = "dnet:languages"
-  val PID_TYPES = "dnet:pid_types"
+

  val mappingCrossrefType = Map(
    "book-section" -> "publication",
@ -116,7 +106,7 @@ case object Crossref2Oaf {
    result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long])
    result.setDateofcollection((json \ "indexed" \ "date-time").extract[String])

-    result.setCollectedfrom(List(createCollectedFrom()).asJava)
+    result.setCollectedfrom(List(createCrossrefCollectedFrom()).asJava)

    // Publisher ( Name of work's publisher mapped into  Result/Publisher)
    val publisher = (json \ "publisher").extractOrElse[String](null)
@ -168,7 +158,7 @@ case object Crossref2Oaf {
    result.setInstance(List(instance).asJava)
    instance.setInstancetype(createQualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), "dnet:publication_resource", "dnet:publication_resource"))

-    instance.setCollectedfrom(createCollectedFrom())
+    instance.setCollectedfrom(createCrossrefCollectedFrom())
    if (StringUtils.isNotBlank(issuedDate)) {
      instance.setDateofacceptance(asField(issuedDate))
    }
@ -215,7 +205,7 @@ case object Crossref2Oaf {
    val funderList: List[mappingFunder] = (json \ "funder").extractOrElse[List[mappingFunder]](List())

    if (funderList.nonEmpty) {
-      resultList = resultList ::: mappingFunderToRelations(funderList, result.getId, createCollectedFrom(), result.getDataInfo, result.getLastupdatetimestamp)
+      resultList = resultList ::: mappingFunderToRelations(funderList, result.getId, createCrossrefCollectedFrom(), result.getDataInfo, result.getLastupdatetimestamp)
    }


@ -416,71 +406,8 @@ case object Crossref2Oaf {
  }


-  def generateIdentifier(oaf: Result, doi: String): String = {
-    val id = DHPUtils.md5(doi.toLowerCase)
-    if (oaf.isInstanceOf[Dataset])
-      return s"60|${doiBoostNSPREFIX}${SEPARATOR}${id}"
-    s"50|${doiBoostNSPREFIX}${SEPARATOR}${id}"
-  }
-
-  def asField[T](value: T): Field[T] = {
-    val tmp = new Field[T]
-    tmp.setValue(value)
-    tmp


-  }
-
-
-  def generateDataInfo(): DataInfo = {
-    val di = new DataInfo
-    di.setDeletedbyinference(false)
-    di.setInferred(false)
-    di.setInvisible(false)
-    di.setTrust("0.9")
-    di.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions"))
-    di
-  }
-
-
-  def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
-    val sp = new StructuredProperty
-    sp.setQualifier(createQualifier(classId, schemeId))
-    sp.setValue(value)
-    sp
-
-  }
-
-  def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
-    val sp = new StructuredProperty
-    sp.setQualifier(createQualifier(classId, schemeId))
-    sp.setValue(value)
-    sp.setDataInfo(dataInfo)
-    sp
-
-  }
-
-  def createCollectedFrom(): KeyValue = {
-
-    val cf = new KeyValue
-    cf.setValue(CROSSREF)
-    cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + DHPUtils.md5("crossref"))
-    cf
-
-  }
-
-  def createQualifier(clsName: String, clsValue: String, schName: String, schValue: String): Qualifier = {
-    val q = new Qualifier
-    q.setClassid(clsName)
-    q.setClassname(clsValue)
-    q.setSchemeid(schName)
-    q.setSchemename(schValue)
-    q
-  }
-
-  def createQualifier(cls: String, sch: String): Qualifier = {
-    createQualifier(cls, cls, sch, sch)
-  }


  def generateItemFromType(objectType: String, objectSubType: String): Result = {
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala
@ -1,52 +1,215 @@
 package eu.dnetlib.doiboost.mag


+import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication}
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.jackson.JsonMethods.parse
+import eu.dnetlib.doiboost.DoiBoostMappingUtil._
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+import scala.util.matching.Regex


-case class Papers(PaperId:Long, Rank:Integer, Doi:String,
-                  DocType:String, PaperTitle:String, OriginalTitle:String,
-                  BookTitle:String, Year:Option[Integer], Date:Option[java.sql.Timestamp], Publisher:String,
-                  JournalId:Option[Long], ConferenceSeriesId:Option[Long], ConferenceInstanceId:Option[Long],
-                  Volume:String, Issue:String, FirstPage:String, LastPage:String,
-                  ReferenceCount:Option[Long], CitationCount:Option[Long], EstimatedCitation:Option[Long],
-                  OriginalVenue:String, FamilyId:Option[Long], CreatedDate:java.sql.Timestamp) {}
+case class MagPapers(PaperId: Long, Rank: Integer, Doi: String,
+                     DocType: String, PaperTitle: String, OriginalTitle: String,
+                     BookTitle: String, Year: Option[Integer], Date: Option[java.sql.Timestamp], Publisher: String,
+                     JournalId: Option[Long], ConferenceSeriesId: Option[Long], ConferenceInstanceId: Option[Long],
+                     Volume: String, Issue: String, FirstPage: String, LastPage: String,
+                     ReferenceCount: Option[Long], CitationCount: Option[Long], EstimatedCitation: Option[Long],
+                     OriginalVenue: String, FamilyId: Option[Long], CreatedDate: java.sql.Timestamp) {} // One row of the MAG "Papers" dataset; SparkPreProcessMAG loads it from <sourcePath>/Papers and keeps one row per DOI


-case class PaperAbstract(PaperId:Long,IndexedAbstract:String) {}
+case class MagPaperAbstract(PaperId: Long, IndexedAbstract: String) {} // Abstract of a paper: IndexedAbstract is the JSON inverted index as loaded, or the rebuilt plain text once transformPaperAbstract has been applied

+case class MagAuthor(AuthorId: Long, Rank: Option[Int], NormalizedName: Option[String], DisplayName: Option[String], LastKnownAffiliationId: Option[Long], PaperCount: Option[Long], CitationCount: Option[Long], CreatedDate: Option[java.sql.Timestamp]) {} // One row of the MAG "Authors" dataset (loaded from <sourcePath>/Authors); DisplayName is used as the author's full name
+
+case class MagAffiliation(AffiliationId: Long, Rank: Int, NormalizedName: String, DisplayName: String, GridId: String, OfficialPage: String, WikiPage: String, PaperCount: Long, CitationCount: Long, Latitude: Option[Float], Longitude: Option[Float], CreatedDate: java.sql.Timestamp) {} // One row of the MAG "Affiliations" dataset (loaded from <sourcePath>/Affiliations); DisplayName is what gets attached to authors
+
+case class MagPaperAuthorAffiliation(PaperId: Long, AuthorId: Long, AffiliationId: Option[Long], AuthorSequenceNumber: Int, OriginalAuthor: String, OriginalAffiliation: String) {} // One row of "PaperAuthorAffiliations": links a paper to an author and, optionally, to an affiliation
+
+
+case class MagAuthorAffiliation(author: MagAuthor, affiliation:String) // An author paired with an affiliation name; affiliation may be null (the OAF converters check for it)
+
+case class MagPaperWithAuthorList(PaperId: Long, authors: List[MagAuthorAffiliation]) {} // All authors of one paper, as read back from the merge_step_1_paper_authors output
+
+case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String) {} // Intermediate join row: one author of one paper; affiliation stays null unless the affiliation join finds a match
+
+case class MagPaperUrl(PaperId: Long, SourceType: Option[Int], SourceUrl: Option[String], LanguageCode: Option[String]) {} // One row of the MAG "PaperUrls" dataset (loaded from <sourcePath>/PaperUrls)
+
+case class MagUrl(PaperId: Long, instances: List[String]) // URLs collected for one paper (PaperUrls grouped by PaperId); consumed by ConversionUtil.addInstances
+
+
+case class MagJournal(JournalId: Long, Rank: Option[Int], NormalizedName: Option[String], DisplayName: Option[String], Issn: Option[String], Publisher: Option[String], Webpage: Option[String], PaperCount: Option[Long], CitationCount: Option[Long], CreatedDate: Option[java.sql.Timestamp]) {} // One row of the MAG "Journals" dataset (loaded from <sourcePath>/Journals)


 case object ConversionUtil {

+  def extractMagIdentifier(pids:mutable.Buffer[String]) :String ={ // Returns the first entry of pids made only of decimal digits (the MAG PaperId stored in originalId), or null when none matches
+    val magIDRegex: Regex = "^[0-9]+$".r // one or more digits, anchored at both ends
+    val s =pids.filter(p=> magIDRegex.findAllIn(p).hasNext) // keep only the all-digit entries, in their original order

-
-  def transformPaperAbstract(input:PaperAbstract) : PaperAbstract = {
-    PaperAbstract(input.PaperId, convertInvertedIndexString(input.IndexedAbstract))
+    if (s.nonEmpty)
+      return s.head
+    null
  }



-  def convertInvertedIndexString(json_input:String) :String = {
+  /**
+    * Replaces the publication's instance list with a single MAG-collected Instance.
+    *
+    * The instance URLs are the non-empty URLs found for the paper (when any were joined in)
+    * followed by the paper's Microsoft Academic detail page, whose address is built from the
+    * MAG identifier stored in the publication's originalId.
+    *
+    * @param a the publication and its URL list; the URL list is null when the paper has no URL rows
+    * @return the same publication instance, updated in place
+    */
+  def addInstances(a: (Publication, MagUrl)): Publication = {
+    val publication = a._1
+    val magUrl = a._2
+
+    // Declared as a def so the identifier lookup runs only when (and where) the URL is needed.
+    def detailPage: String = s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(publication.getOriginalId.asScala)}"
+
+    val instance = new Instance
+
+    val instanceUrls: List[String] =
+      if (magUrl == null)
+        List(detailPage)
+      else
+        magUrl.instances.filter(u => u.nonEmpty) :+ detailPage
+
+    instance.setUrl(instanceUrls.asJava)
+    instance.setCollectedfrom(createMAGCollectedFrom())
+    publication.setInstance(List(instance).asJava)
+    publication
+  }
+
+
+  /**
+    * Returns a copy of the given abstract whose IndexedAbstract has been converted from the
+    * JSON inverted index to plain text; the PaperId is carried over unchanged.
+    */
+  def transformPaperAbstract(input: MagPaperAbstract): MagPaperAbstract = {
+    val plainText = convertInvertedIndexString(input.IndexedAbstract)
+    input.copy(IndexedAbstract = plainText)
+  }
+
+
+  /**
+    * Builds the first version of a Publication from a MAG paper joined with its journal and
+    * its author list.
+    *
+    * The lower-cased DOI becomes the pid, is stored next to the MAG PaperId in originalId and
+    * is used to generate the identifier.
+    *
+    * @param inputParams ((paper, journal), authors); journal and authors are null when the
+    *                    corresponding left join found no match for the paper
+    * @return the Publication carrying pid, originalId, id, titles, source, publisher and, when
+    *         available, authors, date of acceptance and journal
+    */
+  def createOAFFromJournalAuthorPaper(inputParams: ((MagPapers, MagJournal), MagPaperWithAuthorList)): Publication = {
+    val paper = inputParams._1._1
+    val journal = inputParams._1._2
+    val authors = inputParams._2
+
+    val pub = new Publication
+    pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", PID_TYPES)).asJava)
+    pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)
+
+    //Set identifier as {50|60} | doiboost____::md5(DOI)
+    pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase))
+
+    val mainTitles = createSP(paper.PaperTitle, "main title", "dnet:dataCite_title")
+    val originalTitles = createSP(paper.OriginalTitle, "alternative title", "dnet:dataCite_title")
+    pub.setTitle(List(mainTitles, originalTitles).asJava)
+
+    pub.setSource(List(asField(paper.BookTitle)).asJava)
+
+    // The author list is attached with a left join, so papers without author rows arrive here
+    // with a null list: leave the authors unset instead of failing with a NullPointerException.
+    if (authors != null && authors.authors != null) {
+      val authorsOAF = authors.authors.map { f: MagAuthorAffiliation =>
+
+        val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author
+
+        // DisplayName is optional: orNull avoids a NoSuchElementException on unnamed authors
+        a.setFullname(f.author.DisplayName.orNull)
+
+        if (f.affiliation != null)
+          a.setAffiliation(List(asField(f.affiliation)).asJava)
+        a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", PID_TYPES)).asJava)
+        a
+      }
+      pub.setAuthor(authorsOAF.asJava)
+    }
+
+
+    if (paper.Date != null && paper.Date.isDefined) {
+      pub.setDateofacceptance(asField(paper.Date.get.toString))
+    }
+    pub.setPublisher(asField(paper.Publisher))
+
+
+    // The journal is optional too (left join) and is only mapped when it has a display name
+    if (journal != null && journal.DisplayName.isDefined) {
+      val j = new Journal
+
+      j.setName(journal.DisplayName.get)
+      j.setSp(paper.FirstPage)
+      j.setEp(paper.LastPage)
+      if (journal.Publisher.isDefined)
+        j.setEdition(journal.Publisher.get)
+      if (journal.Issn.isDefined)
+        j.setIssnPrinted(journal.Issn.get)
+      pub.setJournal(j)
+    }
+    pub
+  }
+
+
+  /**
+    * Builds a Publication from a MAG paper, its author list and its abstract.
+    *
+    * The lower-cased DOI becomes the pid, is stored next to the MAG PaperId in originalId and
+    * is used to generate the identifier.
+    *
+    * @param inputParams ((paper, authors), description); description may be null, in which case
+    *                    no description is set; a null author list leaves the authors unset
+    * @return the Publication carrying pid, originalId, id, titles, source and, when available,
+    *         description, authors and date of acceptance
+    */
+  def createOAF(inputParams: ((MagPapers, MagPaperWithAuthorList), MagPaperAbstract)): Publication = {
+
+    val paper = inputParams._1._1
+    val authors = inputParams._1._2
+    val description = inputParams._2
+
+    val pub = new Publication
+    pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", PID_TYPES)).asJava)
+    pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)
+
+    //Set identifier as {50|60} | doiboost____::md5(DOI)
+    pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase))
+
+    val mainTitles = createSP(paper.PaperTitle, "main title", "dnet:dataCite_title")
+    val originalTitles = createSP(paper.OriginalTitle, "alternative title", "dnet:dataCite_title")
+    pub.setTitle(List(mainTitles, originalTitles).asJava)
+
+    pub.setSource(List(asField(paper.BookTitle)).asJava)
+
+
+    if (description != null) {
+      pub.setDescription(List(asField(description.IndexedAbstract)).asJava)
+    }
+
+
+    // paper.Date is an Option: unwrap it before formatting, otherwise the stored value would be
+    // the literal text "Some(...)" or "None" rather than the timestamp itself.
+    if (paper.Date != null && paper.Date.isDefined) {
+      pub.setDateofacceptance(asField(paper.Date.get.toString))
+    }
+
+    // Guard against a missing author list (e.g. when it comes from an outer join)
+    if (authors != null && authors.authors != null) {
+      val authorsOAF = authors.authors.map { f: MagAuthorAffiliation =>
+
+        val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author
+
+        // DisplayName is optional: orNull avoids a NoSuchElementException on unnamed authors
+        a.setFullname(f.author.DisplayName.orNull)
+
+        if (f.affiliation != null)
+          a.setAffiliation(List(asField(f.affiliation)).asJava)
+
+        a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", PID_TYPES)).asJava)
+
+        a
+      }
+
+      pub.setAuthor(authorsOAF.asJava)
+    }
+
+
+    pub
+
+  }
+
+
+  def convertInvertedIndexString(json_input: String): String = { // Rebuilds the abstract text from a JSON inverted index ("IndexLength" plus "InvertedIndex": word -> positions); returns "" when IndexLength is not positive
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: json4s.JValue = parse(json_input)
-
-
-
    val idl = (json \ "IndexLength").extract[Int]
-
    if (idl > 0) {
      val res = Array.ofDim[String](idl)

      val iid = (json \ "InvertedIndex").extract[Map[String, List[Int]]]

-      for {(k:String,v:List[Int]) <- iid}{
+      for {(k: String, v: List[Int]) <- iid} { // write every word at each position it occupies in the text
        v.foreach(item => res(item) = k)
      }
+     (0 until idl).foreach(i => { // positions the index never references become empty strings, so mkString does not emit "null" for them
+       if (res(i) == null)
+         res(i) = ""
+     })
      return res.mkString(" ")
-
    }
    ""
  }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala
@ -1,13 +1,17 @@
 package eu.dnetlib.doiboost.mag

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.schema.oaf.Publication
+import eu.dnetlib.doiboost.DoiBoostMappingUtil.asField
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{Dataset, SaveMode, SparkSession}
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
 import org.apache.spark.sql.functions._

+import scala.collection.JavaConverters._
+
 object SparkPreProcessMAG {


@ -23,15 +27,21 @@ object SparkPreProcessMAG {
        .config(conf)
        .appName(getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()
+
+    val sourcePath = parser.get("sourcePath")
    import spark.implicits._
+    implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication]
+    implicit val tupleForJoinEncoder = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
+
+

    logger.info("Phase 1) make uninque DOI in Papers:")

-    val d: Dataset[Papers] = spark.read.load(s"${parser.get("sourcePath")}/Papers").as[Papers]
+    val d: Dataset[MagPapers] = spark.read.load(s"${parser.get("sourcePath")}/Papers").as[MagPapers]


    // Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one
-    val result: RDD[Papers] = d.where(col("Doi").isNotNull).rdd.map { p: Papers => Tuple2(p.Doi, p) }.reduceByKey { case (p1: Papers, p2: Papers) =>
+    val result: RDD[MagPapers] = d.where(col("Doi").isNotNull).rdd.map { p: MagPapers => Tuple2(p.Doi, p) }.reduceByKey { case (p1: MagPapers, p2: MagPapers) =>
      var r = if (p1 == null) p2 else p1
      if (p1 != null && p2 != null) {
        if (p1.CreatedDate != null && p2.CreatedDate != null) {
@ -46,16 +56,83 @@ object SparkPreProcessMAG {
      r
    }.map(_._2)

-    val distinctPaper: Dataset[Papers] = spark.createDataset(result)
+    val distinctPaper: Dataset[MagPapers] = spark.createDataset(result)
    distinctPaper.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/Papers_distinct")
    logger.info(s"Total number of element: ${result.count()}")

-    logger.info("Phase 2) convert InverdIndex Abastrac to string")
-    val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[PaperAbstract]
+    logger.info("Phase 3) Group Author by PaperId")
+    val authors = spark.read.load(s"$sourcePath/Authors").as[MagAuthor]
+
+    val affiliation =spark.read.load(s"$sourcePath/Affiliations").as[MagAffiliation]
+
+    val paperAuthorAffiliation =spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation]
+
+
+    paperAuthorAffiliation.joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId")))
+      .map{case (a:MagPaperAuthorAffiliation,b:MagAuthor )=>  (a.AffiliationId,MagPaperAuthorDenormalized(a.PaperId, b, null)) }
+      .joinWith(affiliation, affiliation("AffiliationId").equalTo(col("_1")), "left")
+      .map(s => {
+          val mpa = s._1._2
+          val af = s._2
+          if (af!= null) {
+            MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName)
+          } else
+            mpa
+        }).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation")).as("authors"))
+      .write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_1_paper_authors")
+
+
+
+    logger.info("Phase 4) create First Version of publication Entity with Paper Journal and Authors")
+
+
+    val journals = spark.read.load(s"$sourcePath/Journals").as[MagJournal]
+
+    val papers =spark.read.load((s"${parser.get("targetPath")}/Papers_distinct")).as[MagPapers]
+
+    val paperWithAuthors = spark.read.load(s"${parser.get("targetPath")}/merge_step_1_paper_authors").as[MagPaperWithAuthorList]
+
+
+
+    val firstJoin =papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")),"left")
+    firstJoin.joinWith(paperWithAuthors, firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), "left")
+      .map { a: ((MagPapers, MagJournal), MagPaperWithAuthorList) => ConversionUtil.createOAFFromJournalAuthorPaper(a) }.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_2")
+
+
+
+    var magPubs:Dataset[(String,Publication)] = spark.read.load(s"${parser.get("targetPath")}/merge_step_2").as[Publication].map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String,Publication)]
+
+    val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl]
+
+
+    logger.info("Phase 5) enrich publication with URL and Instances")
+
+    magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left")
+      .map{a:((String,Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2))}
+      .write.mode(SaveMode.Overwrite)
+      .save(s"${parser.get("targetPath")}/merge_step_3")
+
+
+
+    logger.info("Phase 6) Enrich Publication with description")
+    val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
    pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract")

+    val paperAbstract =spark.read.load((s"${parser.get("targetPath")}/PaperAbstract")).as[MagPaperAbstract]

-    distinctPaper.joinWith(pa, col("PaperId").eqia)
+
+    magPubs = spark.read.load(s"${parser.get("targetPath")}/merge_step_3").as[Publication].map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String,Publication)]
+
+    magPubs.joinWith(paperAbstract,col("_1").equalTo(paperAbstract("PaperId")), "left").map(p=>
+      {
+        val pub = p._1._2
+        val abst = p._2
+        if (abst!= null) {
+          pub.setDescription(List(asField(abst.IndexedAbstract)).asJava)
+        }
+        pub
+        }
+       ).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_4")

  }

--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml
@ -72,6 +72,7 @@
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
                ${sparkExtraOPT}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/config-default.xml
@ -11,6 +11,10 @@
            <name>queueName</name>
            <value>default</value>
    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/workflow.xml
@ -20,6 +20,10 @@
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
+        <property>
+            <name>outputPath</name>
+            <description>the working dir base path</description>
+        </property>
    </parameters>
    
    <start to="ResetWorkingPath"/>
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
@ -1,10 +1,21 @@
 package eu.dnetlib.doiboost.mag

-import org.codehaus.jackson.map.ObjectMapper
+import eu.dnetlib.dhp.schema.oaf.Publication
+import org.apache.htrace.fasterxml.jackson.databind.SerializationFeature
+import org.apache.spark.SparkConf
+import org.apache.spark.api.java.function.MapFunction
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
+import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
 import org.junit.jupiter.api.Test
 import org.slf4j.{Logger, LoggerFactory}
 import org.junit.jupiter.api.Assertions._
+import org.apache.spark.sql.functions._
+
+import scala.collection.JavaConverters._
 import scala.io.Source
+import scala.reflect.ClassTag
+import scala.util.matching.Regex
+


 class MAGMappingTest {
@ -13,14 +24,49 @@ class MAGMappingTest {
  val mapper = new ObjectMapper()


-  //@Test
+  // Manual smoke test: starts a local Spark session, loads the merge_step_4 output found under
+  // the hard-coded local sourcePath and prints the MAG identifier extracted from originalId.
+  @Test
  def testMAGCSV(): Unit = {
-    SparkPreProcessMAG.main("-m local[*] -s /data/doiboost/mag/datasets -t /data/doiboost/mag/datasets/preprocess".split(" "))
+    // SparkPreProcessMAG.main("-m local[*] -s /data/doiboost/mag/datasets -t /data/doiboost/mag/datasets/preprocess".split(" "))
+
+    val sparkConf: SparkConf = new SparkConf
+
+    val spark: SparkSession = SparkSession.builder()
+      .config(sparkConf)
+      .appName(getClass.getSimpleName)
+      .master("local[*]")
+      .getOrCreate()
+
+    import spark.implicits._
+
+
+    implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication]
+    implicit val longBarEncoder = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
+
+    val sourcePath = "/data/doiboost/mag/input"
+
+    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
+
+
+   // The s interpolator is required here: without it the literal text "$sourcePath/merge_step_4" is used as the path
+   val magOAF = spark.read.load(s"$sourcePath/merge_step_4").as[Publication]
+
+   println(magOAF.first().getOriginalId)
+
+
+   magOAF.map(k => (ConversionUtil.extractMagIdentifier(k.getOriginalId.asScala),k)).as[(String,Publication)].show()
+
+
+    println((ConversionUtil.extractMagIdentifier(magOAF.first().getOriginalId.asScala)))
+
+    val magIDRegex: Regex = "^[0-9]+$".r
+
+
+    println(magIDRegex.findFirstMatchIn("suca").isDefined)
+
  }


  @Test
-  def buildInvertedIndexTest() :Unit = {
+  def buildInvertedIndexTest(): Unit = {
    val json_input = Source.fromInputStream(getClass.getResourceAsStream("invertedIndex.json")).mkString
    val description = ConversionUtil.convertInvertedIndexString(json_input)
    assertNotNull(description)
@ -32,3 +78,5 @@ class MAGMappingTest {


 }
+
+
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java
@ -15,6 +15,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;

 public class PropagationConstant {
@ -24,10 +26,6 @@ public class PropagationConstant {

 	public static final String TRUE = "true";

-	public static final String DNET_COUNTRY_SCHEMA = "dnet:countries";
-	public static final String DNET_SCHEMA_NAME = "dnet:provenanceActions";
-	public static final String DNET_SCHEMA_ID = "dnet:provenanceActions";
-
 	public static final String PROPAGATION_COUNTRY_INSTREPO_CLASS_ID = "country:instrepos";
 	public static final String PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME = "Propagation of country to result collected from datasources of type institutional repositories";

@ -46,22 +44,6 @@ public class PropagationConstant {
 	public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
 	public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";

-	public static final String RELATION_DATASOURCE_ORGANIZATION_REL_CLASS = "isProvidedBy";
-
-	public static final String RELATION_RESULTORGANIZATION_REL_TYPE = "resultOrganization";
-	public static final String RELATION_RESULTORGANIZATION_SUBREL_TYPE = "affiliation";
-	public static final String RELATION_ORGANIZATION_RESULT_REL_CLASS = "isAuthorInstitutionOf";
-	public static final String RELATION_RESULT_ORGANIZATION_REL_CLASS = "hasAuthorInstitution";
-
-	public static final String RELATION_RESULTRESULT_REL_TYPE = "resultResult";
-
-	public static final String RELATION_RESULTPROJECT_REL_TYPE = "resultProject";
-	public static final String RELATION_RESULTPROJECT_SUBREL_TYPE = "outcome";
-	public static final String RELATION_RESULT_PROJECT_REL_CLASS = "isProducedBy";
-	public static final String RELATION_PROJECT_RESULT_REL_CLASS = "produces";
-
-	public static final String RELATION_REPRESENTATIVERESULT_RESULT_CLASS = "merges";
-
 	public static final String PROPAGATION_AUTHOR_PID = "ORCID";

 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -76,8 +58,8 @@ public class PropagationConstant {
 		Country nc = new Country();
 		nc.setClassid(classid);
 		nc.setClassname(classname);
-		nc.setSchemename(DNET_COUNTRY_SCHEMA);
-		nc.setSchemeid(DNET_COUNTRY_SCHEMA);
+		nc.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
+		nc.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
 		nc
 			.setDataInfo(
 				getDataInfo(
@ -102,8 +84,8 @@ public class PropagationConstant {
 		Qualifier pa = new Qualifier();
 		pa.setClassid(inference_class_id);
 		pa.setClassname(inference_class_name);
-		pa.setSchemeid(DNET_SCHEMA_ID);
-		pa.setSchemename(DNET_SCHEMA_NAME);
+		pa.setSchemeid(ModelConstants.DNET_PID_TYPES);
+		pa.setSchemename(ModelConstants.DNET_PID_TYPES);
 		return pa;
 	}

--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
@ -1,6 +1,7 @@

 package eu.dnetlib.dhp.bulktag;

+import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.util.Optional;
@ -84,6 +85,7 @@ public class SparkBulkTagJob {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
+				removeOutputDir(spark, outputPath);
 				execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc);
 			});
 	}
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java
@ -21,6 +21,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;

 /**
@ -100,7 +101,7 @@ public class PrepareDatasourceCountryAssociation {
 			+ "JOIN ( SELECT source, target "
 			+ "       FROM relation "
 			+ "       WHERE relclass = '"
-			+ RELATION_DATASOURCE_ORGANIZATION_REL_CLASS
+			+ ModelConstants.IS_PROVIDED_BY
 			+ "' "
 			+ "       AND datainfo.deletedbyinference = false ) rel "
 			+ "ON d.id = rel.source "
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
@ -69,13 +69,16 @@ public class SparkCountryPropagationJob {
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
-			spark -> execPropagation(
-				spark,
-				sourcePath,
-				preparedInfoPath,
-				outputPath,
-				resultClazz,
-				saveGraph));
+			spark -> {
+				removeOutputDir(spark, outputPath);
+				execPropagation(
+					spark,
+					sourcePath,
+					preparedInfoPath,
+					outputPath,
+					resultClazz,
+					saveGraph);
+			});
 	}

 	private static <R extends Result> void execPropagation(
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java
@ -74,9 +74,7 @@ public class PrepareResultOrcidAssociationStep1 {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				if (isTest(parser)) {
-					removeOutputDir(spark, outputPath);
-				}
+				removeOutputDir(spark, outputPath);
 				prepareInfo(
 					spark, inputRelationPath, inputResultPath, outputResultPath, resultClazz, allowedsemrel);
 			});
@ -97,22 +95,22 @@ public class PrepareResultOrcidAssociationStep1 {
 		Dataset<R> result = readPath(spark, inputResultPath, resultClazz);
 		result.createOrReplaceTempView("result");

-		String query = " select target resultId, author authorList"
-			+ " from (select id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
-			+ " from ( "
-			+ " select id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
-			+ " from result "
-			+ " lateral view explode (author) a as MyT "
-			+ " lateral view explode (MyT.pid) p as MyP "
-			+ " where MyP.qualifier.classid = 'ORCID') tmp "
-			+ " group by id) r_t "
-			+ " join ("
-			+ " select source, target "
-			+ " from relation "
-			+ " where datainfo.deletedbyinference = false "
+		String query = "SELECT target resultId, author authorList"
+			+ "  FROM (SELECT id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
+			+ "        FROM ( "
+			+ "               SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
+			+ "               FROM result "
+			+ "               LATERAL VIEW EXPLODE (author) a AS MyT "
+			+ "               LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
+			+ "               WHERE MyP.qualifier.classid = 'ORCID') tmp "
+			+ "               GROUP BY id) r_t "
+			+ " JOIN ("
+			+ "        SELECT source, target "
+			+ "        FROM relation "
+			+ "        WHERE datainfo.deletedbyinference = false "
 			+ getConstraintList(" relclass = '", allowedsemrel)
-			+ ") rel_rel "
-			+ " on source = id";
+			+ "              ) rel_rel "
+			+ " ON source = id";
 		spark
 			.sql(query)
 			.as(Encoders.bean(ResultOrcidList.class))
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java
@ -50,9 +50,7 @@ public class PrepareResultOrcidAssociationStep2 {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				if (isTest(parser)) {
-					removeOutputDir(spark, outputPath);
-				}
+				removeOutputDir(spark, outputPath);
 				mergeInfo(spark, inputPath, outputPath);
 			});
 	}
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java
@ -70,11 +70,10 @@ public class SparkOrcidToResultFromSemRelJob {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				if (isTest(parser)) {
-					removeOutputDir(spark, outputPath);
-				}
-				if (saveGraph)
+				removeOutputDir(spark, outputPath);
+				if (saveGraph) {
 					execPropagation(spark, possibleUpdates, inputPath, outputPath, resultClazz);
+				}
 			});
 	}

@ -132,16 +131,16 @@ public class SparkOrcidToResultFromSemRelJob {
 	private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
 		boolean toaddpid = false;

-		if (StringUtils.isNoneEmpty(autoritative_author.getSurname())) {
-			if (StringUtils.isNoneEmpty(author.getSurname())) {
+		if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
+			if (StringUtils.isNotEmpty(author.getSurname())) {
 				if (autoritative_author
 					.getSurname()
 					.trim()
 					.equalsIgnoreCase(author.getSurname().trim())) {

 					// have the same surname. Check the name
-					if (StringUtils.isNoneEmpty(autoritative_author.getName())) {
-						if (StringUtils.isNoneEmpty(author.getName())) {
+					if (StringUtils.isNotEmpty(autoritative_author.getName())) {
+						if (StringUtils.isNotEmpty(author.getName())) {
 							if (autoritative_author
 								.getName()
 								.trim()
@ -150,12 +149,14 @@ public class SparkOrcidToResultFromSemRelJob {
 							}
 							// they could be differently written (i.e. only the initials of the name
 							// in one of the two
-							if (autoritative_author
-								.getName()
-								.trim()
-								.substring(0, 0)
-								.equalsIgnoreCase(author.getName().trim().substring(0, 0))) {
-								toaddpid = true;
+							else {
+								// Compare the first initials only; substring(0, 0) is always "" and matched every name.
+								final String authoritativeName = autoritative_author.getName().trim();
+								final String authorName = author.getName().trim();
+								if (!authoritativeName.isEmpty() && !authorName.isEmpty()
+									&& authoritativeName.substring(0, 1).equalsIgnoreCase(authorName.substring(0, 1))) {
+									toaddpid = true;
+								}
+							}
 						}
 					}
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java
@ -21,6 +21,7 @@ import com.google.gson.Gson;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;

 public class PrepareProjectResultsAssociation {
@ -60,6 +61,8 @@ public class PrepareProjectResultsAssociation {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
+				removeOutputDir(spark, potentialUpdatePath);
+				removeOutputDir(spark, alreadyLinkedPath);
 				prepareResultProjProjectResults(
 					spark,
 					inputPath,
@ -83,7 +86,7 @@ public class PrepareProjectResultsAssociation {
 			+ "       FROM relation "
 			+ "       WHERE datainfo.deletedbyinference = false "
 			+ "       AND relClass = '"
-			+ RELATION_RESULT_PROJECT_REL_CLASS
+			+ ModelConstants.IS_PRODUCED_BY
 			+ "'";

 		Dataset<Row> resproj_relation = spark.sql(resproj_relation_query);
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java
@ -20,6 +20,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import scala.Tuple2;

@ -122,9 +123,9 @@ public class SparkResultToProjectThroughSemRelJob {
 								getRelation(
 									resId,
 									projectId,
-									RELATION_RESULT_PROJECT_REL_CLASS,
-									RELATION_RESULTPROJECT_REL_TYPE,
-									RELATION_RESULTPROJECT_SUBREL_TYPE,
+									ModelConstants.IS_PRODUCED_BY,
+									ModelConstants.RESULT_PROJECT,
+									ModelConstants.OUTCOME,
 									PROPAGATION_DATA_INFO_TYPE,
 									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID,
 									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME));
@ -133,9 +134,9 @@ public class SparkResultToProjectThroughSemRelJob {
 								getRelation(
 									projectId,
 									resId,
-									RELATION_PROJECT_RESULT_REL_CLASS,
-									RELATION_RESULTPROJECT_REL_TYPE,
-									RELATION_RESULTPROJECT_SUBREL_TYPE,
+									ModelConstants.PRODUCES,
+									ModelConstants.RESULT_PROJECT,
+									ModelConstants.OUTCOME,
 									PROPAGATION_DATA_INFO_TYPE,
 									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID,
 									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME));
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java
@ -17,6 +17,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;

 public class PrepareResultCommunitySet {
@ -55,9 +56,7 @@ public class PrepareResultCommunitySet {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				if (isTest(parser)) {
-					removeOutputDir(spark, outputPath);
-				}
+				removeOutputDir(spark, outputPath);
 				prepareInfo(spark, inputPath, outputPath, organizationMap);
 			});
 	}
@ -76,13 +75,13 @@ public class PrepareResultCommunitySet {
 			+ "      FROM relation "
 			+ "      WHERE datainfo.deletedbyinference = false "
 			+ "      AND relClass = '"
-			+ RELATION_RESULT_ORGANIZATION_REL_CLASS
+			+ ModelConstants.HAS_AUTHOR_INSTITUTION
 			+ "') result_organization "
 			+ "LEFT JOIN (SELECT source, collect_set(target) org_set "
 			+ "      FROM relation "
 			+ "      WHERE datainfo.deletedbyinference = false "
 			+ "      AND relClass = '"
-			+ RELATION_REPRESENTATIVERESULT_RESULT_CLASS
+			+ ModelConstants.MERGES
 			+ "' "
 			+ "      GROUP BY source) organization_organization "
 			+ "ON result_organization.target = organization_organization.source ";
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
@ -68,11 +68,10 @@ public class SparkResultToCommunityFromOrganizationJob {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				if (isTest(parser)) {
-					removeOutputDir(spark, outputPath);
-				}
-				if (saveGraph)
+				removeOutputDir(spark, outputPath);
+				if (saveGraph) {
 					execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
+				}
 			});
 	}

--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java
@ -53,9 +53,7 @@ public class PrepareResultCommunitySetStep2 {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				if (isTest(parser)) {
-					removeOutputDir(spark, outputPath);
-				}
+				removeOutputDir(spark, outputPath);
 				mergeInfo(spark, inputPath, outputPath);
 			});
 	}
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java
@ -17,6 +17,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Datasource;
 import eu.dnetlib.dhp.schema.oaf.Organization;
 import eu.dnetlib.dhp.schema.oaf.Relation;
@ -58,30 +59,15 @@ public class PrepareResultInstRepoAssociation {
 			isSparkSessionManaged,
 			spark -> {
 				readNeededResources(spark, inputPath);
+
+				removeOutputDir(spark, datasourceOrganizationPath);
 				prepareDatasourceOrganization(spark, datasourceOrganizationPath);
+
+				removeOutputDir(spark, alreadyLinkedPath);
 				prepareAlreadyLinkedAssociation(spark, alreadyLinkedPath);
 			});
 	}

-	private static void prepareAlreadyLinkedAssociation(
-		SparkSession spark, String alreadyLinkedPath) {
-		String query = "Select source resultId, collect_set(target) organizationSet "
-			+ "from relation "
-			+ "where datainfo.deletedbyinference = false "
-			+ "and relClass = '"
-			+ RELATION_RESULT_ORGANIZATION_REL_CLASS
-			+ "' "
-			+ "group by source";
-
-		spark
-			.sql(query)
-			.as(Encoders.bean(ResultOrganizationSet.class))
-			// TODO retry to stick with datasets
-			.toJavaRDD()
-			.map(r -> OBJECT_MAPPER.writeValueAsString(r))
-			.saveAsTextFile(alreadyLinkedPath, GzipCodec.class);
-	}
-
 	private static void readNeededResources(SparkSession spark, String inputPath) {
 		Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class);
 		datasource.createOrReplaceTempView("datasource");
@ -106,7 +92,7 @@ public class PrepareResultInstRepoAssociation {
 			+ "JOIN ( SELECT source, target "
 			+ "FROM relation "
 			+ "WHERE relclass = '"
-			+ RELATION_DATASOURCE_ORGANIZATION_REL_CLASS
+			+ ModelConstants.IS_PROVIDED_BY
 			+ "' "
 			+ "AND datainfo.deletedbyinference = false ) rel "
 			+ "ON d.id = rel.source ";
@ -119,4 +105,24 @@ public class PrepareResultInstRepoAssociation {
 			.option("compression", "gzip")
 			.json(datasourceOrganizationPath);
 	}
+
+	/**
+	 * Collects, for every result, the set of organizations it is already linked to and stores the
+	 * associations as gzip-compressed text, one Jackson-serialized record per line.
+	 *
+	 * Only relations of class {@link ModelConstants#HAS_AUTHOR_INSTITUTION} that are not flagged as
+	 * deleted by inference are considered. The query reads from the "relation" view, which is
+	 * presumably registered by readNeededResources - verify before calling this in isolation.
+	 *
+	 * @param spark the active Spark session
+	 * @param alreadyLinkedPath the location the serialized associations are written to
+	 */
+	private static void prepareAlreadyLinkedAssociation(
+		SparkSession spark, String alreadyLinkedPath) {
+		final String relClassFilter = "and relClass = '"
+			+ ModelConstants.HAS_AUTHOR_INSTITUTION
+			+ "' ";
+		final String query = "Select source resultId, collect_set(target) organizationSet "
+			+ "from relation "
+			+ "where datainfo.deletedbyinference = false "
+			+ relClassFilter
+			+ "group by source";
+
+		final Dataset<ResultOrganizationSet> alreadyLinked = spark
+			.sql(query)
+			.as(Encoders.bean(ResultOrganizationSet.class));
+
+		// TODO retry to stick with datasets
+		alreadyLinked
+			.toJavaRDD()
+			.map(association -> OBJECT_MAPPER.writeValueAsString(association))
+			.saveAsTextFile(alreadyLinkedPath, GzipCodec.class);
+	}
+
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java
@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;
 import scala.Tuple2;

@ -83,10 +84,8 @@ public class SparkResultToOrganizationFromIstRepoJob {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				if (isTest(parser)) {
-					removeOutputDir(spark, outputPath);
-				}
-				if (saveGraph)
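+				// NOTE(review): removeOutputDir(spark, outputPath) is left disabled; the workflow appears to pass ${outputPath}/relation,
+				// which copy_relation has already populated and every result-type run shares - confirm before re-enabling.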
+				if (saveGraph) {
 					execPropagation(
 						spark,
 						datasourceorganization,
@ -94,6 +93,7 @@ public class SparkResultToOrganizationFromIstRepoJob {
 						inputPath,
 						outputPath,
 						resultClazz);
+				}
 			});
 	}

@ -151,9 +151,9 @@ public class SparkResultToOrganizationFromIstRepoJob {
 								getRelation(
 									orgId,
 									resultId,
-									RELATION_ORGANIZATION_RESULT_REL_CLASS,
-									RELATION_RESULTORGANIZATION_REL_TYPE,
-									RELATION_RESULTORGANIZATION_SUBREL_TYPE,
+									ModelConstants.IS_AUTHOR_INSTITUTION_OF,
+									ModelConstants.RESULT_ORGANIZATION,
+									ModelConstants.AFFILIATION,
 									PROPAGATION_DATA_INFO_TYPE,
 									PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID,
 									PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME));
@ -162,9 +162,9 @@ public class SparkResultToOrganizationFromIstRepoJob {
 								getRelation(
 									resultId,
 									orgId,
-									RELATION_RESULT_ORGANIZATION_REL_CLASS,
-									RELATION_RESULTORGANIZATION_REL_TYPE,
-									RELATION_RESULTORGANIZATION_SUBREL_TYPE,
+									ModelConstants.HAS_AUTHOR_INSTITUTION,
+									ModelConstants.RESULT_ORGANIZATION,
+									ModelConstants.AFFILIATION,
 									PROPAGATION_DATA_INFO_TYPE,
 									PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID,
 									PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME));
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
@ -18,6 +18,17 @@
        </property>
    </parameters>

+    <!-- Defaults inherited by every action below: job tracker, name node, and the share-lib used for Spark actions. -->
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
    <start to="reset_outputpath"/>

    <kill name="Kill">
@ -42,8 +53,6 @@

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
@ -53,8 +62,6 @@

    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
@ -64,8 +71,6 @@

    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
@ -75,8 +80,6 @@

    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
@ -95,8 +98,6 @@

    <action name="join_bulktag_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>bulkTagging-publication</name>
@ -124,8 +125,6 @@

    <action name="join_bulktag_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>bulkTagging-dataset</name>
@ -153,8 +152,6 @@

    <action name="join_bulktag_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>bulkTagging-orp</name>
@ -182,8 +179,6 @@

    <action name="join_bulktag_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>bulkTagging-software</name>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
@ -19,6 +19,17 @@

    </parameters>

+    <!-- Defaults inherited by every action below: job tracker, name node, and the share-lib used for Spark actions. -->
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
    <start to="reset_outputpath"/>

    <kill name="Kill">
@ -43,8 +54,6 @@

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
@ -54,18 +63,15 @@

    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
+
    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
@ -75,8 +81,6 @@

    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
@ -57,6 +57,7 @@
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
+
    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
@ -81,7 +82,6 @@

    <join name="copy_wait" to="fork_prepare_assoc_step1"/>

-
    <fork name="fork_prepare_assoc_step1">
        <path start="join_prepare_publication"/>
        <path start="join_prepare_dataset"/>
@ -230,8 +230,8 @@
        </spark>
        <ok to="fork-join-exec-propagation"/>
        <error to="Kill"/>
-
    </action>
+
    <fork name="fork-join-exec-propagation">
        <path start="join_propagate_publication"/>
        <path start="join_propagate_dataset"/>
@ -271,6 +271,7 @@
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
+
    <action name="join_propagate_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
@ -302,6 +303,7 @@
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
+
    <action name="join_propagate_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
@ -333,6 +335,7 @@
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
+
    <action name="join_propagate_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml
@ -14,6 +14,17 @@
       </property>
    </parameters>

+    <!-- Defaults inherited by every action below: job tracker, name node, and the share-lib used for Spark actions. -->
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
    <start to="reset_outputpath"/>

    <kill name="Kill">
@ -42,8 +53,6 @@

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
@ -53,8 +62,6 @@

    <action name="copy_publication">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/publication</arg>
            <arg>${nameNode}/${outputPath}/publication</arg>
        </distcp>
@ -64,8 +71,6 @@

    <action name="copy_dataset">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/dataset</arg>
            <arg>${nameNode}/${outputPath}/dataset</arg>
        </distcp>
@ -75,8 +80,6 @@

    <action name="copy_orp">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
            <arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
        </distcp>
@ -86,28 +89,24 @@

    <action name="copy_software">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/software</arg>
            <arg>${nameNode}/${outputPath}/software</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
+
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
+
    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
@ -117,8 +116,6 @@

    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml
@ -14,6 +14,17 @@
        </property>
    </parameters>

+    <!-- Defaults inherited by every action below: job tracker, name node, and the share-lib used for Spark actions. -->
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
    <start to="reset_outputpath"/>

    <kill name="Kill">
@ -38,8 +49,6 @@

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
@ -49,8 +58,6 @@

    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
@ -60,8 +67,6 @@

    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
@ -71,8 +76,6 @@

    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
@ -101,8 +104,8 @@
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
-            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--organizationtoresultcommunitymap</arg><arg>${organizationtoresultcommunitymap}</arg>
        </spark>
        <ok to="fork-join-exec-propagation"/>
@ -136,9 +139,9 @@
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
+            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
@ -165,9 +168,9 @@
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
+            <arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
@ -194,9 +197,9 @@
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
+            <arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
@ -223,9 +226,9 @@
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
+            <arg>--outputPath</arg><arg>${outputPath}/software</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/software</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
@ -10,6 +10,17 @@
        </property>
    </parameters>

+    <!-- Defaults inherited by every action below: job tracker, name node, and the share-lib used for Spark actions. -->
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
    <start to="reset_outputpath"/>

    <kill name="Kill">
@ -38,8 +49,6 @@

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
@ -49,8 +58,6 @@

    <action name="copy_publication">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/publication</arg>
            <arg>${nameNode}/${outputPath}/publication</arg>
        </distcp>
@ -60,8 +67,6 @@

    <action name="copy_dataset">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/dataset</arg>
            <arg>${nameNode}/${outputPath}/dataset</arg>
        </distcp>
@ -71,8 +76,6 @@

    <action name="copy_orp">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
            <arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
        </distcp>
@ -82,8 +85,6 @@

    <action name="copy_software">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/software</arg>
            <arg>${nameNode}/${outputPath}/software</arg>
        </distcp>
@ -93,8 +94,6 @@

    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
@ -104,8 +103,6 @@

    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
@ -115,8 +112,6 @@

    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
@ -125,6 +120,7 @@
    </action>

    <join name="wait" to="prepare_result_organization_association"/>
+
    <action name="prepare_result_organization_association">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
@ -176,12 +172,12 @@
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
-            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
-            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
-            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${outputPath}/relation</arg>
            <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
@ -206,12 +202,12 @@
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
-            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
-            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
-            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${outputPath}/relation</arg>
            <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
@ -236,12 +232,12 @@
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
-            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
-            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
-            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${outputPath}/relation</arg>
            <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
@ -266,12 +262,12 @@
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
-            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
-            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
-            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${outputPath}/relation</arg>
            <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java
@ -0,0 +1,79 @@
+
+package eu.dnetlib.dhp.oa.graph.hive;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
+import static eu.dnetlib.dhp.schema.common.ModelSupport.tableIdentifier;
+
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.oaf.Oaf;
+
+/**
+ * Spark job importing one graph table into Hive: reads a text dump holding one JSON-serialized record per line,
+ * maps every line onto the given {@link Oaf} subclass and stores the result as a Hive table, overwriting any
+ * previous content.
+ */
+public class GraphHiveTableImporterJob {
+
+	private static final Logger log = LoggerFactory.getLogger(GraphHiveTableImporterJob.class);
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	/**
+	 * Reads the job arguments declared in hive_table_importer_parameters.json (inputPath, hiveDbName, className,
+	 * hiveMetastoreUris and the optional isSparkSessionManaged, defaulting to true) and runs the import within a
+	 * Hive-enabled Spark session.
+	 *
+	 * @param args the command line arguments
+	 * @throws ClassNotFoundException when className cannot be loaded
+	 * @throws ClassCastException when className does not denote a subclass of {@link Oaf}
+	 * @throws Exception on any other failure raised while parsing the arguments or running the job
+	 */
+	public static void main(String[] args) throws Exception {
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					GraphHiveTableImporterJob.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/oa/graph/hive_table_importer_parameters.json")));
+		parser.parseArgument(args);
+
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		String inputPath = parser.get("inputPath");
+		log.info("inputPath: {}", inputPath);
+
+		String hiveDbName = parser.get("hiveDbName");
+		log.info("hiveDbName: {}", hiveDbName);
+
+		final String className = parser.get("className");
+		log.info("className: {}", className);
+
+		// asSubclass verifies at load time that className denotes an Oaf type and fails fast otherwise,
+		// whereas an unchecked cast would let a wrong class slip through to the Spark job
+		final Class<? extends Oaf> clazz = Class.forName(className).asSubclass(Oaf.class);
+
+		String hiveMetastoreUris = parser.get("hiveMetastoreUris");
+		log.info("hiveMetastoreUris: {}", hiveMetastoreUris);
+
+		SparkConf conf = new SparkConf();
+		conf.set("hive.metastore.uris", hiveMetastoreUris);
+
+		runWithSparkHiveSession(
+			conf, isSparkSessionManaged, spark -> loadGraphTable(spark, inputPath, hiveDbName, clazz));
+	}
+
+	/**
+	 * Reads the text files found under inputPath, deserializes every line as an instance of clazz and saves the
+	 * resulting dataset as a Hive table in overwrite mode. The table name is the one returned by
+	 * ModelSupport.tableIdentifier for the given database and class.
+	 *
+	 * @param spark the Spark session used to read the dump and write the table
+	 * @param inputPath the path of the text dump, one JSON record per line
+	 * @param hiveDbName the name of the target Hive database
+	 * @param clazz the model class the records are mapped onto
+	 * @param <T> the type of the imported records
+	 */
+	private static <T extends Oaf> void loadGraphTable(SparkSession spark, String inputPath, String hiveDbName,
+		Class<T> clazz) {
+
+		spark
+			.read()
+			.textFile(inputPath)
+			.map((MapFunction<String, T>) s -> OBJECT_MAPPER.readValue(s, clazz), Encoders.bean(clazz))
+			.write()
+			.mode(SaveMode.Overwrite)
+			.saveAsTable(tableIdentifier(hiveDbName, clazz));
+	}
+
+}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
@ -127,7 +127,6 @@ public abstract class AbstractMdRecordToOafMapper {
 		final List<Oaf> oafs = new ArrayList<>();

 		switch (type.toLowerCase()) {
-			case "":
 			case "publication":
 				final Publication p = new Publication();
 				populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
@ -138,7 +137,7 @@ public abstract class AbstractMdRecordToOafMapper {
 			case "dataset":
 				final Dataset d = new Dataset();
 				populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
-				d.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
+				d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
 				d.setStoragedate(prepareDatasetStorageDate(doc, info));
 				d.setDevice(prepareDatasetDevice(doc, info));
 				d.setSize(prepareDatasetSize(doc, info));
@ -158,6 +157,7 @@ public abstract class AbstractMdRecordToOafMapper {
 				s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
 				oafs.add(s);
 				break;
+			case "":
 			case "otherresearchproducts":
 			default:
 				final OtherResearchProduct o = new OtherResearchProduct();
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
@ -50,8 +50,7 @@ import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.Software;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

-public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
-	implements Closeable {
+public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {

 	private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);

@ -128,9 +127,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
 	}

 	public List<Oaf> processDatasource(final ResultSet rs) {
-
 		try {
-
 			final DataInfo info = prepareDataInfo(rs);

 			final Datasource ds = new Datasource();
@ -194,7 +191,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication

 	public List<Oaf> processProject(final ResultSet rs) {
 		try {
-
 			final DataInfo info = prepareDataInfo(rs);

 			final Project p = new Project();
@ -249,9 +245,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
 	}

 	public List<Oaf> processOrganization(final ResultSet rs) {
-
 		try {
-
 			final DataInfo info = prepareDataInfo(rs);

 			final Organization o = new Organization();
@ -370,14 +364,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication

 		final DataInfo info = dataInfo(
 			false, null, false, false,
-
 			qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9");

 		final List<KeyValue> collectedFrom = listKeyValues(
 			createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");

 		try {
-
 			if (rs.getString(SOURCE_TYPE).equals("context")) {
 				final Result r;

@ -461,9 +453,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
 		final Boolean inferred = rs.getBoolean("inferred");
 		final String trust = rs.getString("trust");
 		return dataInfo(
-
-			deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust);
-
+			deletedbyinference,
+			inferenceprovenance,
+			inferred,
+			false,
+			ENTITYREGISTRY_PROVENANCE_ACTION,
+			trust);
 	}

 	private Qualifier prepareQualifierSplitting(final String s) {
@ -535,4 +530,5 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
 		super.close();
 		dbClient.close();
 	}
+
 }
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
@ -1,8 +1,7 @@

 package eu.dnetlib.dhp.oa.graph.raw;

-import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
-import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
+import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.*;

 import java.util.*;
@ -10,11 +9,13 @@ import java.util.stream.Collectors;

 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
+import org.dom4j.Element;
 import org.dom4j.Node;

 import com.google.common.collect.Lists;

 import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;

 public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@ -28,15 +29,26 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
 		final List<Author> res = new ArrayList<>();
 		int pos = 1;
 		for (final Object o : doc.selectNodes("//dc:creator")) {
-			final Node n = (Node) o;
+			final Element e = (Element) o;
 			final Author author = new Author();
-			author.setFullname(n.getText());
+			author.setFullname(e.getText());
 			author.setRank(pos++);
-			final PacePerson p = new PacePerson(n.getText(), false);
+			final PacePerson p = new PacePerson(e.getText(), false);
 			if (p.isAccurate()) {
 				author.setName(p.getNormalisedFirstName());
 				author.setSurname(p.getNormalisedSurname());
 			}
+
+			final String pid = e.attributeValue("nameIdentifier");
+			final String pidType = e.attributeValue("nameIdentifierScheme");
+
+			author.setPid(new ArrayList<>());
+			if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) {
+				author
+					.getPid()
+					.add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info));
+			}
+
 			res.add(author);
 		}
 		return res;
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
@ -12,6 +12,7 @@ import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
 import org.dom4j.Node;

+import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.DataInfo;
@ -44,20 +45,35 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 		for (final Object o : doc.selectNodes("//datacite:creator")) {
 			final Node n = (Node) o;
 			final Author author = new Author();
-			author.setFullname(n.valueOf("./datacite:creatorName"));
-			author.setName(n.valueOf("./datacite:givenName"));
-			author.setSurname(n.valueOf("./datacite:familyName"));
-			author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info));
-			author.setPid(preparePids(doc, info));
+			final String fullname = n.valueOf("./datacite:creatorName");
+			author.setFullname(fullname);
+
+			// Explicit datacite given/family names win; when one is blank, fall back to the corresponding part
+			// parsed from the full name, but only if the parser considers its split accurate
+			final PacePerson pp = new PacePerson(fullname, false);
+			final String name = n.valueOf("./datacite:givenName");
+			if (StringUtils.isBlank(name) && pp.isAccurate()) {
+				author.setName(pp.getNormalisedFirstName());
+			} else {
+				author.setName(name);
+			}
+
+			final String surname = n.valueOf("./datacite:familyName");
+			if (StringUtils.isBlank(surname) && pp.isAccurate()) {
+				author.setSurname(pp.getNormalisedSurname());
+			} else {
+				author.setSurname(surname);
+			}
+
+			author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info));
+			author.setPid(preparePids(n, info));
 			author.setRank(pos++);
 			res.add(author);
 		}
 		return res;
 	}

-	private List<StructuredProperty> preparePids(final Document doc, final DataInfo info) {
+	private List<StructuredProperty> preparePids(final Node n, final DataInfo info) {
 		final List<StructuredProperty> res = new ArrayList<>();
-		for (final Object o : doc.selectNodes("./datacite:nameIdentifier")) {
+		for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
 			res
 				.add(
 					structuredProperty(
@ -77,8 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 		final KeyValue hostedby) {

 		final Instance instance = new Instance();
-		final Set<String> url = new HashSet<>();
-		instance.setUrl(new ArrayList<>());
 		instance
 			.setInstancetype(
 				prepareQualifier(
@ -97,6 +111,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 			.setProcessingchargecurrency(
 				field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));

+		final Set<String> url = new HashSet<>();
 		for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
 			url.add(((Node) o).getText().trim());
 		}
@ -109,7 +124,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 		for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
 			url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
 		}
-		instance.getUrl().addAll(url);
+		if (!url.isEmpty()) {
+			instance.setUrl(new ArrayList<>());
+			instance.getUrl().addAll(url);
+		}
 		return Arrays.asList(instance);
 	}

--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java
@ -1,7 +1,6 @@

 package eu.dnetlib.dhp.oa.graph.raw.common;

-import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.text.Normalizer;
 import java.util.HashSet;
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/reset_db.sql
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/reset_db.sql
@ -0,0 +1,2 @@
+-- Drops the target database together with everything it contains (CASCADE) and recreates it empty.
+DROP DATABASE IF EXISTS ${hiveDbName} CASCADE;
+CREATE DATABASE ${hiveDbName};
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml
@ -72,18 +72,45 @@
        </configuration>
    </global>

-    <start to="MapGraphAsHiveDB"/>
+    <start to="reset_DB"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <action name="MapGraphAsHiveDB">
+    <!-- Entry point of the workflow: runs lib/scripts/reset_db.sql through the hive2 action, handing over the
+         target database name as the hiveDbName script parameter; on success the table imports are forked.
+         NOTE(review): the JDBC URL addresses the very database the script drops and recreates; confirm that
+         the connection succeeds when that database does not exist yet. -->
+    <action name="reset_DB">
+        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+            <configuration>
+                <property>
+                    <name>hive.metastore.uris</name>
+                    <value>${hiveMetastoreUris}</value>
+                </property>
+            </configuration>
+            <jdbc-url>${hiveJdbcUrl}/${hiveDbName}</jdbc-url>
+            <script>lib/scripts/reset_db.sql</script>
+            <param>hiveDbName=${hiveDbName}</param>
+        </hive2>
+        <ok to="fork_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <fork name="fork_import">
+        <path start="import_publication"/>
+        <path start="import_dataset"/>
+        <path start="import_orp"/>
+        <path start="import_software"/>
+        <path start="import_datasource"/>
+        <path start="import_organization"/>
+        <path start="import_project"/>
+        <path start="import_relation"/>
+    </fork>
+
+    <action name="import_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
-            <name>MapGraphAsHiveDB</name>
-            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveImporterJob</class>
+            <name>Import table publication</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
@ -95,18 +122,201 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
-            <arg>--inputPath</arg><arg>${inputPath}</arg>
+            <arg>--inputPath</arg><arg>${inputPath}/publication</arg>
            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
        </spark>
-        <ok to="PostProcessing"/>
+        <ok to="join_import"/>
        <error to="Kill"/>
    </action>

+    <action name="import_dataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table dataset</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/dataset</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_orp">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table otherresearchproduct</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/otherresearchproduct</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_software">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table software</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/software</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_datasource">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table datasource</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/datasource</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_organization">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table organization</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/organization</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_project">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table project</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/project</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <!-- Imports the relation dump into Hive with GraphHiveTableImporterJob, using the Relation model class;
+         like the other import actions it ends at join_import. -->
+    <action name="import_relation">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table relation</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/relation</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <join name="join_import" to="PostProcessing"/>
+
    <action name="PostProcessing">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>hive.metastore.uris</name>
@ -122,4 +332,5 @@
    </action>

    <end name="End"/>
+
 </workflow-app>
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_db_importer_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_db_importer_parameters.json
@ -0,0 +1,26 @@
+[
+  {
+    "paramName": "issm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "when true will stop SparkSession after job execution",
+    "paramRequired": false
+  },
+  {
+    "paramName": "in",
+    "paramLongName": "inputPath",
+    "paramDescription": "the path to the graph data dump to read",
+    "paramRequired": true
+  },
+  {
+    "paramName": "hmu",
+    "paramLongName": "hiveMetastoreUris",
+    "paramDescription": "the hive metastore uris",
+    "paramRequired": true
+  },
+  {
+    "paramName": "db",
+    "paramLongName": "hiveDbName",
+    "paramDescription": "the target hive database name",
+    "paramRequired": true
+  }
+]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_table_importer_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_table_importer_parameters.json
@ -0,0 +1,32 @@
+[
+  {
+    "paramName": "issm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "when true will stop SparkSession after job execution",
+    "paramRequired": false
+  },
+  {
+    "paramName": "in",
+    "paramLongName": "inputPath",
+    "paramDescription": "the path to the graph data dump to read",
+    "paramRequired": true
+  },
+  {
+    "paramName": "hmu",
+    "paramLongName": "hiveMetastoreUris",
+    "paramDescription": "the hive metastore uris",
+    "paramRequired": true
+  },
+  {
+    "paramName": "db",
+    "paramLongName": "hiveDbName",
+    "paramDescription": "the target hive database name",
+    "paramRequired": true
+  },
+  {
+    "paramName": "tn",
+    "paramLongName": "className",
+    "paramDescription": "the class modelling the target table",
+    "paramRequired": true
+  }
+]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_actionsets_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_actionsets_parameters.json
@ -1,10 +0,0 @@
-[
-  {"paramName":"is", "paramLongName":"isLookupUrl",      "paramDescription": "URL of the isLookUp Service",       "paramRequired": true},
-  {"paramName":"sn", "paramLongName":"sourceNameNode",   "paramDescription": "nameNode of the source cluster",    "paramRequired": true},
-  {"paramName":"tn", "paramLongName":"targetNameNode",   "paramDescription": "namoNode of the target cluster",    "paramRequired": true},
-  {"paramName":"w",  "paramLongName":"workingDirectory", "paramDescription": "working directory",    "paramRequired": true},
-  {"paramName":"nm", "paramLongName":"distcp_num_maps",  "paramDescription": "maximum number of map tasks used in the distcp process",    "paramRequired": true},
-  {"paramName":"mm", "paramLongName":"distcp_memory_mb", "paramDescription": "memory for distcp action copying actionsets from remote cluster",    "paramRequired": true},
-  {"paramName":"tt", "paramLongName":"distcp_task_timeout", "paramDescription": "timeout for distcp copying actions from remote cluster",   "paramRequired": true},
-  {"paramName":"tr", "paramLongName":"transform_only",   "paramDescription": "activate tranform-only mode. Only apply transformation step", "paramRequired": true}
-]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/transform_actionsets_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/transform_actionsets_parameters.json
@ -1,20 +0,0 @@
-[
-  {
-    "paramName": "mt",
-    "paramLongName": "master",
-    "paramDescription": "should be local or yarn",
-    "paramRequired": true
-  },
-  {
-    "paramName": "is",
-    "paramLongName": "isLookupUrl",
-    "paramDescription": "URL of the isLookUp Service",
-    "paramRequired": true
-  },
-  {
-    "paramName": "i",
-    "paramLongName": "inputPaths",
-    "paramDescription": "URL of the isLookUp Service",
-    "paramRequired": true
-  }
-]
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
@ -10,6 +10,7 @@ import static org.mockito.Mockito.when;
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
@ -19,11 +20,8 @@ import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.Mock;
 import org.mockito.junit.jupiter.MockitoExtension;

-import eu.dnetlib.dhp.schema.oaf.Dataset;
-import eu.dnetlib.dhp.schema.oaf.Oaf;
-import eu.dnetlib.dhp.schema.oaf.Publication;
-import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.schema.oaf.Software;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;

 @ExtendWith(MockitoExtension.class)
 public class MappersTest {
@ -54,7 +52,29 @@ public class MappersTest {
 		assertValidId(p.getId());
 		assertValidId(p.getCollectedfrom().get(0).getKey());
 		assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
+
 		assertTrue(p.getAuthor().size() > 0);
+		Optional<Author> author = p
+			.getAuthor()
+			.stream()
+			.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
+			.findFirst();
+		assertTrue(author.isPresent());
+		StructuredProperty pid = author
+			.get()
+			.getPid()
+			.stream()
+			.findFirst()
+			.get();
+		assertEquals("0000-0001-6651-1178", pid.getValue());
+		assertEquals("ORCID", pid.getQualifier().getClassid());
+		assertEquals("ORCID", pid.getQualifier().getClassname());
+		assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
+		assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
+		assertEquals("Votsi,Nefta", author.get().getFullname());
+		assertEquals("Votsi", author.get().getSurname());
+		assertEquals("Nefta", author.get().getName());
+
 		assertTrue(p.getSubject().size() > 0);
 		assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline()));
 		assertTrue(StringUtils.isNotBlank(p.getJournal().getName()));
@ -100,6 +120,38 @@ public class MappersTest {
 		assertValidId(d.getCollectedfrom().get(0).getKey());
 		assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
 		assertTrue(d.getAuthor().size() > 0);
+
+		Optional<Author> author = d
+			.getAuthor()
+			.stream()
+			.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
+			.findFirst();
+		assertTrue(author.isPresent());
+		StructuredProperty pid = author
+			.get()
+			.getPid()
+			.stream()
+			.findFirst()
+			.get();
+		assertEquals("0000-0001-9074-1619", pid.getValue());
+		assertEquals("ORCID", pid.getQualifier().getClassid());
+		assertEquals("ORCID", pid.getQualifier().getClassname());
+		assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
+		assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
+		assertEquals("Baracchini, Theo", author.get().getFullname());
+		assertEquals("Baracchini", author.get().getSurname());
+		assertEquals("Theo", author.get().getName());
+
+		assertEquals(1, author.get().getAffiliation().size());
+		Optional<Field<String>> opAff = author
+			.get()
+			.getAffiliation()
+			.stream()
+			.findFirst();
+		assertTrue(opAff.isPresent());
+		Field<String> affiliation = opAff.get();
+		assertEquals("ISTI-CNR", affiliation.getValue());
+
 		assertTrue(d.getSubject().size() > 0);
 		assertTrue(d.getInstance().size() > 0);
 		assertTrue(d.getContext().size() > 0);
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml
@ -19,7 +19,7 @@
  <metadata xmlns="http://namespace.openaire.eu/">
    <dc:title>Ecosystem Service capacity is higher in areas of multiple designation types</dc:title>
    <dc:creator>Nikolaidou,Charitini</dc:creator>
-    <dc:creator>Votsi,Nefta</dc:creator>
+    <dc:creator nameIdentifier="0000-0001-6651-1178" nameIdentifierScheme="ORCID">Votsi,Nefta</dc:creator>
    <dc:creator>Sgardelis,Steanos</dc:creator>
    <dc:creator>Halley,John</dc:creator>
    <dc:creator>Pantis,John</dc:creator>
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml
@ -35,9 +35,10 @@
        </creator>
        <creator>
          <creatorName>Baracchini, Theo</creatorName>
+          <nameIdentifier nameIdentifierScheme="ORCID">0000-0001-9074-1619</nameIdentifier>
          <givenName>Theo</givenName>
          <familyName>Baracchini</familyName>
-          <affiliation>Physics of Aquatic Systems Laboratory (APHYS) – Margaretha Kamprad Chair, ENAC, EPFL, Lausanne, 1015, Switzerland</affiliation>
+          <affiliation>ISTI-CNR</affiliation>
        </creator>
        <creator>
          <creatorName>Wüest, Alfred</creatorName>