Merge remote-tracking branch 'origin/master' into doiboost

2020-05-13 10:38:13 +02:00 · 2020-05-13 10:38:13 +02:00 · a92ee0f41e
parent d876f47d06 1ddd33de41
commit a92ee0f41e
156 changed files with 13615 additions and 85 deletions
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@ -83,6 +83,10 @@
 			<groupId>com.jayway.jsonpath</groupId>
 			<artifactId>json-path</artifactId>
 		</dependency>
 		<dependency>
 			<groupId>org.postgresql</groupId>
 			<artifactId>postgresql</artifactId>
 		</dependency>
 	</dependencies>
 </project>
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java
@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common;
 import java.io.Closeable;
 import java.io.IOException;
@ -14,7 +14,7 @@ public class DbClient implements Closeable {
 	private static final Log log = LogFactory.getLog(DbClient.class);
-	private final Connection connection;
+	private Connection connection;
 	public DbClient(final String address, final String login, final String password) {
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.schema.oaf.*;
 public class ModelSupport {
 	/** Defines the mapping between the actual entity type and the main entity type */
-	private static final Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
+	private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
 	static {
 		entityMapping.put(EntityType.publication, MainEntityType.result);
@ -53,6 +53,232 @@ public class ModelSupport {
 		oafTypes.put("relation", Relation.class);
 	}
 	/**
 	 * Maps a main entity type name ("datasource", "organization", "project", "result") to the numeric prefix string
 	 * registered for it below. Within this change set, ReadBlacklistFromDB prepends the prefix, followed by "|", to
 	 * the identifiers read from the blacklist database.
 	 */
 	public static final Map<String, String> entityIdPrefix = Maps.newHashMap();
 	static {
 		entityIdPrefix.put("datasource", "10");
 		entityIdPrefix.put("organization", "20");
 		entityIdPrefix.put("project", "40");
 		entityIdPrefix.put("result", "50");
 	}
 	public static final Map<String, RelationInverse> relationInverseMap = Maps.newHashMap();
 	static {
 		relationInverseMap
 			.put(
 				"personResult_authorship_isAuthorOf", new RelationInverse()
 					.setRelation("isAuthorOf")
 					.setInverse("hasAuthor")
 					.setRelType("personResult")
 					.setSubReltype("authorship"));
 		relationInverseMap
 			.put(
 				"personResult_authorship_hasAuthor", new RelationInverse()
 					.setInverse("isAuthorOf")
 					.setRelation("hasAuthor")
 					.setRelType("personResult")
 					.setSubReltype("authorship"));
 		relationInverseMap
 			.put(
 				"projectOrganization_participation_isParticipant", new RelationInverse()
 					.setRelation("isParticipant")
 					.setInverse("hasParticipant")
 					.setRelType("projectOrganization")
 					.setSubReltype("participation"));
 		relationInverseMap
 			.put(
 				"projectOrganization_participation_hasParticipant", new RelationInverse()
 					.setInverse("isParticipant")
 					.setRelation("hasParticipant")
 					.setRelType("projectOrganization")
 					.setSubReltype("participation"));
 		relationInverseMap
 			.put(
 				"resultOrganization_affiliation_hasAuthorInstitution", new RelationInverse()
 					.setRelation("hasAuthorInstitution")
 					.setInverse("isAuthorInstitutionOf")
 					.setRelType("resultOrganization")
 					.setSubReltype("affiliation"));
 		relationInverseMap
 			.put(
 				"resultOrganization_affiliation_isAuthorInstitutionOf", new RelationInverse()
 					.setInverse("hasAuthorInstitution")
 					.setRelation("isAuthorInstitutionOf")
 					.setRelType("resultOrganization")
 					.setSubReltype("affiliation"));
 		relationInverseMap
 			.put(
 				"organizationOrganization_dedup_merges", new RelationInverse()
 					.setRelation("merges")
 					.setInverse("isMergedIn")
 					.setRelType("organizationOrganization")
 					.setSubReltype("dedup"));
 		relationInverseMap
 			.put(
 				"organizationOrganization_dedup_isMergedIn", new RelationInverse()
 					.setInverse("merges")
 					.setRelation("isMergedIn")
 					.setRelType("organizationOrganization")
 					.setSubReltype("dedup"));
 		relationInverseMap
 			.put(
 				"organizationOrganization_dedupSimilarity_isSimilarTo", new RelationInverse()
 					.setInverse("isSimilarTo")
 					.setRelation("isSimilarTo")
 					.setRelType("organizationOrganization")
 					.setSubReltype("dedupSimilarity"));
 		relationInverseMap
 			.put(
 				"resultProject_outcome_isProducedBy", new RelationInverse()
 					.setRelation("isProducedBy")
 					.setInverse("produces")
 					.setRelType("resultProject")
 					.setSubReltype("outcome"));
 		relationInverseMap
 			.put(
 				"resultProject_outcome_produces", new RelationInverse()
 					.setInverse("isProducedBy")
 					.setRelation("produces")
 					.setRelType("resultProject")
 					.setSubReltype("outcome"));
 		relationInverseMap
 			.put(
 				"projectPerson_contactPerson_isContact", new RelationInverse()
 					.setRelation("isContact")
 					.setInverse("hasContact")
 					.setRelType("projectPerson")
 					.setSubReltype("contactPerson"));
 		// Inverse direction of "projectPerson_contactPerson_isContact": relType and subRelType must match
 		// the key prefix (projectPerson / contactPerson), exactly as in the isContact entry.
 		relationInverseMap
 			.put(
 				"projectPerson_contactPerson_hasContact", new RelationInverse()
 					.setInverse("isContact")
 					.setRelation("hasContact")
 					.setRelType("projectPerson")
 					.setSubReltype("contactPerson"));
 		// Symmetric relation: relation and inverse carry the same class name.
 		// NOTE(review): the key spells "isCoauthorOf" (lowercase 'a') while the relation/inverse values are
 		// "isCoAuthorOf" — confirm which spelling the lookups on this map actually use.
 		relationInverseMap
 			.put(
 				"personPerson_coAuthorship_isCoauthorOf", new RelationInverse()
 					.setInverse("isCoAuthorOf")
 					.setRelation("isCoAuthorOf")
 					.setRelType("personPerson")
 					.setSubReltype("coAuthorship"));
 		relationInverseMap
 			.put(
 				"personPerson_dedup_merges", new RelationInverse()
 					.setInverse("isMergedIn")
 					.setRelation("merges")
 					.setRelType("personPerson")
 					.setSubReltype("dedup"));
 		relationInverseMap
 			.put(
 				"personPerson_dedup_isMergedIn", new RelationInverse()
 					.setInverse("merges")
 					.setRelation("isMergedIn")
 					.setRelType("personPerson")
 					.setSubReltype("dedup"));
 		relationInverseMap
 			.put(
 				"personPerson_dedupSimilarity_isSimilarTo", new RelationInverse()
 					.setInverse("isSimilarTo")
 					.setRelation("isSimilarTo")
 					.setRelType("personPerson")
 					.setSubReltype("dedupSimilarity"));
 		relationInverseMap
 			.put(
 				"datasourceOrganization_provision_isProvidedBy", new RelationInverse()
 					.setInverse("provides")
 					.setRelation("isProvidedBy")
 					.setRelType("datasourceOrganization")
 					.setSubReltype("provision"));
 		relationInverseMap
 			.put(
 				"datasourceOrganization_provision_provides", new RelationInverse()
 					.setInverse("isProvidedBy")
 					.setRelation("provides")
 					.setRelType("datasourceOrganization")
 					.setSubReltype("provision"));
 		relationInverseMap
 			.put(
 				"resultResult_similarity_hasAmongTopNSimilarDocuments", new RelationInverse()
 					.setInverse("isAmongTopNSimilarDocuments")
 					.setRelation("hasAmongTopNSimilarDocuments")
 					.setRelType("resultResult")
 					.setSubReltype("similarity"));
 		relationInverseMap
 			.put(
 				"resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse()
 					.setInverse("hasAmongTopNSimilarDocuments")
 					.setRelation("isAmongTopNSimilarDocuments")
 					.setRelType("resultResult")
 					.setSubReltype("similarity"));
 		relationInverseMap
 			.put(
 				"resultResult_relationship_isRelatedTo", new RelationInverse()
 					.setInverse("isRelatedTo")
 					.setRelation("isRelatedTo")
 					.setRelType("resultResult")
 					.setSubReltype("relationship"));
 		// NOTE(review): this key is already registered earlier in this static block with the same relation,
 		// inverse, relType and subRelType; this put only overwrites that entry with an equivalent one.
 		relationInverseMap
 			.put(
 				"resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse()
 					.setInverse("hasAmongTopNSimilarDocuments")
 					.setRelation("isAmongTopNSimilarDocuments")
 					.setRelType("resultResult")
 					.setSubReltype("similarity"));
 		relationInverseMap
 			.put(
 				"resultResult_supplement_isSupplementTo", new RelationInverse()
 					.setInverse("isSupplementedBy")
 					.setRelation("isSupplementTo")
 					.setRelType("resultResult")
 					.setSubReltype("supplement"));
 		relationInverseMap
 			.put(
 				"resultResult_supplement_isSupplementedBy", new RelationInverse()
 					.setInverse("isSupplementTo")
 					.setRelation("isSupplementedBy")
 					.setRelType("resultResult")
 					.setSubReltype("supplement"));
 		relationInverseMap
 			.put(
 				"resultResult_part_isPartOf", new RelationInverse()
 					.setInverse("hasPart")
 					.setRelation("isPartOf")
 					.setRelType("resultResult")
 					.setSubReltype("part"));
 		relationInverseMap
 			.put(
 				"resultResult_part_hasPart", new RelationInverse()
 					.setInverse("isPartOf")
 					.setRelation("hasPart")
 					.setRelType("resultResult")
 					.setSubReltype("part"));
 		relationInverseMap
 			.put(
 				"resultResult_dedup_merges", new RelationInverse()
 					.setInverse("isMergedIn")
 					.setRelation("merges")
 					.setRelType("resultResult")
 					.setSubReltype("dedup"));
 		relationInverseMap
 			.put(
 				"resultResult_dedup_isMergedIn", new RelationInverse()
 					.setInverse("merges")
 					.setRelation("isMergedIn")
 					.setRelType("resultResult")
 					.setSubReltype("dedup"));
 		relationInverseMap
 			.put(
 				"resultResult_dedupSimilarity_isSimilarTo", new RelationInverse()
 					.setInverse("isSimilarTo")
 					.setRelation("isSimilarTo")
 					.setRelType("resultResult")
 					.setSubReltype("dedupSimilarity"));
 	}
 	private static final String schemeTemplate = "dnet:%s_%s_relations";
 	private ModelSupport() {
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java
@ -0,0 +1,46 @@
 package eu.dnetlib.dhp.schema.common;
 /**
  * Describes a relation class together with its inverse class and the relType / subRelType both belong to.
  * Every setter returns this same instance, so a descriptor can be populated with chained calls.
  */
 public class RelationInverse {
 	/** Name of the relation class. */
 	private String relation;
 	/** Name of the relation class expressing the opposite direction. */
 	private String inverse;
 	/** Relation type shared by the relation and its inverse. */
 	private String relType;
 	/** Relation sub-type shared by the relation and its inverse. */
 	private String subReltype;
 	/** @return the relation class name, or {@code null} when it was never set */
 	public String getRelation() {
 		return this.relation;
 	}
 	/**
 	 * @param relation the relation class name
 	 * @return this instance
 	 */
 	public RelationInverse setRelation(final String relation) {
 		this.relation = relation;
 		return this;
 	}
 	/** @return the inverse relation class name, or {@code null} when it was never set */
 	public String getInverse() {
 		return this.inverse;
 	}
 	/**
 	 * @param inverse the inverse relation class name
 	 * @return this instance
 	 */
 	public RelationInverse setInverse(final String inverse) {
 		this.inverse = inverse;
 		return this;
 	}
 	/** @return the relation type, or {@code null} when it was never set */
 	public String getRelType() {
 		return this.relType;
 	}
 	/**
 	 * @param relType the relation type
 	 * @return this instance
 	 */
 	public RelationInverse setRelType(final String relType) {
 		this.relType = relType;
 		return this;
 	}
 	/** @return the relation sub-type, or {@code null} when it was never set */
 	public String getSubReltype() {
 		return this.subReltype;
 	}
 	/**
 	 * @param subReltype the relation sub-type
 	 * @return this instance
 	 */
 	public RelationInverse setSubReltype(final String subReltype) {
 		this.subReltype = subReltype;
 		return this;
 	}
 }
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java
@ -2,8 +2,7 @@
 package eu.dnetlib.dhp.schema.oaf;
 import java.io.Serializable;
-import java.util.List;
+import java.util.*;
 import java.util.Objects;
 public class Author implements Serializable {
@ -86,4 +85,5 @@ public class Author implements Serializable {
 	public int hashCode() {
 		// Combines fullname, name, surname, rank, pid and affiliation into a single hash value.
 		return Objects.hash(fullname, name, surname, rank, pid, affiliation);
 	}
 }
--- a/dhp-workflows/dhp-blacklist/pom.xml
+++ b/dhp-workflows/dhp-blacklist/pom.xml
@ -0,0 +1,36 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>dhp-workflows</artifactId>
        <groupId>eu.dnetlib.dhp</groupId>
        <version>1.2.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>dhp-blacklist</artifactId>
    <dependencies>
        <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-common</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-schemas</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
        </dependency>
    </dependencies>
 </project>
--- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java
+++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java
@ -0,0 +1,87 @@
 package eu.dnetlib.dhp.blacklist;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 /**
  * Spark job that reads the relations stored as JSON lines under {@code sourcePath}, keeps those matching
  * {@code relclass = 'merges' and datainfo.deletedbyinference=false}, and writes them as gzip-compressed JSON to
  * {@code outputPath}, overwriting any previous content.
  */
 public class PrepareMergedRelationJob {
 	private static final Logger log = LoggerFactory.getLogger(PrepareMergedRelationJob.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	/**
 	 * Entry point: parses the command line arguments described by
 	 * {@code input_preparerelation_parameters.json} and runs the selection inside a Spark session with Hive support.
 	 *
 	 * @param args the command line arguments
 	 * @throws Exception when argument parsing or the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		final String parametersJson = IOUtils
 			.toString(
 				PrepareMergedRelationJob.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/blacklist/input_preparerelation_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(parametersJson);
 		parser.parseArgument(args);
 		// The session is considered managed unless the optional flag says otherwise.
 		final String managedFlag = parser.get("isSparkSessionManaged");
 		final Boolean isSparkSessionManaged = managedFlag == null ? Boolean.TRUE : Boolean.valueOf(managedFlag);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {} ", outputPath);
 		final SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> selectMergesRelations(spark, inputPath, outputPath));
 	}
 	/** Filters the relations read from {@code inputPath} and stores the survivors under {@code outputPath}. */
 	private static void selectMergesRelations(SparkSession spark, String inputPath, String outputPath) {
 		final Dataset<Relation> merges = readRelations(spark, inputPath)
 			.filter("relclass = 'merges' and datainfo.deletedbyinference=false");
 		merges
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(outputPath);
 	}
 	/**
 	 * Reads a text file in which every line is the JSON serialization of a {@link Relation}.
 	 *
 	 * @param spark the Spark session used for reading
 	 * @param inputPath the location of the text file(s)
 	 * @return the dataset of relations parsed from the lines
 	 */
 	public static Dataset<Relation> readRelations(
 		SparkSession spark, String inputPath) {
 		final MapFunction<String, Relation> parseLine = line -> OBJECT_MAPPER.readValue(line, Relation.class);
 		return spark
 			.read()
 			.textFile(inputPath)
 			.map(parseLine, Encoders.bean(Relation.class));
 	}
 }
--- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java
+++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java
@ -0,0 +1,141 @@
 package eu.dnetlib.dhp.blacklist;
 import java.io.BufferedWriter;
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.nio.charset.StandardCharsets;
 import java.sql.ResultSet;
 import java.util.Arrays;
 import java.util.List;
 import java.util.function.Consumer;
 import java.util.function.Function;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.DbClient;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.common.RelationInverse;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 /**
  * Reads the blacklist entries with status 'ACCEPTED' from a database through {@link DbClient} and writes, for each
  * row, two JSON-serialized {@link Relation}s (the direct one and its inverse), one per line, to a file on HDFS.
  * If the target file already exists the new lines are appended to it, otherwise the file is created.
  */
 public class ReadBlacklistFromDB implements Closeable {
 	private static final Log log = LogFactory.getLog(ReadBlacklistFromDB.class);
 	// A single shared mapper is enough: it holds no per-instance state of this class.
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	private final static String query = "SELECT source_type, unnest(original_source_objects) as source, " +
 		"target_type, unnest(original_target_objects) as target, " +
 		"relationship FROM blacklist WHERE status = 'ACCEPTED'";
 	private final DbClient dbClient;
 	private final Configuration conf;
 	private final BufferedWriter writer;
 	/**
 	 * Entry point: parses the arguments described by {@code blacklist_parameters.json} and dumps the blacklist to
 	 * {@code <hdfsPath>/blacklist}.
 	 *
 	 * @param args the command line arguments
 	 * @throws Exception when argument parsing, the database access or the HDFS access fails
 	 */
 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
 					ReadBlacklistFromDB.class
 						.getResourceAsStream(
 							"/eu/dnetlib/dhp/blacklist/blacklist_parameters.json")));
 		parser.parseArgument(args);
 		final String dbUrl = parser.get("postgresUrl");
 		final String dbUser = parser.get("postgresUser");
 		final String dbPassword = parser.get("postgresPassword");
 		final String hdfsPath = parser.get("hdfsPath") + "/blacklist";
 		final String hdfsNameNode = parser.get("hdfsNameNode");
 		try (final ReadBlacklistFromDB rbl = new ReadBlacklistFromDB(hdfsPath, hdfsNameNode, dbUrl, dbUser,
 			dbPassword)) {
 			log.info("Processing blacklist...");
 			rbl.execute(query, rbl::processBlacklistEntry);
 		}
 	}
 	/**
 	 * Runs {@code sql} and writes every relation that {@code producer} derives from each result row.
 	 *
 	 * @param sql the query to execute
 	 * @param producer converts the current row of the result set into the relations to write
 	 * @throws Exception when the query execution fails
 	 */
 	public void execute(final String sql, final Function<ResultSet, List<Relation>> producer) throws Exception {
 		final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(this::writeRelation);
 		dbClient.processResults(sql, consumer);
 	}
 	/**
 	 * Builds the direct and the inverse relation described by the current row of {@code rs}.
 	 *
 	 * @param rs the result set positioned on a blacklist row
 	 * @return a two-element list: the direct relation followed by its inverse
 	 * @throws RuntimeException when the row cannot be read or its relationship encoding is not registered in
 	 *         {@link ModelSupport#relationInverseMap}
 	 */
 	public List<Relation> processBlacklistEntry(ResultSet rs) {
 		try {
 			Relation direct = new Relation();
 			Relation inverse = new Relation();
 			// NOTE(review): a source_type/target_type missing from entityIdPrefix yields a "null|..." identifier;
 			// confirm whether such rows can occur.
 			String source_prefix = ModelSupport.entityIdPrefix.get(rs.getString("source_type"));
 			String target_prefix = ModelSupport.entityIdPrefix.get(rs.getString("target_type"));
 			String source_direct = source_prefix + "|" + rs.getString("source");
 			direct.setSource(source_direct);
 			inverse.setTarget(source_direct);
 			String target_direct = target_prefix + "|" + rs.getString("target");
 			direct.setTarget(target_direct);
 			inverse.setSource(target_direct);
 			String encoding = rs.getString("relationship");
 			RelationInverse ri = ModelSupport.relationInverseMap.get(encoding);
 			if (ri == null) {
 				// Fail with the offending value instead of an anonymous NullPointerException.
 				throw new IllegalArgumentException("Unknown relationship encoding in blacklist: " + encoding);
 			}
 			direct.setRelClass(ri.getRelation());
 			inverse.setRelClass(ri.getInverse());
 			direct.setRelType(ri.getRelType());
 			inverse.setRelType(ri.getRelType());
 			direct.setSubRelType(ri.getSubReltype());
 			inverse.setSubRelType(ri.getSubReltype());
 			return Arrays.asList(direct, inverse);
 		} catch (final Exception e) {
 			throw new RuntimeException(e);
 		}
 	}
 	/**
 	 * Closes the database client and the HDFS writer. The writer is closed even when closing the database client
 	 * fails, so that buffered lines are still flushed.
 	 *
 	 * @throws IOException when closing either resource fails
 	 */
 	@Override
 	public void close() throws IOException {
 		try {
 			dbClient.close();
 		} finally {
 			writer.close();
 		}
 	}
 	/**
 	 * Opens the database connection and the HDFS output file.
 	 *
 	 * @param hdfsPath the path of the file to write
 	 * @param hdfsNameNode the value set as {@code fs.defaultFS}
 	 * @param dbUrl the database url
 	 * @param dbUser the database user
 	 * @param dbPassword the database password
 	 * @throws Exception when the database connection or the HDFS file cannot be opened
 	 */
 	public ReadBlacklistFromDB(
 		final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
 		throws Exception {
 		this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
 		this.conf = new Configuration();
 		this.conf.set("fs.defaultFS", hdfsNameNode);
 		BufferedWriter hdfsWriter;
 		try {
 			hdfsWriter = openWriter(this.conf, new Path(hdfsPath));
 		} catch (final Exception e) {
 			// The instance is never handed to the caller, so release the already opened database client here.
 			try {
 				this.dbClient.close();
 			} catch (final Exception closeFailure) {
 				e.addSuppressed(closeFailure);
 			}
 			throw e;
 		}
 		this.writer = hdfsWriter;
 	}
 	/** Opens a UTF-8 writer on {@code hdfsWritePath}, appending when the file exists and creating it otherwise. */
 	private static BufferedWriter openWriter(final Configuration conf, final Path hdfsWritePath) throws IOException {
 		final FileSystem fileSystem = FileSystem.get(conf);
 		final FSDataOutputStream fsDataOutputStream = fileSystem.exists(hdfsWritePath)
 			? fileSystem.append(hdfsWritePath)
 			: fileSystem.create(hdfsWritePath);
 		return new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
 	}
 	/**
 	 * Serializes {@code r} as JSON and writes it as one line of the output file.
 	 *
 	 * @param r the relation to write
 	 * @throws RuntimeException when serialization or writing fails
 	 */
 	protected void writeRelation(final Relation r) {
 		try {
 			writer.write(OBJECT_MAPPER.writeValueAsString(r));
 			writer.newLine();
 		} catch (final Exception e) {
 			throw new RuntimeException(e);
 		}
 	}
 }
--- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java
+++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java
@ -0,0 +1,147 @@
 package eu.dnetlib.dhp.blacklist;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.util.Objects;
 import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.*;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import scala.Tuple2;
 /**
  * Spark job that removes from the input relations those listed in the blacklist. Before the comparison, the source
  * and target of each blacklisted relation are replaced by the source of a matching "merges" relation, when one
  * exists; the rewritten blacklist is also stored under {@code <hdfsPath>/deduped}.
  */
 public class SparkRemoveBlacklistedRelationJob {
 	private static final Logger log = LoggerFactory.getLogger(SparkRemoveBlacklistedRelationJob.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	/**
 	 * Entry point: parses the arguments described by {@code sparkblacklist_parameters.json} and runs the job.
 	 *
 	 * @param args the command line arguments
 	 * @throws Exception when argument parsing or the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				SparkRemoveBlacklistedRelationJob.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/blacklist/sparkblacklist_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		// The three messages below previously had the colon after the placeholder ("outputPath {}: ").
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String blacklistPath = parser.get("hdfsPath");
 		log.info("blacklistPath: {}", blacklistPath);
 		final String mergesPath = parser.get("mergesPath");
 		log.info("mergesPath: {}", mergesPath);
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				removeBlacklistedRelations(
 					spark,
 					blacklistPath,
 					inputPath,
 					outputPath,
 					mergesPath);
 			});
 	}
 	/**
 	 * Rewrites the blacklist through the merges relations and writes the input relations that are not blacklisted.
 	 *
 	 * @param spark the Spark session
 	 * @param blacklistPath the folder containing the {@code blacklist} file; {@code deduped} is written next to it
 	 * @param inputPath the location of the relations to filter
 	 * @param outputPath the location where the surviving relations are written as gzip-compressed JSON
 	 * @param mergesPath the location of the merges relations
 	 */
 	private static void removeBlacklistedRelations(SparkSession spark, String blacklistPath, String inputPath,
 		String outputPath, String mergesPath) {
 		Dataset<Relation> blackListed = readRelations(spark, blacklistPath + "/blacklist");
 		Dataset<Relation> inputRelation = readRelations(spark, inputPath);
 		Dataset<Relation> mergesRelation = readRelations(spark, mergesPath);
 		// count() is a Spark action: it reads the whole input once just to log its size.
 		log.info("InputRelationCount: {}", inputRelation.count());
 		// Step 1: when the blacklisted source is the target of a merges relation, use the merges source instead.
 		Dataset<Relation> dedupSource = blackListed
 			.joinWith(
 				mergesRelation, blackListed.col("source").equalTo(mergesRelation.col("target")),
 				"left_outer")
 			.map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
 				Optional
 					.ofNullable(c._2())
 					.ifPresent(mr -> c._1().setSource(mr.getSource()));
 				return c._1();
 			}, Encoders.bean(Relation.class));
 		// Step 2: same replacement for the blacklisted target.
 		Dataset<Relation> dedupBL = dedupSource
 			.joinWith(
 				mergesRelation, dedupSource.col("target").equalTo(mergesRelation.col("target")),
 				"left_outer")
 			.map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
 				Optional
 					.ofNullable(c._2())
 					.ifPresent(mr -> c._1().setTarget(mr.getSource()));
 				return c._1();
 			}, Encoders.bean(Relation.class));
 		dedupBL
 			.write()
 			.mode(SaveMode.Overwrite)
 			.json(blacklistPath + "/deduped");
 		// Step 3: pair every input relation with the blacklisted relation having the same source and target, if any.
 		inputRelation
 			.joinWith(
 				dedupBL, (inputRelation
 					.col("source")
 					.equalTo(dedupBL.col("source"))
 					.and(
 						inputRelation
 							.col("target")
 							.equalTo(dedupBL.col("target")))),
 				"left_outer")
 			.map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
 				Relation ir = c._1();
 				Relation bl = c._2();
 				// A relation is dropped only when Relation.equals reports it equal to the paired blacklist entry;
 				// the null marker is removed by the filter that follows.
 				if (bl != null && ir.equals(bl)) {
 					return null;
 				}
 				return ir;
 			}, Encoders.bean(Relation.class))
 			.filter(Objects::nonNull)
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(outputPath);
 	}
 	/**
 	 * Reads a text file in which every line is the JSON serialization of a {@link Relation}.
 	 *
 	 * @param spark the Spark session used for reading
 	 * @param inputPath the location of the text file(s)
 	 * @return the dataset of relations parsed from the lines
 	 */
 	public static Dataset<Relation> readRelations(
 		SparkSession spark, String inputPath) {
 		return spark
 			.read()
 			.textFile(inputPath)
 			.map(
 				(MapFunction<String, Relation>) value -> OBJECT_MAPPER.readValue(value, Relation.class),
 				Encoders.bean(Relation.class));
 	}
 }
--- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/blacklist_parameters.json
+++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/blacklist_parameters.json
@ -0,0 +1,32 @@
 [
 	{
 		"paramName": "p",
 		"paramLongName": "hdfsPath",
 		"paramDescription": "the path where the sequential file is stored",
 		"paramRequired": true
 	},
 	{
 		"paramName": "nn",
 		"paramLongName": "hdfsNameNode",
 		"paramDescription": "the name node on hdfs",
 		"paramRequired": true
 	},
 	{
 		"paramName": "pgurl",
 		"paramLongName": "postgresUrl",
 		"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
 		"paramRequired": true
 	},
 	{
 		"paramName": "pguser",
 		"paramLongName": "postgresUser",
 		"paramDescription": "postgres user",
 		"paramRequired": false
 	},
 	{
 		"paramName": "pgpasswd",
 		"paramLongName": "postgresPassword",
 		"paramDescription": "postgres password",
 		"paramRequired": false
 	}
 ]
--- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/input_preparerelation_parameters.json
+++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/input_preparerelation_parameters.json
@ -0,0 +1,26 @@
 [
 	{
 		"paramName": "s",
 		"paramLongName": "sourcePath",
 		"paramDescription": "the path to the graph used to remove the relations ",
 		"paramRequired": true
 	},
 	{
 		"paramName": "out",
 		"paramLongName": "outputPath",
 		"paramDescription": "the path where to store the temporary result ",
 		"paramRequired": true
 	},
 	{
 		"paramName": "issm",
 		"paramLongName": "isSparkSessionManaged",
 		"paramDescription": "true if the spark session is managed",
 		"paramRequired": false
 	},
 	{
 		"paramName":"h",
 		"paramLongName":"hive_metastore_uris",
 		"paramDescription": "the hive metastore uris",
 		"paramRequired": true
 	}
 ]
--- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/config-default.xml
@ -0,0 +1,54 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml
@ -0,0 +1,195 @@
 <workflow-app name="blacklist_relations" xmlns="uri:oozie:workflow:0.5">
    <!-- Workflow parameters: none of them declares a default value here, so each must be provided at submission. -->
    <parameters>
        <!-- Connection settings handed to the read_blacklist action. -->
        <property>
            <name>postgresURL</name>
            <description>the url of the postgres server to query</description>
        </property>
        <property>
            <name>postgresUser</name>
            <description>the username to access the postgres db</description>
        </property>
        <property>
            <name>postgresPassword</name>
            <description>the postgres password</description>
        </property>
        <!-- Input location: entities are copied from here and relations are read from ${sourcePath}/relation. -->
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <!-- Output location: wiped by reset_outputpath, then filled with the copied entities and the filtered relations. -->
        <property>
            <name>outputPath</name>
            <description>the graph output path</description>
        </property>
    </parameters>
    <!-- Entry point: the workflow begins by resetting the output directory. -->
    <start to="reset_outputpath"/>
    <!-- Common failure sink: every action routes its error transition here; reports the last failing node's message. -->
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <!-- Deletes ${outputPath} and recreates it as an empty directory before anything is written to it. -->
    <action name="reset_outputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="copy_entities"/>
        <error to="Kill"/>
    </action>
    <!-- Starts the seven entity copy actions in parallel; each of them transitions to the "wait" join on success. -->
    <fork name="copy_entities">
        <path start="copy_publication"/>
        <path start="copy_dataset"/>
        <path start="copy_orp"/>
        <path start="copy_software"/>
        <path start="copy_datasource"/>
        <path start="copy_project"/>
        <path start="copy_organization"/>
    </fork>
    <!-- distcp of ${sourcePath}/publication to ${outputPath}/publication (both resolved against ${nameNode}). -->
    <action name="copy_publication">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/publication</arg>
            <arg>${nameNode}/${outputPath}/publication</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- distcp of ${sourcePath}/dataset to ${outputPath}/dataset (both resolved against ${nameNode}). -->
    <action name="copy_dataset">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/dataset</arg>
            <arg>${nameNode}/${outputPath}/dataset</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- distcp of ${sourcePath}/otherresearchproduct to ${outputPath}/otherresearchproduct (both resolved against ${nameNode}). -->
    <action name="copy_orp">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
            <arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- distcp of ${sourcePath}/software to ${outputPath}/software (both resolved against ${nameNode}). -->
    <action name="copy_software">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/software</arg>
            <arg>${nameNode}/${outputPath}/software</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- distcp of ${sourcePath}/organization to ${outputPath}/organization (both resolved against ${nameNode}). -->
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- distcp of ${sourcePath}/project to ${outputPath}/project (both resolved against ${nameNode}). -->
    <action name="copy_project">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- distcp of ${sourcePath}/datasource to ${outputPath}/datasource (both resolved against ${nameNode}). -->
    <action name="copy_datasource">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Join for the copy_entities fork: continues with read_blacklist once the parallel copy branches reach it. -->
    <join name="wait" to="read_blacklist"/>
    <!--
        Runs ReadBlacklistFromDB as a plain Java action, passing the Postgres connection parameters and
        ${workingDir}/blacklist as the hdfsPath argument; apply_blacklist later receives that same path as its hdfsPath argument.
        NOTE(review): presumably this dumps the blacklist from Postgres to that HDFS path - confirm in ReadBlacklistFromDB.
    -->
    <action name="read_blacklist">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.dhp.blacklist.ReadBlacklistFromDB</main-class>
            <arg>--hdfsPath</arg><arg>${workingDir}/blacklist</arg>
            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
            <arg>--postgresUrl</arg><arg>${postgresURL}</arg>
            <arg>--postgresUser</arg><arg>${postgresUser}</arg>
            <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
        </java>
        <ok to="prepare_merged_relation"/>
        <error to="Kill"/>
    </action>
    <!--
        Runs PrepareMergedRelationJob on YARN (cluster mode) with ${sourcePath}/relation as input and
        ${workingDir}/mergesRelation as output; that output is the mergesPath argument of apply_blacklist.
        The spark-opts values come from the workflow configuration (see config-default.xml).
    -->
    <action name="prepare_merged_relation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareMergedRelation</name>
            <class>eu.dnetlib.dhp.blacklist.PrepareMergedRelationJob</class>
            <jar>dhp-blacklist-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--outputPath</arg><arg>${workingDir}/mergesRelation</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
        </spark>
        <ok to="apply_blacklist"/>
        <error to="Kill"/>
    </action>
    <!--
        Final step: runs SparkRemoveBlacklistedRelationJob on YARN (cluster mode).
        Inputs: the relations in ${sourcePath}/relation, the blacklist in ${workingDir}/blacklist and the
        merge relations in ${workingDir}/mergesRelation. Output: ${outputPath}/relation, next to the copied entities.
    -->
    <action name="apply_blacklist">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ApplyBlacklist</name>
            <class>eu.dnetlib.dhp.blacklist.SparkRemoveBlacklistedRelationJob</class>
            <jar>dhp-blacklist-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--outputPath</arg><arg>${outputPath}/relation</arg>
            <arg>--hdfsPath</arg><arg>${workingDir}/blacklist</arg>
            <arg>--mergesPath</arg><arg>${workingDir}/mergesRelation</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <!-- Successful termination, reached after apply_blacklist completes. -->
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/sparkblacklist_parameters.json
+++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/sparkblacklist_parameters.json
@ -0,0 +1,33 @@
 [
 	{
 		"paramName": "p",
 		"paramLongName": "hdfsPath",
 		"paramDescription": "the path where storing the sequential file",
 		"paramRequired": true
 	},
 	{
 		"paramName": "s",
 		"paramLongName": "sourcePath",
 		"paramDescription": "the path to the graph used to remove the relations ",
 		"paramRequired": true
 	},
 	{
 		"paramName": "out",
 		"paramLongName": "outputPath",
 		"paramDescription": "the path where to store the temporary result ",
 		"paramRequired": true
 	},
 	{
 		"paramName": "issm",
 		"paramLongName": "isSparkSessionManaged",
 		"paramDescription": "true if the spark session is managed",
 		"paramRequired": false
 	},
 	{
 		"paramName": "m",
 		"paramLongName": "mergesPath",
 		"paramDescription": "true if the spark session is managed",
 		"paramRequired": true
 	}
 ]
--- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java
+++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java
@ -0,0 +1,167 @@
 package eu.dnetlib.dhp.blacklist;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 /**
  * Exercises {@link SparkRemoveBlacklistedRelationJob} on a local Spark session, using the fixtures under
  * {@code /eu/dnetlib/dhp/blacklist/}, and checks the relations the job writes to its output path.
  */
 public class BlackListTest {
 	private static final Logger log = LoggerFactory.getLogger(BlackListTest.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	/** Classpath folder holding the relation, blacklist and merge-relation fixtures. */
 	private static final String RESOURCE_ROOT = "/eu/dnetlib/dhp/blacklist/";
 	private static SparkSession spark;
 	private static Path workingDir;
 	/** Creates the temporary working directory and the local Spark session shared by all tests. */
 	@BeforeAll
 	public static void beforeAll() throws IOException {
 		workingDir = Files.createTempDirectory(BlackListTest.class.getSimpleName());
 		log.info("using work dir {}", workingDir);
 		SparkConf conf = new SparkConf();
 		conf.setAppName(BlackListTest.class.getSimpleName());
 		conf.setMaster("local[*]");
 		conf.set("spark.driver.host", "localhost");
 		conf.set("hive.metastore.local", "true");
 		conf.set("spark.ui.enabled", "false");
 		conf.set("spark.sql.warehouse.dir", workingDir.toString());
 		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
 		spark = SparkSession
 			.builder()
 			.appName(BlackListTest.class.getSimpleName())
 			.config(conf)
 			.getOrCreate();
 	}
 	/**
 	 * Stops the Spark session and removes the working directory. The session is stopped first and the
 	 * directory is removed in a {@code finally} block, so a failure in one step does not skip the other.
 	 */
 	@AfterAll
 	public static void afterAll() throws IOException {
 		try {
 			if (spark != null) {
 				spark.stop();
 			}
 		} finally {
 			if (workingDir != null) {
 				FileUtils.deleteDirectory(workingDir.toFile());
 			}
 		}
 	}
 	/** Expects 13 relations in the output when the job runs on the {@code relationsNoRemoval} fixture. */
 	@Test
 	public void noRemoveTest() throws Exception {
 		JavaRDD<Relation> relations = runJob("relationsNoRemoval", "mergesRel", "noRemoveTest");
 		Assertions.assertEquals(13, relations.count());
 	}
 	/**
 	 * Expects 12 relations in the output for the {@code relationsOneRemoval} fixture, and that the blacklisted
 	 * {@code hasParticipant} relation between the given project and organization is no longer among them.
 	 */
 	@Test
 	public void removeNoMergeMatchTest() throws Exception {
 		JavaRDD<Relation> relations = runJob("relationsOneRemoval", "mergesRel", "removeNoMergeMatchTest");
 		Assertions.assertEquals(12, relations.count());
 		org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
 			.createDataset(relations.rdd(), Encoders.bean(Relation.class));
 		Assertions
 			.assertEquals(
 				0, verificationDataset
 					.filter(
 						"source = '40|corda__h2020::5161f53ab205d803c36b4c888fe7deef' and " +
 							"target = '20|dedup_wf_001::157af406bc653aa4d9749318b644de43'")
 					.count());
 		Assertions.assertEquals(0, verificationDataset.filter("relClass = 'hasParticipant'").count());
 	}
 	/**
 	 * Uses the {@code mergesRelOneMerge} fixture, in which a dedup record merges a blacklisted result.
 	 * Expects 12 relations in the output, all of them with relClass {@code isProvidedBy}.
 	 */
 	@Test
 	public void removeMergeMatchTest() throws Exception {
 		JavaRDD<Relation> relations = runJob("relationOneRemovalWithMatch", "mergesRelOneMerge", "removeMergeMatchTest");
 		Assertions.assertEquals(12, relations.count());
 		org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
 			.createDataset(relations.rdd(), Encoders.bean(Relation.class));
 		Assertions.assertEquals(12, verificationDataset.filter("relClass = 'isProvidedBy'").count());
 	}
 	/**
 	 * Runs {@link SparkRemoveBlacklistedRelationJob} and reads back what it wrote.
 	 *
 	 * @param sourceFixture name of the fixture folder passed as {@code sourcePath} (the input relations)
 	 * @param mergesFixture name of the fixture folder passed as {@code mergesPath} (the merge relations)
 	 * @param testName used to give each test its own output folder under the working directory, so the
 	 *        tests do not depend on execution order or on the job overwriting a previous output
 	 * @return the relations parsed from the job output, one JSON document per line
 	 */
 	private static JavaRDD<Relation> runJob(final String sourceFixture, final String mergesFixture,
 		final String testName) throws Exception {
 		final String outputPath = workingDir.resolve(testName).resolve("relation").toString();
 		SparkRemoveBlacklistedRelationJob
 			.main(
 				new String[] {
 					"-isSparkSessionManaged",
 					Boolean.FALSE.toString(),
 					"-sourcePath",
 					resourcePath(sourceFixture),
 					"-outputPath",
 					outputPath,
 					"-hdfsPath",
 					resourcePath("blacklist"),
 					"-mergesPath",
 					resourcePath(mergesFixture)
 				});
 		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
 		return sc
 			.textFile(outputPath)
 			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
 	}
 	/** Resolves a fixture folder to a file system path, failing with a clear message when it is missing. */
 	private static String resourcePath(final String fixture) {
 		final java.net.URL url = BlackListTest.class.getResource(RESOURCE_ROOT + fixture);
 		Assertions.assertNotNull(url, "missing test resource: " + RESOURCE_ROOT + fixture);
 		return url.getPath();
 	}
 }
--- a/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/blacklist/blacklist
+++ b/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/blacklist/blacklist
@ -0,0 +1,20 @@
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"projectOrganization","subRelType":"participation","relClass":"hasParticipant","source":"40|corda__h2020::5161f53ab205d803c36b4c888fe7deef","target":"20|dedup_wf_001::157af406bc653aa4d9749318b644de43"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"projectOrganization","subRelType":"participation","relClass":"isParticipant","source":"20|dedup_wf_001::157af406bc653aa4d9749318b644de43","target":"40|corda__h2020::5161f53ab205d803c36b4c888fe7deef"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"isProducedBy","source":"50|od_______908::a47e1c3ede9a21ee5278a2e5c338d69b","target":"40|corda_______::189ff31d637eaaeaf4d3584dc490b1cf"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|corda_______::189ff31d637eaaeaf4d3584dc490b1cf","target":"50|od_______908::a47e1c3ede9a21ee5278a2e5c338d69b"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"isProducedBy","source":"50|od________18::a727cc288016db7132ef9a799aa83350","target":"40|corda_______::9826e8aba3e8f3a2a46545cf341838a8"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|corda_______::9826e8aba3e8f3a2a46545cf341838a8","target":"50|od________18::a727cc288016db7132ef9a799aa83350"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"isProducedBy","source":"50|od________18::062cf091d5c7a7d730001c34177042e3","target":"40|corda_______::9826e8aba3e8f3a2a46545cf341838a8"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|corda_______::9826e8aba3e8f3a2a46545cf341838a8","target":"50|od________18::062cf091d5c7a7d730001c34177042e3"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"isProducedBy","source":"50|webcrawl____::68c191d9b972b47a235d311804c7f6f5","target":"40|corda_______::c3d0b21615b129cd7395e24f9cf6bb64"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|corda_______::c3d0b21615b129cd7395e24f9cf6bb64","target":"50|webcrawl____::68c191d9b972b47a235d311804c7f6f5"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"isProducedBy","source":"50|od_______908::1b172ab34639e7935e2357119cf20830","target":"40|corda_______::c3d0b21615b129cd7395e24f9cf6bb64"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|corda_______::c3d0b21615b129cd7395e24f9cf6bb64","target":"50|od_______908::1b172ab34639e7935e2357119cf20830"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"isProducedBy","source":"50|doajarticles::cb234c66327d29ba5f13c0db7a4cf423","target":"40|corda_______::c3d0b21615b129cd7395e24f9cf6bb64"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|corda_______::c3d0b21615b129cd7395e24f9cf6bb64","target":"50|doajarticles::cb234c66327d29ba5f13c0db7a4cf423"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"isProducedBy","source":"50|od______1146::e2fafaba636a14e408f02c6ea26acb0e","target":"40|corda_______::35695c955c51f0bb39482ce5477047c7"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|corda_______::35695c955c51f0bb39482ce5477047c7","target":"50|od______1146::e2fafaba636a14e408f02c6ea26acb0e"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"isProducedBy","source":"50|od_______908::b8e86ed982ff331764456e1f0759ed9c","target":"40|corda_______::35695c955c51f0bb39482ce5477047c7"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|corda_______::35695c955c51f0bb39482ce5477047c7","target":"50|od_______908::b8e86ed982ff331764456e1f0759ed9c"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"isProducedBy","source":"50|webcrawl____::c472bf5944ce0495844d505d43d1c021","target":"40|corda_______::35695c955c51f0bb39482ce5477047c7"}
 {"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|corda_______::35695c955c51f0bb39482ce5477047c7","target":"50|webcrawl____::c472bf5944ce0495844d505d43d1c021"}
--- a/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/mergesRel/mergesRel.json
+++ b/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/mergesRel/mergesRel.json
@ -0,0 +1,14 @@
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::3668b9bd87532a085dc7a18ce2086715","subRelType":"dedup","target":"50|od_______177::67c1385662f2fa0bde310bec15427646"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::3668b9bd87532a085dc7a18ce2086715","subRelType":"dedup","target":"50|doiboost____::8ea1631fa01adcbafc3f384b6a2c5cc3"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::3668b9bd87532a085dc7a18ce2086715","subRelType":"dedup","target":"50|od_______166::67c1385662f2fa0bde310bec15427646"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::d2a45f0f42d8dd66c364219924c37c3f","subRelType":"dedup","target":"50|od_______935::0bf7d9c5d2e1115a31cd558f83ae8ee3"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::d2a45f0f42d8dd66c364219924c37c3f","subRelType":"dedup","target":"50|doajarticles::d695fee344cb367a38ce6622f5fe9430"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::325525b879b17d8059a4e58def2f7225","subRelType":"dedup","target":"50|od_______267::14e952745e4b602ff72919aa881b8945"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::325525b879b17d8059a4e58def2f7225","subRelType":"dedup","target":"50|doiboost____::43941031067842fac90604d37b2a4149"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::325525b879b17d8059a4e58def2f7225","subRelType":"dedup","target":"50|core________::5c62b3ad05a23de613636607a424899d"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::325525b879b17d8059a4e58def2f7225","subRelType":"dedup","target":"50|scholexplore::1c467aabe5108ee840a4500d58f19328"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::cd4fc0411683ee762d50bfd30436f95b","subRelType":"dedup","target":"50|doiboost____::0ff61beeb12c49ed8a826b2b1883c8f8"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::cd4fc0411683ee762d50bfd30436f95b","subRelType":"dedup","target":"50|doajarticles::fca1220426b10ccb8b46e4967b353f37"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::a87be24a4fcac13c9298f0cc3acfc6ea","subRelType":"dedup","target":"50|doiboost____::dd96d41ee05d4022065c9d3096e1023a"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::a87be24a4fcac13c9298f0cc3acfc6ea","subRelType":"dedup","target":"50|erc_________::7d9a29ff323c2fe0ecf037189bf71b8e"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::a87be24a4fcac13c9298f0cc3acfc6ea","subRelType":"dedup","target":"50|webcrawl____::fdd999801fec35d4c6190bcabb850c52"}
--- a/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/mergesRelOneMerge/mergesRel.json
+++ b/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/mergesRelOneMerge/mergesRel.json
@ -0,0 +1,14 @@
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::3668b9bd87532a085dc7a18ce2086715","subRelType":"dedup","target":"50|od_______908::a47e1c3ede9a21ee5278a2e5c338d69b"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::3668b9bd87532a085dc7a18ce2086715","subRelType":"dedup","target":"50|doiboost____::8ea1631fa01adcbafc3f384b6a2c5cc3"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::3668b9bd87532a085dc7a18ce2086715","subRelType":"dedup","target":"50|od_______166::67c1385662f2fa0bde310bec15427646"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::d2a45f0f42d8dd66c364219924c37c3f","subRelType":"dedup","target":"50|od_______935::0bf7d9c5d2e1115a31cd558f83ae8ee3"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::d2a45f0f42d8dd66c364219924c37c3f","subRelType":"dedup","target":"50|doajarticles::d695fee344cb367a38ce6622f5fe9430"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::325525b879b17d8059a4e58def2f7225","subRelType":"dedup","target":"50|od_______267::14e952745e4b602ff72919aa881b8945"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::325525b879b17d8059a4e58def2f7225","subRelType":"dedup","target":"50|doiboost____::43941031067842fac90604d37b2a4149"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::325525b879b17d8059a4e58def2f7225","subRelType":"dedup","target":"50|core________::5c62b3ad05a23de613636607a424899d"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::325525b879b17d8059a4e58def2f7225","subRelType":"dedup","target":"50|scholexplore::1c467aabe5108ee840a4500d58f19328"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::cd4fc0411683ee762d50bfd30436f95b","subRelType":"dedup","target":"50|doiboost____::0ff61beeb12c49ed8a826b2b1883c8f8"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::cd4fc0411683ee762d50bfd30436f95b","subRelType":"dedup","target":"50|doajarticles::fca1220426b10ccb8b46e4967b353f37"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::a87be24a4fcac13c9298f0cc3acfc6ea","subRelType":"dedup","target":"50|doiboost____::dd96d41ee05d4022065c9d3096e1023a"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::a87be24a4fcac13c9298f0cc3acfc6ea","subRelType":"dedup","target":"50|erc_________::7d9a29ff323c2fe0ecf037189bf71b8e"}
 {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"relClass":"merges","source":"50|dedup_wf_001::a87be24a4fcac13c9298f0cc3acfc6ea","subRelType":"dedup","target":"50|webcrawl____::fdd999801fec35d4c6190bcabb850c52"}
--- a/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/relationOneRemovalWithMatch/relations.json
+++ b/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/relationOneRemovalWithMatch/relations.json
@ -0,0 +1,13 @@
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProducedBy","relType":"resultProject","source":"50|dedup_wf_001::3668b9bd87532a085dc7a18ce2086715","subRelType":"outcome","target":"40|corda_______::189ff31d637eaaeaf4d3584dc490b1cf"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::05c5c5d2920c01e194d6760f24885a82","subRelType":"provision","target":"20|dedup_wf_001::cd07e6c09886e59266fdbae32a9e319b"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::07022f119fc3d1cb66fe84494aa820c9","subRelType":"provision","target":"20|doajarticles::c48e93350cf5287e604ef631f2a67087"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::09ea05970871d7d923caaa8d2416d10e","subRelType":"provision","target":"20|doajarticles::cd84ef51b2de10ff01d679e4e662594e"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0af8c8ecf992b177304eb8f5d978100b","subRelType":"provision","target":"20|doajarticles::4eb6845b141d2b36ed94918d2bf382f0"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0b48a767b2b8d323ccdcaf2d40642746","subRelType":"provision","target":"20|doajarticles::46a4942a4707e842611278cfa26789f9"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0dd9573adad4e5cc322612f6e9ecc8ce","subRelType":"provision","target":"20|doajarticles::e34526e7b5efb700ddb4544700234a0b"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0e870ab15f231d954306bb658fc747a2","subRelType":"provision","target":"20|doajarticles::ccac83f4f971e3cdc194ddb796850a37"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0f19a2d702e31d451e9806f701584c97","subRelType":"provision","target":"20|doajarticles::7a02d64772c121c1f10c17f8e2bf2aec"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0f4b6db6c02966acbfb60af527728c85","subRelType":"provision","target":"20|doajarticles::acd96b3bd87b176202b8ea494c318b21"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::11f65dc66da7ef1b1f3a3e59199e4d70","subRelType":"provision","target":"20|dedup_wf_001::6132363e7458cbd7c22aa284c7df1307"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::149fd06e8702d94aa648641fd1602284","subRelType":"provision","target":"20|dedup_wf_001::35ae35032078bc33bc92e2b0f2ecfa17"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::15581a45537ceb854bbddee49b2942b4","subRelType":"provision","target":"20|doajarticles::0b25b0ce56da469cc8ad74c7d83c16a3"}
--- a/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/relationsNoRemoval/relations.json
+++ b/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/relationsNoRemoval/relations.json
@ -0,0 +1,13 @@
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::018cb61ed43c01704decc66183ce5d60","subRelType":"provision","target":"20|dedup_wf_001::b9fff055ce5efacecbe4ef918c127f86"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::05c5c5d2920c01e194d6760f24885a82","subRelType":"provision","target":"20|dedup_wf_001::cd07e6c09886e59266fdbae32a9e319b"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::07022f119fc3d1cb66fe84494aa820c9","subRelType":"provision","target":"20|doajarticles::c48e93350cf5287e604ef631f2a67087"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::09ea05970871d7d923caaa8d2416d10e","subRelType":"provision","target":"20|doajarticles::cd84ef51b2de10ff01d679e4e662594e"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0af8c8ecf992b177304eb8f5d978100b","subRelType":"provision","target":"20|doajarticles::4eb6845b141d2b36ed94918d2bf382f0"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0b48a767b2b8d323ccdcaf2d40642746","subRelType":"provision","target":"20|doajarticles::46a4942a4707e842611278cfa26789f9"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0dd9573adad4e5cc322612f6e9ecc8ce","subRelType":"provision","target":"20|doajarticles::e34526e7b5efb700ddb4544700234a0b"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0e870ab15f231d954306bb658fc747a2","subRelType":"provision","target":"20|doajarticles::ccac83f4f971e3cdc194ddb796850a37"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0f19a2d702e31d451e9806f701584c97","subRelType":"provision","target":"20|doajarticles::7a02d64772c121c1f10c17f8e2bf2aec"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0f4b6db6c02966acbfb60af527728c85","subRelType":"provision","target":"20|doajarticles::acd96b3bd87b176202b8ea494c318b21"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::11f65dc66da7ef1b1f3a3e59199e4d70","subRelType":"provision","target":"20|dedup_wf_001::6132363e7458cbd7c22aa284c7df1307"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::149fd06e8702d94aa648641fd1602284","subRelType":"provision","target":"20|dedup_wf_001::35ae35032078bc33bc92e2b0f2ecfa17"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::15581a45537ceb854bbddee49b2942b4","subRelType":"provision","target":"20|doajarticles::0b25b0ce56da469cc8ad74c7d83c16a3"}
--- a/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/relationsOneRemoval/relationsOneRemove.json
+++ b/dhp-workflows/dhp-blacklist/src/test/resources/eu/dnetlib/dhp/blacklist/relationsOneRemoval/relationsOneRemove.json
@ -0,0 +1,13 @@
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda__h2020::5161f53ab205d803c36b4c888fe7deef","subRelType":"participation","target":"20|dedup_wf_001::157af406bc653aa4d9749318b644de43"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::05c5c5d2920c01e194d6760f24885a82","subRelType":"provision","target":"20|dedup_wf_001::cd07e6c09886e59266fdbae32a9e319b"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::07022f119fc3d1cb66fe84494aa820c9","subRelType":"provision","target":"20|doajarticles::c48e93350cf5287e604ef631f2a67087"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::09ea05970871d7d923caaa8d2416d10e","subRelType":"provision","target":"20|doajarticles::cd84ef51b2de10ff01d679e4e662594e"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0af8c8ecf992b177304eb8f5d978100b","subRelType":"provision","target":"20|doajarticles::4eb6845b141d2b36ed94918d2bf382f0"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0b48a767b2b8d323ccdcaf2d40642746","subRelType":"provision","target":"20|doajarticles::46a4942a4707e842611278cfa26789f9"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0dd9573adad4e5cc322612f6e9ecc8ce","subRelType":"provision","target":"20|doajarticles::e34526e7b5efb700ddb4544700234a0b"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0e870ab15f231d954306bb658fc747a2","subRelType":"provision","target":"20|doajarticles::ccac83f4f971e3cdc194ddb796850a37"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0f19a2d702e31d451e9806f701584c97","subRelType":"provision","target":"20|doajarticles::7a02d64772c121c1f10c17f8e2bf2aec"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::0f4b6db6c02966acbfb60af527728c85","subRelType":"provision","target":"20|doajarticles::acd96b3bd87b176202b8ea494c318b21"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::11f65dc66da7ef1b1f3a3e59199e4d70","subRelType":"provision","target":"20|dedup_wf_001::6132363e7458cbd7c22aa284c7df1307"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::149fd06e8702d94aa648641fd1602284","subRelType":"provision","target":"20|dedup_wf_001::35ae35032078bc33bc92e2b0f2ecfa17"}
 {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::15581a45537ceb854bbddee49b2942b4","subRelType":"provision","target":"20|doajarticles::0b25b0ce56da469cc8ad74c7d83c16a3"}
--- a/dhp-workflows/dhp-enrichment/pom.xml
+++ b/dhp-workflows/dhp-enrichment/pom.xml
@ -0,0 +1,64 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven descriptor of the dhp-enrichment module; groupId and version are taken from the dhp-workflows parent below. -->
    <parent>
        <artifactId>dhp-workflows</artifactId>
        <groupId>eu.dnetlib.dhp</groupId>
        <version>1.2.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>dhp-enrichment</artifactId>
    <!--
        Dependencies declared without a <version> element get their version from outside this file
        (presumably a dependencyManagement section in a parent POM; not visible here, to be confirmed).
    -->
    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
        </dependency>
        <!-- Sibling modules of this project: always resolved at the same version as this module. -->
        <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-common</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-schemas</artifactId>
            <version>${project.version}</version>
        </dependency>
        <!-- Test scope: available on the test classpath only. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>dom4j</groupId>
            <artifactId>dom4j</artifactId>
        </dependency>
        <dependency>
            <groupId>jaxen</groupId>
            <artifactId>jaxen</artifactId>
        </dependency>
        <dependency>
            <groupId>com.jayway.jsonpath</groupId>
            <artifactId>json-path</artifactId>
        </dependency>
        <!-- Version pinned explicitly in this module, unlike the other third-party dependencies declared above. -->
        <dependency>
            <groupId>io.github.classgraph</groupId>
            <artifactId>classgraph</artifactId>
            <version>4.8.71</version>
        </dependency>
    </dependencies>
 </project>
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java
@ -0,0 +1,169 @@
 package eu.dnetlib.dhp;
 import java.util.List;
 import java.util.Optional;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
 import eu.dnetlib.dhp.schema.oaf.*;
 /**
  * Constants and static helper methods shared by the propagation (enrichment) Spark jobs: provenance
  * class ids/names, relation type names, factories for {@link Country}, {@link DataInfo},
  * {@link Qualifier} and {@link Relation}, plus a few Spark/argument-parsing utilities.
  */
 public class PropagationConstant {
 	public static final String INSTITUTIONAL_REPO_TYPE = "pubsrepository::institutional";
 	public static final String PROPAGATION_DATA_INFO_TYPE = "propagation";
 	public static final String TRUE = "true";
 	public static final String DNET_COUNTRY_SCHEMA = "dnet:countries";
 	public static final String DNET_SCHEMA_NAME = "dnet:provenanceActions";
 	public static final String DNET_SCHEMA_ID = "dnet:provenanceActions";
 	public static final String PROPAGATION_COUNTRY_INSTREPO_CLASS_ID = "country:instrepos";
 	public static final String PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME = "Propagation of country to result collected from datasources of type institutional repositories";
 	public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID = "result:organization:instrepo";
 	public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME = "Propagation of affiliation to result collected from datasources of type institutional repository";
 	public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID = "result:project:semrel";
 	public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME = "Propagation of result to project through semantic relation";
 	public static final String PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID = "result:community:semrel";
 	// NOTE(review): this value and the ORGANIZATION one below start with a blank; kept as-is because
 	// the string ends up in the produced data. Confirm whether the leading space is intentional.
 	public static final String PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME = " Propagation of result belonging to community through semantic relation";
 	public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID = "result:community:organization";
 	public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME = " Propagation of result belonging to community through organization";
 	public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
 	public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";
 	public static final String RELATION_DATASOURCE_ORGANIZATION_REL_CLASS = "isProvidedBy";
 	public static final String RELATION_RESULTORGANIZATION_REL_TYPE = "resultOrganization";
 	public static final String RELATION_RESULTORGANIZATION_SUBREL_TYPE = "affiliation";
 	public static final String RELATION_ORGANIZATION_RESULT_REL_CLASS = "isAuthorInstitutionOf";
 	public static final String RELATION_RESULT_ORGANIZATION_REL_CLASS = "hasAuthorInstitution";
 	public static final String RELATION_RESULTRESULT_REL_TYPE = "resultResult";
 	public static final String RELATION_RESULTPROJECT_REL_TYPE = "resultProject";
 	public static final String RELATION_RESULTPROJECT_SUBREL_TYPE = "outcome";
 	public static final String RELATION_RESULT_PROJECT_REL_CLASS = "isProducedBy";
 	public static final String RELATION_PROJECT_RESULT_REL_CLASS = "produces";
 	public static final String RELATION_REPRESENTATIVERESULT_RESULT_CLASS = "merges";
 	public static final String PROPAGATION_AUTHOR_PID = "ORCID";
 	/** Shared JSON mapper, used by {@link #readPath(SparkSession, String, Class)}. */
 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	/**
 	 * Selects, for every result not deleted by inference, its id together with the collectedfrom (cf)
 	 * and hostedby (hb) keys of each of its instances. Reads from a table/view named {@code result}.
 	 */
 	private static final String cfHbforResultQuery = "select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb "
 		+
 		"from result r " +
 		"lateral view explode(instance) i as inst " +
 		"where r.datainfo.deletedbyinference=false";

 	/**
 	 * Builds a {@link Country} in the {@code dnet:countries} scheme whose data info marks it as
 	 * propagated from institutional repositories.
 	 *
 	 * @param classid the class id to set on the country
 	 * @param classname the class name to set on the country
 	 * @return the newly created country
 	 */
 	public static Country getCountry(String classid, String classname) {
 		Country nc = new Country();
 		nc.setClassid(classid);
 		nc.setClassname(classname);
 		nc.setSchemename(DNET_COUNTRY_SCHEMA);
 		nc.setSchemeid(DNET_COUNTRY_SCHEMA);
 		nc
 			.setDataInfo(
 				getDataInfo(
 					PROPAGATION_DATA_INFO_TYPE,
 					PROPAGATION_COUNTRY_INSTREPO_CLASS_ID,
 					PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME));
 		return nc;
 	}

 	/**
 	 * Builds the {@link DataInfo} attached to propagated information: inferred, not deleted by
 	 * inference, with a fixed trust of {@code "0.85"}.
 	 *
 	 * @param inference_provenance value stored as inference provenance
 	 * @param inference_class_id class id of the provenance action qualifier
 	 * @param inference_class_name class name of the provenance action qualifier
 	 * @return the newly created data info
 	 */
 	public static DataInfo getDataInfo(
 		String inference_provenance, String inference_class_id, String inference_class_name) {
 		DataInfo di = new DataInfo();
 		di.setInferred(true);
 		di.setDeletedbyinference(false);
 		di.setTrust("0.85");
 		di.setInferenceprovenance(inference_provenance);
 		di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name));
 		return di;
 	}

 	/**
 	 * Builds a {@link Qualifier} in the {@code dnet:provenanceActions} scheme.
 	 *
 	 * @param inference_class_id the class id of the qualifier
 	 * @param inference_class_name the class name of the qualifier
 	 * @return the newly created qualifier
 	 */
 	public static Qualifier getQualifier(String inference_class_id, String inference_class_name) {
 		Qualifier pa = new Qualifier();
 		pa.setClassid(inference_class_id);
 		pa.setClassname(inference_class_name);
 		pa.setSchemeid(DNET_SCHEMA_ID);
 		pa.setSchemename(DNET_SCHEMA_NAME);
 		return pa;
 	}

 	/**
 	 * Builds a {@link Relation} between two entities, with the data info produced by
 	 * {@link #getDataInfo(String, String, String)}.
 	 *
 	 * @param source identifier of the source entity
 	 * @param target identifier of the target entity
 	 * @param rel_class relation class
 	 * @param rel_type relation type
 	 * @param subrel_type relation sub-type
 	 * @param inference_provenance value stored as inference provenance
 	 * @param inference_class_id class id of the provenance action
 	 * @param inference_class_name class name of the provenance action
 	 * @return the newly created relation
 	 */
 	public static Relation getRelation(
 		String source,
 		String target,
 		String rel_class,
 		String rel_type,
 		String subrel_type,
 		String inference_provenance,
 		String inference_class_id,
 		String inference_class_name) {
 		Relation r = new Relation();
 		r.setSource(source);
 		r.setTarget(target);
 		r.setRelClass(rel_class);
 		r.setRelType(rel_type);
 		r.setSubRelType(subrel_type);
 		r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name));
 		return r;
 	}

 	/**
 	 * Builds a query fragment of the form
 	 * {@code " and (<text><c0>' OR <text><c1>' ...)"} from the given values.
 	 *
 	 * <p>Each value is appended verbatim after {@code text} and followed by a closing single quote,
 	 * so {@code text} is presumably expected to end with the opening quote (e.g.
 	 * {@code "field = '"}). Values are not escaped: only pass trusted values.
 	 *
 	 * @param text the prefix placed before every value
 	 * @param constraints the values to combine with OR; must contain at least one element
 	 * @return the query fragment, starting with {@code " and ("} and ending with {@code ")"}
 	 * @throws IllegalArgumentException if {@code constraints} is null or empty
 	 */
 	public static String getConstraintList(String text, List<String> constraints) {
 		// Fail fast with an explicit message instead of an opaque IndexOutOfBoundsException /
 		// NullPointerException: an empty OR-group cannot be rendered as a valid condition.
 		if (constraints == null || constraints.isEmpty()) {
 			throw new IllegalArgumentException(
 				"at least one constraint value is required to build the condition for: " + text);
 		}
 		StringBuilder ret = new StringBuilder(" and (");
 		for (int i = 0; i < constraints.size(); i++) {
 			if (i > 0) {
 				ret.append(" OR ");
 			}
 			ret.append(text).append(constraints.get(i)).append("'");
 		}
 		ret.append(")");
 		return ret.toString();
 	}

 	/**
 	 * Removes the given path by delegating to {@link HdfsSupport#remove}, using the Hadoop
 	 * configuration of the given Spark session.
 	 *
 	 * @param spark the active Spark session
 	 * @param path the path to remove
 	 */
 	public static void removeOutputDir(SparkSession spark, String path) {
 		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
 	}

 	/**
 	 * Reads the {@code isSparkSessionManaged} argument.
 	 *
 	 * @param parser the parsed application arguments
 	 * @return the argument parsed as a boolean, or {@code true} when it is absent
 	 */
 	public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) {
 		return Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 	}

 	/**
 	 * Reads the {@code isTest} argument.
 	 *
 	 * @param parser the parsed application arguments
 	 * @return the argument parsed as a boolean, or {@code false} when it is absent
 	 */
 	public static Boolean isTest(ArgumentApplicationParser parser) {
 		return Optional
 			.ofNullable(parser.get("isTest"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.FALSE);
 	}

 	/**
 	 * Runs the collectedfrom/hostedby query and registers its output as the temporary view
 	 * {@code cfhb}. A table or view named {@code result} must be available in the session.
 	 *
 	 * @param spark the active Spark session
 	 */
 	public static void createCfHbforResult(SparkSession spark) {
 		Dataset<Row> cfhb = spark.sql(cfHbforResultQuery);
 		cfhb.createOrReplaceTempView("cfhb");
 	}

 	/**
 	 * Reads a text file and deserializes every line from JSON into an instance of {@code clazz}.
 	 *
 	 * @param spark the active Spark session
 	 * @param inputPath the path of the text file(s) to read
 	 * @param clazz the bean class each line is mapped to
 	 * @param <R> the type of the elements of the returned dataset
 	 * @return a dataset of {@code clazz} instances, encoded with a bean encoder
 	 */
 	public static <R> Dataset<R> readPath(
 		SparkSession spark, String inputPath, Class<R> clazz) {
 		return spark
 			.read()
 			.textFile(inputPath)
 			.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
@ -0,0 +1,120 @@
 package eu.dnetlib.dhp.bulktag;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.bulktag.community.*;
 import eu.dnetlib.dhp.schema.oaf.Result;
 /**
  * Spark job applying the community bulk tagging to the results found under an input path.
  *
  * <p>The job reads one serialized result per line, enriches its context list through
  * {@link ResultTagger#enrichContextCriteria} and writes the outcome as gzip-compressed JSON.
  */
 public class SparkBulkTagJob {

 	private static final Logger log = LoggerFactory.getLogger(SparkBulkTagJob.class);

 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

 	/**
 	 * Entry point: parses the arguments, loads the community configuration and runs the tagging.
 	 *
 	 * @param args the command line arguments described by {@code input_bulkTag_parameters.json}
 	 * @throws Exception if the arguments cannot be parsed, the result class cannot be loaded or
 	 *         the community configuration cannot be obtained
 	 */
 	public static void main(String[] args) throws Exception {

 		// The parameter descriptor is read with an explicit charset (the single-argument
 		// IOUtils.toString relies on the platform default) and the stream is closed afterwards.
 		final String jsonConfiguration;
 		try (java.io.InputStream parameters = SparkBulkTagJob.class
 			.getResourceAsStream("/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json")) {
 			jsonConfiguration = IOUtils.toString(parameters, java.nio.charset.StandardCharsets.UTF_8);
 		}

 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);

 		Boolean isSparkSessionManaged = booleanArgument(parser, "isSparkSessionManaged", Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

 		Boolean isTest = booleanArgument(parser, "isTest", Boolean.FALSE);
 		log.info("isTest: {} ", isTest);

 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);

 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

 		ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class);
 		log.info("pathMap: {}", new Gson().toJson(protoMappingParams));

 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);

 		// Only logged: this job always writes its output.
 		final Boolean saveGraph = booleanArgument(parser, "saveGraph", Boolean.TRUE);
 		log.info("saveGraph: {}", saveGraph);

 		// The class name comes from the workflow configuration and is expected to name a Result
 		// subclass; the cast cannot be checked at compile time.
 		@SuppressWarnings("unchecked")
 		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);

 		SparkConf conf = new SparkConf();

 		// In test mode the configuration is given inline, otherwise it is fetched from the
 		// information system.
 		CommunityConfiguration cc;
 		String taggingConf = parser.get("taggingConf");
 		if (isTest) {
 			cc = CommunityConfigurationFactory.newInstance(taggingConf);
 		} else {
 			cc = QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl"));
 		}

 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc);
 			});
 	}

 	/**
 	 * Reads an optional boolean argument.
 	 *
 	 * @param parser the parsed application arguments
 	 * @param name the argument name
 	 * @param defaultValue value returned when the argument is absent
 	 * @return the argument parsed with {@link Boolean#valueOf(String)} or {@code defaultValue}
 	 */
 	private static Boolean booleanArgument(
 		ArgumentApplicationParser parser, String name, Boolean defaultValue) {
 		return Optional
 			.ofNullable(parser.get(name))
 			.map(Boolean::valueOf)
 			.orElse(defaultValue);
 	}

 	/**
 	 * Tags every result read from {@code inputPath} and overwrites {@code outputPath} with the
 	 * enriched records, serialized as gzip-compressed JSON.
 	 */
 	private static <R extends Result> void execBulkTag(
 		SparkSession spark,
 		String inputPath,
 		String outputPath,
 		ProtoMap protoMappingParams,
 		Class<R> resultClazz,
 		CommunityConfiguration communityConfiguration) {

 		ResultTagger resultTagger = new ResultTagger();
 		readPath(spark, inputPath, resultClazz)
 			.map(
 				(MapFunction<R, R>) value -> resultTagger
 					.enrichContextCriteria(
 						value, communityConfiguration, protoMappingParams),
 				Encoders.bean(resultClazz))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(outputPath);
 	}

 	/**
 	 * Reads the text file(s) at {@code inputPath} and maps every line to an instance of
 	 * {@code clazz} using {@link #OBJECT_MAPPER}.
 	 *
 	 * @param spark the session used for reading
 	 * @param inputPath location of the text input, one serialized record per line
 	 * @param clazz the bean class each line is mapped to
 	 * @param <R> the record type
 	 * @return a dataset of {@code R} encoded with {@code Encoders.bean(clazz)}
 	 */
 	public static <R> Dataset<R> readPath(
 		SparkSession spark, String inputPath, Class<R> clazz) {
 		return spark
 			.read()
 			.textFile(inputPath)
 			.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java
@ -0,0 +1,65 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import com.google.gson.Gson;
 /**
  * Tagging configuration of a single community: its identifier plus the subjects, providers and
  * Zenodo communities associated with it.
  *
  * <p>Created by miriam on 01/08/2018.
  */
 public class Community implements Serializable {

 	private static final Log log = LogFactory.getLog(Community.class);

 	private String id;
 	private List<String> subjects = new ArrayList<>();
 	private List<Provider> providers = new ArrayList<>();
 	private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();

 	/** @return this community serialized as JSON by a default {@link Gson} instance */
 	public String toJson() {
 		return new Gson().toJson(this);
 	}

 	/**
 	 * @return {@code true} when at least one of the subject, provider or Zenodo community lists
 	 *         is not empty
 	 */
 	public boolean isValid() {
 		final boolean nothingConfigured = getSubjects().isEmpty()
 			&& getProviders().isEmpty()
 			&& getZenodoCommunities().isEmpty();
 		return !nothingConfigured;
 	}

 	public String getId() {
 		return this.id;
 	}

 	public void setId(final String id) {
 		this.id = id;
 	}

 	public List<String> getSubjects() {
 		return this.subjects;
 	}

 	public void setSubjects(final List<String> subjects) {
 		this.subjects = subjects;
 	}

 	public List<Provider> getProviders() {
 		return this.providers;
 	}

 	public void setProviders(final List<Provider> providers) {
 		this.providers = providers;
 	}

 	public List<ZenodoCommunity> getZenodoCommunities() {
 		return this.zenodoCommunities;
 	}

 	public void setZenodoCommunities(final List<ZenodoCommunity> zenodoCommunities) {
 		this.zenodoCommunities = zenodoCommunities;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java
@ -0,0 +1,196 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
 import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
 import eu.dnetlib.dhp.bulktag.criteria.Selection;
 /**
  * Set of community configurations together with three inverse indexes (by subject, by
  * datasource id and by Zenodo community id) pointing back to the community identifiers.
  *
  * <p>NOTE(review): subject keys are lower-cased and trimmed when indexed, while datasource and
  * Zenodo community keys are indexed as they are; the {@code ...DatasourceValue} and
  * {@code ...ZenodoCommunityValue} lookups lower-case their argument, so keys containing upper
  * case characters would not be found by them. Confirm whether this is intended.
  *
  * <p>Created by miriam on 02/08/2018.
  */
 public class CommunityConfiguration implements Serializable {

 	private static final Log log = LogFactory.getLog(CommunityConfiguration.class);

 	private Map<String, Community> communities;

 	// subject -> (community id, selection constraints)
 	private Map<String, List<Pair<String, SelectionConstraints>>> subjectMap = new HashMap<>();
 	// datasource id -> (community id, selection constraints)
 	private Map<String, List<Pair<String, SelectionConstraints>>> datasourceMap = new HashMap<>();
 	// zenodo community id -> (community id, selection constraints)
 	private Map<String, List<Pair<String, SelectionConstraints>>> zenodocommunityMap = new HashMap<>();

 	/**
 	 * Creates the configuration and builds the inverse indexes.
 	 *
 	 * @param communities the communities, keyed by identifier
 	 */
 	CommunityConfiguration(final Map<String, Community> communities) {
 		this.communities = communities;
 		init();
 	}

 	/**
 	 * Populates the inverse indexes from the current communities, creating the index maps first
 	 * when they are {@code null}. Entries are appended: existing index content is not cleared.
 	 */
 	void init() {
 		if (subjectMap == null) {
 			subjectMap = new HashMap<>();
 		}
 		if (datasourceMap == null) {
 			datasourceMap = new HashMap<>();
 		}
 		if (zenodocommunityMap == null) {
 			zenodocommunityMap = new HashMap<>();
 		}

 		for (Community community : getCommunities().values()) {
 			final String communityId = community.getId();

 			// subjects carry no real constraints: an empty SelectionConstraints is attached
 			for (String subject : community.getSubjects()) {
 				final String normalized = subject.toLowerCase().trim();
 				add(normalized, new Pair<>(communityId, new SelectionConstraints()), subjectMap);
 			}

 			for (Provider provider : community.getProviders()) {
 				add(
 					provider.getOpenaireId(),
 					new Pair<>(communityId, provider.getSelectionConstraints()),
 					datasourceMap);
 			}

 			for (ZenodoCommunity zenodo : community.getZenodoCommunities()) {
 				add(
 					zenodo.getZenodoCommunityId(),
 					new Pair<>(communityId, zenodo.getSelCriteria()),
 					zenodocommunityMap);
 			}
 		}
 	}

 	/** Appends {@code value} to the list stored under {@code key}, creating the list if needed. */
 	private void add(
 		String key,
 		Pair<String, SelectionConstraints> value,
 		Map<String, List<Pair<String, SelectionConstraints>>> map) {
 		map.computeIfAbsent(key, unused -> new ArrayList<>()).add(value);
 	}

 	public Map<String, List<Pair<String, SelectionConstraints>>> getSubjectMap() {
 		return this.subjectMap;
 	}

 	public void setSubjectMap(Map<String, List<Pair<String, SelectionConstraints>>> subjectMap) {
 		this.subjectMap = subjectMap;
 	}

 	public Map<String, List<Pair<String, SelectionConstraints>>> getDatasourceMap() {
 		return this.datasourceMap;
 	}

 	public void setDatasourceMap(
 		Map<String, List<Pair<String, SelectionConstraints>>> datasourceMap) {
 		this.datasourceMap = datasourceMap;
 	}

 	public Map<String, List<Pair<String, SelectionConstraints>>> getZenodocommunityMap() {
 		return this.zenodocommunityMap;
 	}

 	public void setZenodocommunityMap(
 		Map<String, List<Pair<String, SelectionConstraints>>> zenodocommunityMap) {
 		this.zenodocommunityMap = zenodocommunityMap;
 	}

 	/** @return the index entries for the given subject, or {@code null} when there are none */
 	public List<Pair<String, SelectionConstraints>> getCommunityForSubject(String sbj) {
 		return subjectMap.get(sbj);
 	}

 	/** @return the index entries for the given datasource, or {@code null} when there are none */
 	public List<Pair<String, SelectionConstraints>> getCommunityForDatasource(String dts) {
 		return datasourceMap.get(dts);
 	}

 	/**
 	 * Returns the identifiers of the communities associated with a datasource whose selection
 	 * constraints are either missing or verified by the given parameters.
 	 *
 	 * @param dts the datasource identifier
 	 * @param param the values to verify the constraints against, keyed by field name
 	 * @return the matching community identifiers, possibly empty, never {@code null}
 	 */
 	public List<String> getCommunityForDatasource(
 		final String dts, final Map<String, List<String>> param) {
 		final List<String> matching = new ArrayList<>();
 		final List<Pair<String, SelectionConstraints>> candidates = datasourceMap.get(dts);
 		if (candidates == null) {
 			return matching;
 		}
 		for (Pair<String, SelectionConstraints> candidate : candidates) {
 			final SelectionConstraints constraints = candidate.getSnd();
 			final boolean accepted = constraints == null || constraints.verifyCriteria(param);
 			if (!accepted) {
 				continue;
 			}
 			final String communityId = candidate.getFst();
 			if (communityId != null) {
 				matching.add(communityId);
 			}
 		}
 		return matching;
 	}

 	/** @return the index entries for the given Zenodo community, or {@code null} when none */
 	public List<Pair<String, SelectionConstraints>> getCommunityForZenodoCommunity(String zc) {
 		return zenodocommunityMap.get(zc);
 	}

 	/** @return the community identifiers indexed under the given subject (looked up as is) */
 	public List<String> getCommunityForSubjectValue(String value) {
 		return getContextIds(subjectMap.get(value));
 	}

 	/** @return the community identifiers indexed under the lower-cased datasource id */
 	public List<String> getCommunityForDatasourceValue(String value) {
 		return getContextIds(datasourceMap.get(value.toLowerCase()));
 	}

 	/** @return the community identifiers indexed under the lower-cased Zenodo community id */
 	public List<String> getCommunityForZenodoCommunityValue(String value) {
 		return getContextIds(zenodocommunityMap.get(value.toLowerCase()));
 	}

 	/** Extracts the community identifiers from index entries; {@code null} yields an empty list. */
 	private List<String> getContextIds(List<Pair<String, SelectionConstraints>> list) {
 		final List<String> ids = new ArrayList<>();
 		if (list == null) {
 			return ids;
 		}
 		for (Pair<String, SelectionConstraints> entry : list) {
 			ids.add(entry.getFst());
 		}
 		return ids;
 	}

 	public Map<String, Community> getCommunities() {
 		return this.communities;
 	}

 	public void setCommunities(Map<String, Community> communities) {
 		this.communities = communities;
 	}

 	/**
 	 * @return this configuration serialized as JSON, with {@link InterfaceAdapter} registered for
 	 *         {@link Selection}
 	 */
 	public String toJson() {
 		return new GsonBuilder()
 			.registerTypeAdapter(Selection.class, new InterfaceAdapter())
 			.create()
 			.toJson(this);
 	}

 	/** @return the number of configured communities */
 	public int size() {
 		return communities.size();
 	}

 	/** @return the community with the given identifier, or {@code null} when unknown */
 	public Community getCommunityById(String id) {
 		return communities.get(id);
 	}

 	/** @return a new list holding all the configured communities */
 	public List<Community> getCommunityList() {
 		return Lists.newLinkedList(communities.values());
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java
@ -0,0 +1,138 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
 import org.dom4j.Node;
 import org.dom4j.io.SAXReader;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
 import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
 import eu.dnetlib.dhp.bulktag.criteria.Selection;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
 /**
  * Builds {@link CommunityConfiguration} instances from their XML or JSON representation.
  *
  * <p>Created by miriam on 03/08/2018.
  */
 public class CommunityConfigurationFactory {

 	private static final Log log = LogFactory.getLog(CommunityConfigurationFactory.class);

 	private static VerbResolver resolver = VerbResolverFactory.newInstance();

 	/**
 	 * Parses every {@code community} element of the given XML and keeps the valid ones.
 	 *
 	 * @param xml the XML document holding the community profiles
 	 * @return the configuration built from the communities for which
 	 *         {@link Community#isValid()} holds
 	 * @throws DocumentException if the XML cannot be parsed
 	 */
 	public static CommunityConfiguration newInstance(final String xml) throws DocumentException {

 		log.debug(String.format("parsing community configuration from:\n%s", xml));

 		final Document document = new SAXReader().read(new StringReader(xml));
 		final Map<String, Community> validCommunities = Maps.newHashMap();

 		for (final Object candidate : document.selectNodes("//community")) {
 			final Community parsed = parseCommunity((Node) candidate);
 			if (!parsed.isValid()) {
 				continue;
 			}
 			validCommunities.put(parsed.getId(), parsed);
 		}

 		log.info(String.format("loaded %s community configuration profiles", validCommunities.size()));
 		log.debug(String.format("loaded community configuration:\n%s", validCommunities.toString()));

 		return new CommunityConfiguration(validCommunities);
 	}

 	/**
 	 * Deserializes a configuration from JSON and (re)builds its inverse indexes.
 	 *
 	 * @param json the JSON representation, as produced by {@link CommunityConfiguration#toJson()}
 	 * @return the deserialized configuration
 	 */
 	public static CommunityConfiguration fromJson(final String json) {
 		final Gson gson = new GsonBuilder()
 			.registerTypeAdapter(Selection.class, new InterfaceAdapter())
 			.create();

 		final CommunityConfiguration configuration = gson.fromJson(json, CommunityConfiguration.class);
 		log.info(String.format("loaded %s community configuration profiles", configuration.size()));

 		configuration.init();
 		log.info("created inverse maps");

 		return configuration;
 	}

 	/** Builds a community from its XML node: id attribute, subjects, datasources, zenodo communities. */
 	private static Community parseCommunity(final Node node) {
 		final Community community = new Community();

 		community.setId(node.valueOf("./@id"));
 		log.info(String.format("community id: %s", community.getId()));

 		community.setSubjects(parseSubjects(node));
 		community.setProviders(parseDatasources(node));
 		community.setZenodoCommunities(parseZenodoCommunities(node));

 		return community;
 	}

 	/** Collects the trimmed text of every {@code subjects/subject} child. */
 	private static List<String> parseSubjects(final Node node) {
 		final List<Node> subjectNodes = node.selectNodes("./subjects/subject");
 		final List<String> subjects = Lists.newArrayList();

 		for (Node subjectNode : subjectNodes) {
 			log.debug("text of the node " + subjectNode.getText());
 			subjects.add(StringUtils.trim(subjectNode.getText()));
 		}

 		log.info("size of the subject list " + subjects.size());
 		return subjects;
 	}

 	/** Builds one {@link Provider} for every {@code datasources/datasource} child. */
 	private static List<Provider> parseDatasources(final Node node) {
 		final List<Node> datasourceNodes = node.selectNodes("./datasources/datasource");
 		final List<Provider> providers = new ArrayList<>();

 		for (Node datasourceNode : datasourceNodes) {
 			final Provider provider = new Provider();
 			provider.setOpenaireId(datasourceNode.selectSingleNode("./openaireId").getText());
 			provider.setSelCriteria(datasourceNode.selectSingleNode("./selcriteria"), resolver);
 			providers.add(provider);
 		}

 		log.info("size of the datasource list " + providers.size());
 		return providers;
 	}

 	/**
 	 * Builds one {@link ZenodoCommunity} for every {@code zenodocommunities/zenodocommunity}
 	 * child, plus a trailing one (without selection criteria) for a non blank
 	 * {@code oacommunity} element.
 	 */
 	private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
 		// the main community, when declared with a non blank text
 		String oaCommunity = null;
 		final Node oaCommunityNode = node.selectSingleNode("./oacommunity");
 		if (oaCommunityNode != null) {
 			final String text = oaCommunityNode.getText();
 			oaCommunity = StringUtils.isNotBlank(text) ? text : null;
 		}

 		final List<Node> zenodoNodes = node.selectNodes("./zenodocommunities/zenodocommunity");
 		final List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();

 		for (Node zenodoNode : zenodoNodes) {
 			final ZenodoCommunity declared = new ZenodoCommunity();
 			declared.setZenodoCommunityId(zenodoNode.selectSingleNode("./zenodoid").getText());
 			declared.setSelCriteria(zenodoNode.selectSingleNode("./selcriteria"));
 			zenodoCommunities.add(declared);
 		}

 		if (oaCommunity != null) {
 			final ZenodoCommunity main = new ZenodoCommunity();
 			main.setZenodoCommunityId(oaCommunity);
 			zenodoCommunities.add(main);
 		}

 		log.info("size of the zenodo community list " + zenodoCommunities.size());
 		return zenodoCommunities;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java
@ -0,0 +1,56 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import java.lang.reflect.InvocationTargetException;
 import eu.dnetlib.dhp.bulktag.criteria.Selection;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
 /**
  * A single condition: a verb, the field it applies to and the value to compare against,
  * together with the {@link Selection} that evaluates it.
  */
 public class Constraint implements Serializable {

 	private String verb;
 	private String field;
 	private String value;
 	private Selection selection;

 	public Constraint() {
 	}

 	public String getVerb() {
 		return this.verb;
 	}

 	public void setVerb(final String verb) {
 		this.verb = verb;
 	}

 	public String getField() {
 		return this.field;
 	}

 	public void setField(final String field) {
 		this.field = field;
 	}

 	public String getValue() {
 		return this.value;
 	}

 	public void setValue(final String value) {
 		this.value = value;
 	}

 	/** Sets the selection evaluating this constraint. */
 	public void setSelection(final Selection sel) {
 		this.selection = sel;
 	}

 	/**
 	 * Obtains the selection from the resolver, using the current verb and value.
 	 *
 	 * @param resolver the resolver asked for the selection criteria
 	 */
 	public void setSelection(final VerbResolver resolver)
 		throws InvocationTargetException, NoSuchMethodException, InstantiationException,
 		IllegalAccessException {
 		this.selection = resolver.getSelectionCriteria(this.verb, this.value);
 	}

 	/**
 	 * Applies the selection to the given metadata value.
 	 *
 	 * @param metadata the value to check
 	 * @return the outcome of {@code selection.apply(metadata)}
 	 */
 	public boolean verifyCriteria(final String metadata) {
 		return this.selection.apply(metadata);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
@ -0,0 +1,74 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Type;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import com.google.gson.Gson;
 import com.google.gson.reflect.TypeToken;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
 /**
  * A group of {@link Constraint}s evaluated in AND: the group is verified only when every
  * constraint is verified by at least one of the values available for its field.
  *
  * <p>Created by miriam on 02/08/2018.
  */
 public class Constraints implements Serializable {
 	private static final Log log = LogFactory.getLog(Constraints.class);
 	// private ConstraintEncapsulator ce;
 	private List<Constraint> constraint;

 	public Constraints() {
 	}

 	public List<Constraint> getConstraint() {
 		return constraint;
 	}

 	public void setConstraint(List<Constraint> constraint) {
 		this.constraint = constraint;
 	}

 	/**
 	 * Replaces the constraints with those deserialized from a JSON array.
 	 *
 	 * @param json JSON array of constraints
 	 */
 	public void setSc(String json) {
 		Type collectionType = new TypeToken<Collection<Constraint>>() {
 		}.getType();
 		constraint = new Gson().fromJson(json, collectionType);
 	}

 	/**
 	 * Resolves the selection of every constraint. A constraint whose selection cannot be
 	 * resolved is logged (with the cause) and skipped, the remaining ones are still processed.
 	 *
 	 * @param resolver the resolver providing the selection for each verb
 	 */
 	void setSelection(VerbResolver resolver) {
 		for (Constraint st : constraint) {
 			try {
 				st.setSelection(resolver);
 			} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException
 				| InstantiationException e) {
 				// the exception itself is logged: its message alone is often null
 				// (e.g. for InvocationTargetException) and hides the actual cause
 				log.error("unable to resolve the selection for verb '" + st.getVerb() + "'", e);
 			}
 		}
 	}

 	/**
 	 * Verifies the constraints (in AND) against the given values.
 	 *
 	 * @param param the available values, keyed by field name
 	 * @return {@code true} when each constraint is satisfied by at least one value of its field;
 	 *         a field without any entry in {@code param} makes its constraint fail
 	 */
 	public boolean verifyCriteria(final Map<String, List<String>> param) {
 		for (Constraint sc : constraint) {
 			// a field missing from the parameters cannot satisfy the constraint
 			final List<String> values = param.get(sc.getField());
 			boolean verified = false;
 			if (values != null) {
 				for (String value : values) {
 					if (sc.verifyCriteria(value.trim())) {
 						verified = true;
 						break;
 					}
 				}
 			}
 			if (!verified) {
 				return false;
 			}
 		}
 		return true;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java
@ -0,0 +1,39 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import com.google.gson.Gson;
 /**
  * Mutable, serializable pair of values.
  *
  * <p>Created by miriam on 03/08/2018.
  *
  * @param <A> type of the first component
  * @param <B> type of the second component
  */
 public class Pair<A, B> implements Serializable {
 	private A fst;
 	private B snd;

 	/**
 	 * @param a the first component
 	 * @param b the second component
 	 */
 	public Pair(A a, B b) {
 		fst = a;
 		snd = b;
 	}

 	public A getFst() {
 		return fst;
 	}

 	/**
 	 * Sets the first component.
 	 *
 	 * @return this pair, with its type arguments preserved so that calls can be chained
 	 */
 	public Pair<A, B> setFst(A fst) {
 		this.fst = fst;
 		return this;
 	}

 	public B getSnd() {
 		return snd;
 	}

 	/**
 	 * Sets the second component.
 	 *
 	 * @return this pair, with its type arguments preserved so that calls can be chained
 	 */
 	public Pair<A, B> setSnd(B snd) {
 		this.snd = snd;
 		return this;
 	}

 	/** @return this pair serialized as JSON by a default {@link Gson} instance */
 	public String toJson() {
 		return new Gson().toJson(this);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ProtoMap.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ProtoMap.java
@ -0,0 +1,12 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import java.util.HashMap;
 /**
  * String-to-string map used as target type when deserializing the {@code pathMap} argument.
  */
 public class ProtoMap extends HashMap<String, String> implements Serializable {

 	/** Creates an empty map. */
 	public ProtoMap() {
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java
@ -0,0 +1,61 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.dom4j.Node;
 import com.google.gson.Gson;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
 /**
  * A datasource associated with a community, optionally restricted by selection constraints.
  *
  * <p>Created by miriam on 01/08/2018.
  */
 public class Provider implements Serializable {
 	private static final Log log = LogFactory.getLog(Provider.class);

 	private String openaireId;

 	private SelectionConstraints selectionConstraints;

 	/** @return the selection constraints; same value as {@link #getSelectionConstraints()} */
 	public SelectionConstraints getSelCriteria() {
 		return selectionConstraints;
 	}

 	public SelectionConstraints getSelectionConstraints() {
 		return selectionConstraints;
 	}

 	public void setSelectionConstraints(SelectionConstraints selectionConstraints) {
 		this.selectionConstraints = selectionConstraints;
 	}

 	/** Sets the selection constraints; same effect as {@link #setSelectionConstraints}. */
 	public void setSelCriteria(SelectionConstraints selCriteria) {
 		this.selectionConstraints = selCriteria;
 	}

 	public String getOpenaireId() {
 		return openaireId;
 	}

 	public void setOpenaireId(String openaireId) {
 		this.openaireId = openaireId;
 	}

 	/**
 	 * Deserializes the constraints from JSON and resolves their selections. When the JSON yields
 	 * no object (e.g. an empty text) the constraints are left {@code null}.
 	 */
 	private void setSelCriteria(String json, VerbResolver resolver) {
 		log.info("Selection constraints for datasource = " + json);
 		selectionConstraints = new Gson().fromJson(json, SelectionConstraints.class);
 		// nothing to resolve when no constraint was declared
 		if (selectionConstraints != null) {
 			selectionConstraints.setSelection(resolver);
 		}
 	}

 	/**
 	 * Sets the selection constraints from the text of an XML node. Best effort: when the node is
 	 * missing, or its text cannot be turned into usable constraints, the constraints are set to
 	 * {@code null}.
 	 *
 	 * @param n the node whose text holds the JSON constraints, may be {@code null}
 	 * @param resolver the resolver providing the selection for each verb
 	 */
 	public void setSelCriteria(Node n, VerbResolver resolver) {
 		if (n == null) {
 			log.info("not set selection criteria... ");
 			selectionConstraints = null;
 			return;
 		}
 		try {
 			setSelCriteria(n.getText(), resolver);
 		} catch (Exception e) {
 			// deliberately lenient, but the cause is kept in the log for troubleshooting
 			log.info("not set selection criteria... ", e);
 			selectionConstraints = null;
 		}
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java
@ -0,0 +1,65 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.util.List;
 import org.dom4j.DocumentException;
 import com.google.common.base.Joiner;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 /**
  * Retrieves the community configuration by querying the information system lookup service.
  */
 public class QueryInformationSystem {
 	/**
 	 * XQuery run against the lookup service. For every context of type {@code community} or
 	 * {@code ri} it returns a {@code community} element carrying the context id attribute, the
 	 * subjects (the {@code subject} parameter split on commas), the enabled datasources with
 	 * their {@code openaireId} and {@code selcriteria}, and the zenodo communities with their
 	 * {@code zenodoid} and {@code selcriteria}. The {@code $organizations} variable is bound
 	 * but not used in the returned element.
 	 */
 	private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
 		+ "  let $subj := $x//CONFIGURATION/context/param[./@name='subject']/text() "
 		+ "  let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept  "
 		+ "  let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept  "
 		+ "  let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept  "
 		+ "  where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']  "
 		+ "  return  "
 		+ "  <community>  "
 		+ "  { $x//CONFIGURATION/context/@id}  "
 		+ "  <subjects>  "
 		+ "  {for $y in tokenize($subj,',')  "
 		+ "  return  "
 		+ "  <subject>{$y}</subject>}  "
 		+ "  </subjects>  "
 		+ "  <datasources>  "
 		+ "  {for $d in $datasources  "
 		+ "  where $d/param[./@name='enabled']/text()='true'  "
 		+ "  return  "
 		+ "  <datasource>  "
 		+ "  <openaireId>  "
 		+ "  {$d//param[./@name='openaireId']/text()}  "
 		+ "  </openaireId>  "
 		+ "  <selcriteria>  "
 		+ "  {$d/param[./@name='selcriteria']/text()}  "
 		+ "  </selcriteria>  "
 		+ "  </datasource> } "
 		+ "  </datasources>  "
 		+ "  <zenodocommunities>  "
 		+ "  {for $zc in $communities  "
 		+ "  return  "
 		+ "  <zenodocommunity>  "
 		+ "  <zenodoid>  "
 		+ "  {$zc/param[./@name='zenodoid']/text()} "
 		+ "  </zenodoid> "
 		+ "  <selcriteria> "
 		+ "  {$zc/param[./@name='selcriteria']/text()} "
 		+ "  </selcriteria> "
 		+ "  </zenodocommunity>} "
 		+ "  </zenodocommunities>  "
 		+ "  </community>";

 	/**
 	 * Runs {@link #XQUERY} on the lookup service found at the given URL, wraps the results
 	 * (joined by a space) in a {@code communities} root element and parses the outcome with
 	 * {@link CommunityConfigurationFactory#newInstance(String)}.
 	 *
 	 * @param isLookupUrl address of the lookup service
 	 * @return the community configuration built from the query results
 	 * @throws ISLookUpException declared for failures of the lookup service call
 	 * @throws DocumentException if the assembled XML cannot be parsed
 	 */
 	public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
 		throws ISLookUpException, DocumentException {
 		ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
 		final List<String> res = isLookUp.quickSearchProfile(XQUERY);
 		// the single results are wrapped in a root element to obtain a well formed document
 		final String xmlConf = "<communities>" + Joiner.on(" ").join(res) + "</communities>";
 		return CommunityConfigurationFactory.newInstance(xmlConf);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
@ -0,0 +1,247 @@
 package eu.dnetlib.dhp.bulktag.community;
 import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
 import java.io.Serializable;
 import java.util.*;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import org.apache.commons.lang3.StringUtils;
 import com.google.gson.Gson;
 import com.jayway.jsonpath.DocumentContext;
 import com.jayway.jsonpath.JsonPath;
 import eu.dnetlib.dhp.schema.oaf.*;
 /**
  * Enriches the context list of a result with the communities it belongs to, according to a
  * {@link CommunityConfiguration}. Communities are found through the result subjects, the
  * datasources of its instances and the Zenodo communities listed among its contexts.
  *
  * <p>Created by miriam on 02/08/2018.
  */
 public class ResultTagger implements Serializable {

 	private String trust = "0.8";

 	/**
 	 * Removes from the result every context whose id contains the Zenodo community indicator.
 	 * A result without context list is left untouched.
 	 *
 	 * @return {@code true} when at least one context has been removed
 	 */
 	private boolean clearContext(Result result) {
 		final List<Context> current = result.getContext();
 		if (current == null) {
 			return false;
 		}
 		// a mutable list is required: new contexts may be appended to it later on
 		final List<Context> clist = current
 			.stream()
 			.filter(c -> (!c.getId().contains(ZENODO_COMMUNITY_INDICATOR)))
 			.collect(Collectors.toCollection(ArrayList::new));
 		result.setContext(clist);
 		return (current.size() != clist.size());
 	}

 	/**
 	 * Evaluates each JSONPath expression of {@code params} against the JSON form of the result.
 	 *
 	 * @return for every key of {@code params}, the values read at its path, or an empty list
 	 *         when the path is not found in the result
 	 */
 	private Map<String, List<String>> getParamMap(final Result result, Map<String, String> params) {
 		Map<String, List<String>> param = new HashMap<>();
 		String json = new Gson().toJson(result, Result.class);
 		DocumentContext jsonContext = JsonPath.parse(json);
 		if (params == null) {
 			params = new HashMap<>();
 		}
 		for (String key : params.keySet()) {
 			try {
 				param.put(key, jsonContext.read(params.get(key)));
 			} catch (com.jayway.jsonpath.PathNotFoundException e) {
 				// a missing path simply provides no value for the key
 				param.put(key, new ArrayList<>());
 			}
 		}
 		return param;
 	}

 	/**
 	 * Tags the result with the communities it is associated with.
 	 *
 	 * <p>Results deleted by inference are only cleaned from their Zenodo community contexts.
 	 * For the other results the Zenodo community contexts are removed, the contexts of the
 	 * communities already present receive the bulk tagging provenance, and a new context is
 	 * added for each community not yet present.
 	 *
 	 * @param result the result to enrich; it is modified in place
 	 * @param conf the community configuration
 	 * @param criteria JSONPath expressions, keyed by field name, used to extract the values the
 	 *        datasource selection constraints are verified against; may be {@code null}
 	 * @param <R> the concrete result type
 	 * @return the same {@code result} instance
 	 */
 	public <R extends Result> R enrichContextCriteria(
 		final R result, final CommunityConfiguration conf, final Map<String, String> criteria) {

 		// Entities deleted by inference are not tagged: only the zenodo communities are
 		// removed from their context list
 		if (isDeletedByInference(result)) {
 			clearContext(result);
 			return result;
 		}

 		final Map<String, List<String>> param = getParamMap(result, criteria);

 		final Set<String> subjects = communitiesBySubject(result, conf);
 		final Set<String> datasources = communitiesByDatasource(result, conf, param);
 		final Set<String> czenodo = communitiesByZenodoCommunity(result, conf);

 		// communities contains all the communities to be added as context for the result
 		final Set<String> communities = new HashSet<>();
 		communities.addAll(subjects);
 		communities.addAll(datasources);
 		communities.addAll(czenodo);

 		clearContext(result);

 		/* Verify if there is something to bulktag */
 		if (communities.isEmpty()) {
 			return result;
 		}

 		if (result.getContext() == null) {
 			result.setContext(new ArrayList<>());
 		}

 		// contexts already present: append the provenance of this tagging
 		for (Context c : result.getContext()) {
 			if (communities.contains(c.getId())) {
 				List<DataInfo> dataInfoList = c.getDataInfo();
 				if (dataInfoList == null) {
 					dataInfoList = new ArrayList<>();
 					c.setDataInfo(dataInfoList);
 				}
 				dataInfoList.addAll(bulkTagProvenance(c.getId(), subjects, datasources, czenodo));
 			}
 		}

 		communities
 			.removeAll(
 				result.getContext().stream().map(c -> c.getId()).collect(Collectors.toSet()));

 		if (communities.isEmpty()) {
 			return result;
 		}

 		// communities not yet present: add a new context for each of them
 		List<Context> toaddcontext = communities
 			.stream()
 			.map(
 				c -> {
 					Context context = new Context();
 					context.setId(c);
 					context.setDataInfo(bulkTagProvenance(c, subjects, datasources, czenodo));
 					return context;
 				})
 			.collect(Collectors.toList());

 		result.getContext().addAll(toaddcontext);
 		return result;
 	}

 	/** Tells whether the result is flagged as deleted by inference; missing information counts as no. */
 	private static boolean isDeletedByInference(final Result result) {
 		return result.getDataInfo() != null
 			&& Boolean.TRUE.equals(result.getDataInfo().getDeletedbyinference());
 	}

 	/** Communities associated with the (lower-cased, trimmed, non blank) subjects of the result. */
 	private static Set<String> communitiesBySubject(
 		final Result result, final CommunityConfiguration conf) {
 		final Set<String> subjects = new HashSet<>();
 		if (result.getSubject() == null) {
 			return subjects;
 		}
 		result
 			.getSubject()
 			.stream()
 			.map(subject -> subject.getValue())
 			.filter(StringUtils::isNotBlank)
 			.map(String::toLowerCase)
 			.map(String::trim)
 			.distinct()
 			.forEach(s -> subjects.addAll(conf.getCommunityForSubjectValue(s)));
 		return subjects;
 	}

 	/**
 	 * Communities associated with the datasources the instances of the result have been
 	 * collected from or are hosted by, whose selection constraints are verified by the result.
 	 * Instances lacking one of the two datasources contribute with the other one only.
 	 */
 	private static Set<String> communitiesByDatasource(
 		final Result result,
 		final CommunityConfiguration conf,
 		final Map<String, List<String>> param) {
 		final Set<String> datasources = new HashSet<>();
 		if (result.getInstance() == null) {
 			return datasources;
 		}
 		// datasource identifiers are compared without the prefix preceding the '|'
 		final Set<String> datasourceIds = new HashSet<>();
 		for (Instance i : result.getInstance()) {
 			if (i.getCollectedfrom() != null) {
 				datasourceIds.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
 			}
 			if (i.getHostedby() != null) {
 				datasourceIds.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
 			}
 		}
 		for (String dsId : datasourceIds) {
 			datasources.addAll(conf.getCommunityForDatasource(dsId, param));
 		}
 		return datasources;
 	}

 	/**
 	 * Communities associated with the Zenodo communities found among the contexts of the
 	 * result. The Zenodo community id is the part of the context id following the last '/'.
 	 */
 	private static Set<String> communitiesByZenodoCommunity(
 		final Result result, final CommunityConfiguration conf) {
 		final Set<String> czenodo = new HashSet<>();
 		if (result.getContext() == null) {
 			return czenodo;
 		}
 		result
 			.getContext()
 			.stream()
 			.map(c -> c.getId())
 			.filter(id -> id.contains(ZENODO_COMMUNITY_INDICATOR))
 			.map(id -> id.substring(id.lastIndexOf("/") + 1).trim())
 			.forEach(zc -> czenodo.addAll(conf.getCommunityForZenodoCommunityValue(zc)));
 		return czenodo;
 	}

 	/**
 	 * Builds the provenance entries for a community: one entry for each tagging strategy
 	 * (subject, datasource, zenodo community) that selected it.
 	 */
 	private static List<DataInfo> bulkTagProvenance(
 		final String communityId,
 		final Set<String> subjects,
 		final Set<String> datasources,
 		final Set<String> czenodo) {
 		final List<DataInfo> dataInfoList = new ArrayList<>();
 		if (subjects.contains(communityId)) {
 			dataInfoList
 				.add(getDataInfo(BULKTAG_DATA_INFO_TYPE, CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT));
 		}
 		if (datasources.contains(communityId)) {
 			dataInfoList
 				.add(getDataInfo(BULKTAG_DATA_INFO_TYPE, CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE));
 		}
 		if (czenodo.contains(communityId)) {
 			dataInfoList
 				.add(getDataInfo(BULKTAG_DATA_INFO_TYPE, CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO));
 		}
 		return dataInfoList;
 	}

 	/**
 	 * Creates the data info describing an inferred piece of information.
 	 *
 	 * @param inference_provenance the inference provenance
 	 * @param inference_class_id class id of the provenance action
 	 * @param inference_class_name class name of the provenance action
 	 * @return a data info flagged as inferred, with the given provenance and provenance action
 	 */
 	public static DataInfo getDataInfo(
 		String inference_provenance, String inference_class_id, String inference_class_name) {
 		DataInfo di = new DataInfo();
 		di.setInferred(true);
 		di.setInferenceprovenance(inference_provenance);
 		di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name));
 		return di;
 	}

 	/**
 	 * Creates a qualifier in the provenance actions scheme.
 	 *
 	 * @param inference_class_id the class id
 	 * @param inference_class_name the class name
 	 * @return the qualifier, with both scheme id and scheme name set to
 	 *         {@code DNET_PROVENANCE_ACTIONS}
 	 */
 	public static Qualifier getQualifier(String inference_class_id, String inference_class_name) {
 		Qualifier pa = new Qualifier();
 		pa.setClassid(inference_class_id);
 		pa.setClassname(inference_class_name);
 		pa.setSchemeid(DNET_PROVENANCE_ACTIONS);
 		pa.setSchemename(DNET_PROVENANCE_ACTIONS);
 		return pa;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java
@ -0,0 +1,51 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import java.lang.reflect.Type;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import com.google.gson.Gson;
 import com.google.gson.reflect.TypeToken;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
 /**
 * A list of {@link Constraints} evaluated in OR: the selection is satisfied as soon as one of the
 * contained constraints is satisfied.
 */
 public class SelectionConstraints implements Serializable {
 	private List<Constraints> criteria;
 	public SelectionConstraints() {
 	}
 	public List<Constraints> getCriteria() {
 		return this.criteria;
 	}
 	public void setCriteria(List<Constraints> criteria) {
 		this.criteria = criteria;
 	}
 	/**
 	 * Replaces the criteria with the ones parsed from a JSON array of constraints.
 	 *
 	 * @param json JSON representation of a collection of {@link Constraints}
 	 */
 	public void setSc(String json) {
 		final Type constraintsCollection = new TypeToken<Collection<Constraints>>() {
 		}.getType();
 		this.criteria = new Gson().fromJson(json, constraintsCollection);
 	}
 	/**
 	 * Checks the criteria in OR against the given parameters.
 	 *
 	 * @param param the values each constraint is verified against
 	 * @return true if at least one constraint is verified, false otherwise
 	 */
 	public boolean verifyCriteria(final Map<String, List<String>> param) {
 		return criteria.stream().anyMatch(candidate -> candidate.verifyCriteria(param));
 	}
 	/**
 	 * Hands the resolver over to every contained constraint.
 	 *
 	 * @param resolver the verb resolver passed to each constraint
 	 */
 	public void setSelection(VerbResolver resolver) {
 		criteria.forEach(candidate -> candidate.setSelection(resolver));
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java
@ -0,0 +1,17 @@
 package eu.dnetlib.dhp.bulktag.community;
 /** String constants shared by the bulk tagging code. */
 public class TaggingConstants {
 	/** Inference provenance value used for bulk tagging. */
 	public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging";
 	/** Marker substring of Zenodo community URLs. */
 	public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";
 	// Provenance action: community assigned because of a subject
 	public static final String CLASS_ID_SUBJECT = "community:subject";
 	public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
 	// Provenance action: community assigned because of a datasource
 	public static final String CLASS_ID_DATASOURCE = "community:datasource";
 	public static final String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
 	// Provenance action: community assigned because of a Zenodo community
 	public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
 	public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java
@ -0,0 +1,45 @@
 package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import org.dom4j.Node;
 import com.google.gson.Gson;
 /**
 * Pairs a Zenodo community identifier with the (optional) selection constraints attached to it.
 * Created by miriam on 01/08/2018.
 */
 public class ZenodoCommunity implements Serializable {
 	private String zenodoCommunityId;
 	private SelectionConstraints selCriteria;
 	public String getZenodoCommunityId() {
 		return this.zenodoCommunityId;
 	}
 	public void setZenodoCommunityId(String zenodoCommunityId) {
 		this.zenodoCommunityId = zenodoCommunityId;
 	}
 	public SelectionConstraints getSelCriteria() {
 		return this.selCriteria;
 	}
 	public void setSelCriteria(SelectionConstraints selCriteria) {
 		this.selCriteria = selCriteria;
 	}
 	/** Parses the JSON text into a {@link SelectionConstraints} and stores it. */
 	private void setSelCriteria(String json) {
 		final Gson parser = new Gson();
 		this.selCriteria = parser.fromJson(json, SelectionConstraints.class);
 	}
 	/**
 	 * Sets the selection criteria from the text of an XML node.
 	 *
 	 * @param n node whose text holds the JSON criteria; a null node clears the criteria
 	 */
 	public void setSelCriteria(Node n) {
 		if (n != null) {
 			setSelCriteria(n.getText());
 			return;
 		}
 		this.selCriteria = null;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerb.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerb.java
@ -0,0 +1,30 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
/**
 * {@link Selection} annotated with the verb name {@code contains}: satisfied when the tested value
 * contains the configured parameter (case sensitive).
 */
@VerbClass("contains")
 public class ContainsVerb implements Selection, Serializable {
 	private String param;
 	public ContainsVerb() {
 	}
 	public ContainsVerb(final String param) {
 		this.param = param;
 	}
 	public String getParam() {
 		return this.param;
 	}
 	public void setParam(String param) {
 		this.param = param;
 	}
 	/**
 	 * @param value the string to test
 	 * @return true if {@code value} contains the configured parameter
 	 */
 	@Override
 	public boolean apply(String value) {
 		final boolean found = value.contains(this.param);
 		return found;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerbIgnoreCase.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerbIgnoreCase.java
@ -0,0 +1,30 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
/**
 * {@link Selection} annotated with the verb name {@code contains_ignorecase}: satisfied when the
 * tested value contains the configured parameter, ignoring case.
 */
@VerbClass("contains_ignorecase")
 public class ContainsVerbIgnoreCase implements Selection, Serializable {
 	private String param;
 	public ContainsVerbIgnoreCase() {
 	}
 	public ContainsVerbIgnoreCase(final String param) {
 		this.param = param;
 	}
 	/**
 	 * @param value the string to test
 	 * @return true if {@code value} contains the configured parameter, ignoring case
 	 */
 	@Override
 	public boolean apply(String value) {
 		// Lower-case with Locale.ROOT so the outcome does not depend on the JVM default locale
 		// (e.g. under a Turkish locale "I" lower-cases to a dotless "ı" and would not match "i").
 		return value
 			.toLowerCase(java.util.Locale.ROOT)
 			.contains(param.toLowerCase(java.util.Locale.ROOT));
 	}
 	public String getParam() {
 		return param;
 	}
 	public void setParam(String param) {
 		this.param = param;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerb.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerb.java
@ -0,0 +1,30 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
/**
 * {@link Selection} annotated with the verb name {@code equals}: satisfied when the tested value
 * is equal to the configured parameter (case sensitive).
 */
@VerbClass("equals")
 public class EqualVerb implements Selection, Serializable {
 	private String param;
 	public EqualVerb() {
 	}
 	public EqualVerb(final String param) {
 		this.param = param;
 	}
 	public String getParam() {
 		return this.param;
 	}
 	public void setParam(String param) {
 		this.param = param;
 	}
 	/**
 	 * @param value the string to test
 	 * @return true if {@code value} equals the configured parameter
 	 */
 	@Override
 	public boolean apply(String value) {
 		final boolean same = value.equals(this.param);
 		return same;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerbIgnoreCase.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerbIgnoreCase.java
@ -0,0 +1,30 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
/**
 * {@link Selection} annotated with the verb name {@code equals_ignorecase}: satisfied when the
 * tested value is equal to the configured parameter, ignoring case.
 */
@VerbClass("equals_ignorecase")
 public class EqualVerbIgnoreCase implements Selection, Serializable {
 	private String param;
 	public EqualVerbIgnoreCase() {
 	}
 	public EqualVerbIgnoreCase(final String param) {
 		this.param = param;
 	}
 	public String getParam() {
 		return this.param;
 	}
 	public void setParam(String param) {
 		this.param = param;
 	}
 	/**
 	 * @param value the string to test
 	 * @return true if {@code value} equals the configured parameter, ignoring case
 	 */
 	@Override
 	public boolean apply(String value) {
 		final boolean same = value.equalsIgnoreCase(this.param);
 		return same;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java
@ -0,0 +1,43 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.lang.reflect.Type;
 import com.google.gson.*;
 public class InterfaceAdapter implements JsonSerializer, JsonDeserializer {
 	private static final String CLASSNAME = "CLASSNAME";
 	private static final String DATA = "DATA";
 	public Object deserialize(
 		JsonElement jsonElement,
 		Type type,
 		JsonDeserializationContext jsonDeserializationContext)
 		throws JsonParseException {
 		JsonObject jsonObject = jsonElement.getAsJsonObject();
 		JsonPrimitive prim = (JsonPrimitive) jsonObject.get(CLASSNAME);
 		String className = prim.getAsString();
 		Class klass = getObjectClass(className);
 		return jsonDeserializationContext.deserialize(jsonObject.get(DATA), klass);
 	}
 	public JsonElement serialize(
 		Object jsonElement, Type type, JsonSerializationContext jsonSerializationContext) {
 		JsonObject jsonObject = new JsonObject();
 		jsonObject.addProperty(CLASSNAME, jsonElement.getClass().getName());
 		jsonObject.add(DATA, jsonSerializationContext.serialize(jsonElement));
 		return jsonObject;
 	}
 	/** **** Helper method to get the className of the object to be deserialized **** */
 	public Class getObjectClass(String className) {
 		try {
 			return Class.forName(className);
 		} catch (ClassNotFoundException e) {
 			// e.printStackTrace();
 			throw new JsonParseException(e.getMessage());
 		}
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerb.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerb.java
@ -0,0 +1,30 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
/**
 * {@link Selection} annotated with the verb name {@code not_contains}: satisfied when the tested
 * value does not contain the configured parameter (case sensitive).
 */
@VerbClass("not_contains")
 public class NotContainsVerb implements Selection, Serializable {
 	private String param;
 	public NotContainsVerb() {
 	}
 	public NotContainsVerb(final String param) {
 		this.param = param;
 	}
 	public String getParam() {
 		return this.param;
 	}
 	public void setParam(String param) {
 		this.param = param;
 	}
 	/**
 	 * @param value the string to test
 	 * @return true if {@code value} does not contain the configured parameter
 	 */
 	@Override
 	public boolean apply(String value) {
 		final boolean found = value.contains(this.param);
 		return !found;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerbIgnoreCase.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerbIgnoreCase.java
@ -0,0 +1,30 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
/**
 * {@link Selection} annotated with the verb name {@code not_contains_ignorecase}: satisfied when
 * the tested value does not contain the configured parameter, ignoring case.
 */
@VerbClass("not_contains_ignorecase")
 public class NotContainsVerbIgnoreCase implements Selection, Serializable {
 	private String param;
 	public NotContainsVerbIgnoreCase() {
 	}
 	public NotContainsVerbIgnoreCase(final String param) {
 		this.param = param;
 	}
 	/**
 	 * @param value the string to test
 	 * @return true if {@code value} does not contain the configured parameter, ignoring case
 	 */
 	@Override
 	public boolean apply(String value) {
 		// Lower-case with Locale.ROOT so the outcome does not depend on the JVM default locale
 		// (e.g. under a Turkish locale "I" lower-cases to a dotless "ı" and would not match "i").
 		return !value
 			.toLowerCase(java.util.Locale.ROOT)
 			.contains(param.toLowerCase(java.util.Locale.ROOT));
 	}
 	public String getParam() {
 		return param;
 	}
 	public void setParam(String param) {
 		this.param = param;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerb.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerb.java
@ -0,0 +1,30 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
/**
 * {@link Selection} annotated with the verb name {@code not_equals}: satisfied when the tested
 * value differs from the configured parameter (case sensitive).
 */
@VerbClass("not_equals")
 public class NotEqualVerb implements Selection, Serializable {
 	private String param;
 	public NotEqualVerb() {
 	}
 	public NotEqualVerb(final String param) {
 		this.param = param;
 	}
 	/**
 	 * @param value the string to test
 	 * @return true if {@code value} is not equal to the configured parameter
 	 */
 	@Override
 	public boolean apply(String value) {
 		final boolean same = value.equals(this.param);
 		return !same;
 	}
 	public String getParam() {
 		return this.param;
 	}
 	public void setParam(String param) {
 		this.param = param;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerbIgnoreCase.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerbIgnoreCase.java
@ -0,0 +1,30 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
/**
 * {@link Selection} annotated with the verb name {@code not_equals_ignorecase}: satisfied when the
 * tested value differs from the configured parameter, ignoring case.
 */
@VerbClass("not_equals_ignorecase")
 public class NotEqualVerbIgnoreCase implements Selection, Serializable {
 	private String param;
 	public NotEqualVerbIgnoreCase() {
 	}
 	public NotEqualVerbIgnoreCase(final String param) {
 		this.param = param;
 	}
 	/**
 	 * @param value the string to test
 	 * @return true if {@code value} is not equal to the configured parameter, ignoring case
 	 */
 	@Override
 	public boolean apply(String value) {
 		final boolean same = value.equalsIgnoreCase(this.param);
 		return !same;
 	}
 	public String getParam() {
 		return this.param;
 	}
 	public void setParam(String param) {
 		this.param = param;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java
@ -0,0 +1,7 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 /**
 * A single-string predicate. The verb classes in this package (contains, equals, and their
 * negated / case-insensitive variants) implement it by comparing the value with a configured
 * parameter.
 */
 public interface Selection {
 	/**
 	 * Tests the given value.
 	 *
 	 * @param value the string to test
 	 * @return true if the value satisfies this selection, false otherwise
 	 */
 	boolean apply(String value);
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbClass.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbClass.java
@ -0,0 +1,14 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.lang.annotation.ElementType;
 import java.lang.annotation.Retention;
 import java.lang.annotation.RetentionPolicy;
 import java.lang.annotation.Target;
/**
 * Package-private, runtime-retained annotation for types. It carries the verb name of a
 * {@link Selection} implementation; VerbResolver scans this package for classes carrying it
 * and uses the annotation value as lookup key.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
@interface VerbClass {
 	/** The verb name associated with the annotated class. */
 	String value();
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java
@ -0,0 +1,56 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
 import java.lang.reflect.InvocationTargetException;
 import java.util.Map;
 import java.util.stream.Collectors;
 import io.github.classgraph.ClassGraph;
 import io.github.classgraph.ClassInfo;
 import io.github.classgraph.ClassInfoList;
 import io.github.classgraph.ScanResult;
 /**
 * Maps verb names to the {@link Selection} classes annotated with {@code VerbClass}, discovered
 * by scanning the {@code eu.dnetlib.dhp.bulktag.criteria} package at construction time, and
 * instantiates them on request.
 */
 public class VerbResolver implements Serializable {
 	// verb name -> implementing class; stays null if the classpath scan in the constructor fails
 	private Map<String, Class<Selection>> map = null; // = new HashMap<>();
 	private final ClassGraph classgraph = new ClassGraph();
 	/**
 	 * Scans the criteria package and registers every class annotated with {@code VerbClass}. A
 	 * failing scan is reported on stderr and leaves the resolver without any verb.
 	 */
 	public VerbResolver() {
 		try (ScanResult scanResult = // Assign scanResult in try-with-resources
 			classgraph // Reuse the ClassGraph instance held by this resolver
 				.verbose() // Enables logging to stderr
 				.enableAllInfo() // Scan classes, methods, fields, annotations
 				.whitelistPackages(
 					"eu.dnetlib.dhp.bulktag.criteria") // Scan the criteria package and subpackages
 				.scan()) { // Perform the scan and return a ScanResult
 			ClassInfoList routeClassInfoList = scanResult
 				.getClassesWithAnnotation(
 					"eu.dnetlib.dhp.bulktag.criteria.VerbClass");
 			// The key is the first parameter value of the first annotation found on the class
 			this.map = routeClassInfoList
 				.stream()
 				.collect(
 					Collectors
 						.toMap(
 							value -> (String) ((ClassInfo) value)
 								.getAnnotationInfo()
 								.get(0)
 								.getParameterValues()
 								.get(0)
 								.getValue(),
 							value -> (Class<Selection>) ((ClassInfo) value).loadClass()));
 		} catch (Exception e) {
 			e.printStackTrace();
 		}
 	}
 	/**
 	 * Instantiates the {@link Selection} registered for the given verb through its
 	 * single-String constructor.
 	 *
 	 * @param name the verb name, as declared in the {@code VerbClass} annotation
 	 * @param param the parameter passed to the constructor of the selection
 	 * @return a new selection instance
 	 * @throws IllegalStateException if the classpath scan failed and no verb was registered
 	 * @throws IllegalArgumentException if no class is registered for {@code name}
 	 * @throws NoSuchMethodException if the class has no constructor taking a String
 	 * @throws IllegalAccessException if the constructor is not accessible
 	 * @throws InvocationTargetException if the constructor throws
 	 * @throws InstantiationException if the class cannot be instantiated
 	 */
 	public Selection getSelectionCriteria(String name, String param)
 		throws NoSuchMethodException, IllegalAccessException, InvocationTargetException,
 		InstantiationException {
 		// Make the two failure modes explicit instead of surfacing as a message-less NPE.
 		if (map == null) {
 			throw new IllegalStateException(
 				"Verb classes are not available: the classpath scan failed at construction time");
 		}
 		final Class<Selection> verbClass = map.get(name);
 		if (verbClass == null) {
 			throw new IllegalArgumentException(
 				"Unknown selection verb '" + name + "', known verbs: " + map.keySet());
 		}
 		return verbClass.getDeclaredConstructor(String.class).newInstance(param);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java
@ -0,0 +1,10 @@
 package eu.dnetlib.dhp.bulktag.criteria;
 /** Static factory for {@link VerbResolver} instances. */
 public class VerbResolverFactory {
 	/**
 	 * @return a newly constructed {@link VerbResolver}
 	 */
 	public static VerbResolver newInstance() {
 		final VerbResolver resolver = new VerbResolver();
 		return resolver;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java
@ -0,0 +1,25 @@
 package eu.dnetlib.dhp.countrypropagation;
 import java.io.Serializable;
 /** Serializable bean holding the class id and the class name of a country. */
 public class CountrySbs implements Serializable {
 	private String classid;
 	private String classname;
 	/** @return the country class id */
 	public String getClassid() {
 		return this.classid;
 	}
 	/** @return the country class name */
 	public String getClassname() {
 		return this.classname;
 	}
 	/** @param classid the country class id */
 	public void setClassid(String classid) {
 		this.classid = classid;
 	}
 	/** @param classname the country class name */
 	public void setClassname(String classname) {
 		this.classname = classname;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java
@ -0,0 +1,25 @@
 package eu.dnetlib.dhp.countrypropagation;
 import java.io.Serializable;
 /** Serializable bean associating a datasource identifier with a {@link CountrySbs}. */
 public class DatasourceCountry implements Serializable {
 	private String dataSourceId;
 	private CountrySbs country;
 	/** @return the datasource identifier */
 	public String getDataSourceId() {
 		return this.dataSourceId;
 	}
 	/** @return the country associated with the datasource */
 	public CountrySbs getCountry() {
 		return this.country;
 	}
 	/** @param dataSourceId the datasource identifier */
 	public void setDataSourceId(String dataSourceId) {
 		this.dataSourceId = dataSourceId;
 	}
 	/** @param country the country associated with the datasource */
 	public void setCountry(CountrySbs country) {
 		this.country = country;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java
@ -0,0 +1,121 @@
 package eu.dnetlib.dhp.countrypropagation;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.Arrays;
 import java.util.List;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.*;
 /**
 * Prepares the association between datasources and countries. The association is computed only
 * for datasources of specific types or having whitelisted ids. The country is registered in the
 * Organization associated to the Datasource, so the relation between Datasource and Organization
 * is exploited to get the country for the datasource.
 */
 public class PrepareDatasourceCountryAssociation {
 	private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class);
 	/**
 	 * Parses the job arguments and runs the preparation inside a Spark session with Hive support.
 	 *
 	 * @param args command line arguments, described by input_prepareassoc_parameters.json
 	 * @throws Exception if the arguments cannot be parsed or the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				PrepareDatasourceCountryAssociation.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				removeOutputDir(spark, outputPath);
 				// whitelist and allowedtypes are ';'-separated lists
 				prepareDatasourceCountryAssociation(
 					spark,
 					Arrays.asList(parser.get("whitelist").split(";")),
 					Arrays.asList(parser.get("allowedtypes").split(";")),
 					inputPath,
 					outputPath);
 			});
 	}
 	/**
 	 * Joins datasources, relations and organizations to find the country of each selected
 	 * datasource and writes the result as gzip-compressed JSON, overwriting the output path.
 	 *
 	 * @param spark the Spark session
 	 * @param whitelist ids of the datasources added as OR conditions to the datasource filter
 	 * @param allowedtypes datasource type class ids passed to getConstraintList
 	 * @param inputPath base path holding the datasource, relation and organization tables
 	 * @param outputPath path where the DatasourceCountry records are written
 	 */
 	private static void prepareDatasourceCountryAssociation(
 		SparkSession spark,
 		List<String> whitelist,
 		List<String> allowedtypes,
 		String inputPath,
 		String outputPath) {
 		// NOTE(review): ids are interpolated verbatim into the SQL text; this assumes the whitelist
 		// comes from trusted workflow configuration - confirm.
 		// A StringBuilder avoids re-allocating the String at every iteration.
 		StringBuilder whitelisted = new StringBuilder();
 		for (String i : whitelist) {
 			whitelisted.append(" OR id = '").append(i).append("'");
 		}
 		Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class);
 		Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class);
 		Dataset<Organization> organization = readPath(spark, inputPath + "/organization", Organization.class);
 		datasource.createOrReplaceTempView("datasource");
 		relation.createOrReplaceTempView("relation");
 		organization.createOrReplaceTempView("organization");
 		String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country "
 			+ "FROM ( SELECT id "
 			+ "       FROM datasource "
 			+ "       WHERE (datainfo.deletedbyinference = false "
 			+ whitelisted
 			+ ") "
 			+ getConstraintList("datasourcetype.classid = '", allowedtypes)
 			+ ") d "
 			+ "JOIN ( SELECT source, target "
 			+ "       FROM relation "
 			+ "       WHERE relclass = '"
 			+ RELATION_DATASOURCE_ORGANIZATION_REL_CLASS
 			+ "' "
 			+ "       AND datainfo.deletedbyinference = false ) rel "
 			+ "ON d.id = rel.source "
 			+ "JOIN (SELECT id, country "
 			+ "      FROM organization "
 			+ "      WHERE datainfo.deletedbyinference = false "
 			+ "      AND length(country.classid) > 0) o "
 			+ "ON o.id = rel.target";
 		spark
 			.sql(query)
 			.as(Encoders.bean(DatasourceCountry.class))
 			.write()
 			.option("compression", "gzip")
 			.mode(SaveMode.Overwrite)
 			.json(outputPath);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java
@ -0,0 +1,98 @@
 package eu.dnetlib.dhp.countrypropagation;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.*;
 import org.apache.spark.sql.Dataset;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.*;
 /**
 * Computes, for each result, the set of countries of the datasources matched through the
 * {@code cf} and {@code hb} columns of the {@code cfhb} view, and appends the outcome to the
 * output path as gzip-compressed JSON.
 */
 public class PrepareResultCountrySet {
 	private static final Logger log = LoggerFactory.getLogger(PrepareResultCountrySet.class);
 	// Collects the countries of the datasources matching either cf or hb, grouped by result id
 	private static final String RESULT_COUNTRYSET_QUERY = "SELECT id resultId, collect_set(country) countrySet "
 		+ "FROM ( SELECT id, country "
 		+ "FROM datasource_country JOIN cfhb ON cf = dataSourceId "
 		+ "UNION ALL "
 		+ "SELECT id, country FROM datasource_country "
 		+ "JOIN cfhb ON hb = dataSourceId ) tmp "
 		+ "GROUP BY id";
 	/**
 	 * Parses the job arguments and runs the preparation inside a Spark session with Hive support.
 	 *
 	 * @param args command line arguments, described by input_prepareresultcountry_parameters.json
 	 * @throws Exception if the arguments cannot be parsed, the result class cannot be loaded or
 	 *         the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		final String parametersJson = IOUtils
 			.toString(
 				PrepareResultCountrySet.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(parametersJson);
 		parser.parseArgument(args);
 		final Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String datasourcecountrypath = parser.get("preparedInfoPath");
 		log.info("preparedInfoPath: {}", datasourcecountrypath);
 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);
 		final Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
 		final SparkConf sparkConf = new SparkConf();
 		sparkConf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		runWithSparkHiveSession(
 			sparkConf,
 			isSparkSessionManaged,
 			spark -> getPotentialResultToUpdate(
 				spark,
 				inputPath,
 				outputPath,
 				datasourcecountrypath,
 				resultClazz));
 	}
 	/**
 	 * Registers the temporary views needed by the query and writes the resulting country sets.
 	 *
 	 * @param spark the Spark session
 	 * @param inputPath path of the result table
 	 * @param outputPath path the ResultCountrySet records are appended to
 	 * @param datasourcecountrypath path of the prepared DatasourceCountry records
 	 * @param resultClazz class of the results stored under inputPath
 	 */
 	private static <R extends Result> void getPotentialResultToUpdate(
 		SparkSession spark,
 		String inputPath,
 		String outputPath,
 		String datasourcecountrypath,
 		Class<R> resultClazz) {
 		final Dataset<R> results = readPath(spark, inputPath, resultClazz);
 		results.createOrReplaceTempView("result");
 		// called after the "result" view is registered, as in the original statement order
 		createCfHbforResult(spark);
 		final Dataset<DatasourceCountry> datasourceCountries = readPath(
 			spark, datasourcecountrypath, DatasourceCountry.class);
 		datasourceCountries.createOrReplaceTempView("datasource_country");
 		final Dataset<ResultCountrySet> countrySets = spark
 			.sql(RESULT_COUNTRYSET_QUERY)
 			.as(Encoders.bean(ResultCountrySet.class));
 		countrySets
 			.write()
 			.option("compression", "gzip")
 			.mode(SaveMode.Append)
 			.json(outputPath);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java
@ -0,0 +1,26 @@
 package eu.dnetlib.dhp.countrypropagation;
 import java.io.Serializable;
 import java.util.ArrayList;
 /** Serializable bean associating a result identifier with the list of its countries. */
 public class ResultCountrySet implements Serializable {
 	private String resultId;
 	private ArrayList<CountrySbs> countrySet;
 	/** @return the result identifier */
 	public String getResultId() {
 		return this.resultId;
 	}
 	/** @return the countries associated with the result */
 	public ArrayList<CountrySbs> getCountrySet() {
 		return this.countrySet;
 	}
 	/** @param resultId the result identifier */
 	public void setResultId(String resultId) {
 		this.resultId = resultId;
 	}
 	/** @param countrySet the countries associated with the result */
 	public void setCountrySet(ArrayList<CountrySbs> countrySet) {
 		this.countrySet = countrySet;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
@ -0,0 +1,132 @@
 package eu.dnetlib.dhp.countrypropagation;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.util.*;
 import java.util.stream.Collectors;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Country;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import scala.Tuple2;
 public class SparkCountryPropagationJob {
 	private static final Logger log = LoggerFactory.getLogger(SparkCountryPropagationJob.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				SparkCountryPropagationJob.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		String sourcePath = parser.get("sourcePath");
 		log.info("sourcePath: {}", sourcePath);
 		String preparedInfoPath = parser.get("preparedInfoPath");
 		log.info("preparedInfoPath: {}", preparedInfoPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);
 		final Boolean saveGraph = Optional
 			.ofNullable(parser.get("saveGraph"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("saveGraph: {}", saveGraph);
 		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> execPropagation(
 				spark,
 				sourcePath,
 				preparedInfoPath,
 				outputPath,
 				resultClazz,
 				saveGraph));
 	}
 	private static <R extends Result> void execPropagation(
 		SparkSession spark,
 		String sourcePath,
 		String preparedInfoPath,
 		String outputPath,
 		Class<R> resultClazz,
 		boolean saveGraph) {
 		if (saveGraph) {
 			// updateResultTable(spark, potentialUpdates, inputPath, resultClazz, outputPath);
 			log.info("Reading Graph table from: {}", sourcePath);
 			Dataset<R> res = readPath(spark, sourcePath, resultClazz);
 			log.info("Reading prepared info: {}", preparedInfoPath);
 			Dataset<ResultCountrySet> prepared = spark
 				.read()
 				.json(preparedInfoPath)
 				.as(Encoders.bean(ResultCountrySet.class));
 			res
 				.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
 				.map(getCountryMergeFn(), Encoders.bean(resultClazz))
 				.write()
 				.option("compression", "gzip")
 				.mode(SaveMode.Overwrite)
 				.json(outputPath);
 		}
 	}
 	private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
 		return (MapFunction<Tuple2<R, ResultCountrySet>, R>) t -> {
 			Optional.ofNullable(t._2()).ifPresent(r -> {
 				t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet()));
 			});
 			return t._1();
 		};
 	}
 	private static List<Country> merge(List<Country> c1, List<CountrySbs> c2) {
 		HashSet<String> countries = c1
 			.stream()
 			.map(c -> c.getClassid())
 			.collect(Collectors.toCollection(HashSet::new));
 		return c2
 			.stream()
 			.filter(c -> !countries.contains(c.getClassid()))
 			.map(c -> getCountry(c.getClassid(), c.getClassname()))
 			.collect(Collectors.toList());
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java
@ -0,0 +1,43 @@
 package eu.dnetlib.dhp.orcidtoresultfromsemrel;
 public class AutoritativeAuthor {
 	private String name;
 	private String surname;
 	private String fullname;
 	private String orcid;
 	public String getName() {
 		return name;
 	}
 	public void setName(String name) {
 		this.name = name;
 	}
 	public String getSurname() {
 		return surname;
 	}
 	public void setSurname(String surname) {
 		this.surname = surname;
 	}
 	public String getFullname() {
 		return fullname;
 	}
 	public void setFullname(String fullname) {
 		this.fullname = fullname;
 	}
 	public String getOrcid() {
 		return orcid;
 	}
 	public void setOrcid(String orcid) {
 		this.orcid = orcid;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java
@ -0,0 +1,125 @@
 package eu.dnetlib.dhp.orcidtoresultfromsemrel;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.Arrays;
 import java.util.List;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.Result;
 /**
  * Spark job that, for one result type, associates the targets of the allowed semantic relations
  * with the ORCID-carrying authors of the relation sources and writes the outcome as gzipped JSON.
  */
 public class PrepareResultOrcidAssociationStep1 {
 	private static final Logger log = LoggerFactory.getLogger(PrepareResultOrcidAssociationStep1.class);
 	/**
 	 * Parses the job arguments, resolves the result class to work on and runs the preparation
 	 * inside a Hive-enabled Spark session.
 	 *
 	 * @param args command line arguments, as described by input_prepareorcidtoresult_parameters.json
 	 * @throws Exception if argument parsing, class loading or the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		final String parameterSpec = IOUtils
 			.toString(
 				PrepareResultOrcidAssociationStep1.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(parameterSpec);
 		parser.parseArgument(args);
 		final Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);
 		final String[] semrelTokens = parser.get("allowedsemrels").split(";");
 		final List<String> allowedsemrel = Arrays.asList(semrelTokens);
 		log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
 		// the result type is the lower-cased simple name of the configured class
 		final int simpleNameStart = resultClassName.lastIndexOf(".") + 1;
 		final String resultType = resultClassName.substring(simpleNameStart).toLowerCase();
 		log.info("resultType: {}", resultType);
 		final Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
 		final SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		final String inputRelationPath = inputPath + "/relation";
 		log.info("inputRelationPath: {}", inputRelationPath);
 		final String inputResultPath = inputPath + "/" + resultType;
 		log.info("inputResultPath: {}", inputResultPath);
 		final String outputResultPath = outputPath + "/" + resultType;
 		log.info("outputResultPath: {}", outputResultPath);
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				prepareInfo(
 					spark, inputRelationPath, inputResultPath, outputResultPath, resultClazz, allowedsemrel);
 			});
 	}
 	/**
 	 * Registers relations and results as temporary views, runs the association query and stores
 	 * its rows, read as ResultOrcidList beans, under {@code outputResultPath} (overwriting it).
 	 *
 	 * @param spark the active Spark session
 	 * @param inputRelationPath location of the relations
 	 * @param inputResultPath location of the results of type {@code resultClazz}
 	 * @param outputResultPath destination of the gzipped JSON output
 	 * @param resultClazz class used to read the results
 	 * @param allowedsemrel relation classes to be followed
 	 * @param <R> concrete result type
 	 */
 	private static <R extends Result> void prepareInfo(
 		SparkSession spark,
 		String inputRelationPath,
 		String inputResultPath,
 		String outputResultPath,
 		Class<R> resultClazz,
 		List<String> allowedsemrel) {
 		final Dataset<Relation> relations = readPath(spark, inputRelationPath, Relation.class);
 		relations.createOrReplaceTempView("relation");
 		log.info("Reading Graph table from: {}", inputResultPath);
 		final Dataset<R> results = readPath(spark, inputResultPath, resultClazz);
 		results.createOrReplaceTempView("result");
 		final Dataset<ResultOrcidList> associations = spark
 			.sql(associationQuery(allowedsemrel))
 			.as(Encoders.bean(ResultOrcidList.class));
 		associations
 			.write()
 			.option("compression", "gzip")
 			.mode(SaveMode.Overwrite)
 			.json(outputResultPath);
 	}
 	/**
 	 * Builds the SQL statement joining, on the relation source, the results having at least one
 	 * author with an ORCID pid with the non-deleted relations constrained by
 	 * {@code getConstraintList}; each row carries the relation target and the set of such authors.
 	 *
 	 * @param allowedsemrel relation classes handed to {@code getConstraintList}
 	 * @return the SQL text to be run against the "result" and "relation" views
 	 */
 	private static String associationQuery(List<String> allowedsemrel) {
 		return " select target resultId, author authorList"
 			+ " from (select id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
 			+ " from ( "
 			+ " select id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
 			+ " from result "
 			+ " lateral view explode (author) a as MyT "
 			+ " lateral view explode (MyT.pid) p as MyP "
 			+ " where MyP.qualifier.classid = 'ORCID') tmp "
 			+ " group by id) r_t "
 			+ " join ("
 			+ " select source, target "
 			+ " from relation "
 			+ " where datainfo.deletedbyinference = false "
 			+ getConstraintList(" relclass = '", allowedsemrel)
 			+ ") rel_rel "
 			+ " on source = id";
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java
@ -0,0 +1,97 @@
 package eu.dnetlib.dhp.orcidtoresultfromsemrel;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import scala.Tuple2;
 /**
  * Spark job that unions the per-type ResultOrcidList outputs (publication, dataset,
  * otherresearchproduct, software) and collapses them to one entry per result identifier.
  */
 public class PrepareResultOrcidAssociationStep2 {
 	private static final Logger log = LoggerFactory.getLogger(PrepareResultOrcidAssociationStep2.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	/**
 	 * Parses the job arguments and runs the merge inside a Spark session.
 	 *
 	 * @param args command line arguments, as described by input_prepareorcidtoresult_parameters2.json
 	 * @throws Exception if argument parsing or the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		final String parameterSpec = IOUtils
 			.toString(
 				PrepareResultOrcidAssociationStep2.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(parameterSpec);
 		parser.parseArgument(args);
 		final Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				mergeInfo(spark, inputPath, outputPath);
 			});
 	}
 	/**
 	 * Reads the four per-type inputs below {@code inputPath}, groups them by result identifier
 	 * and writes one gzipped JSON line per result to {@code outputPath}.
 	 *
 	 * @param spark the active Spark session
 	 * @param inputPath folder holding the publication, dataset, otherresearchproduct and software inputs
 	 * @param outputPath destination of the merged text output
 	 */
 	private static void mergeInfo(SparkSession spark, String inputPath, String outputPath) {
 		final Dataset<ResultOrcidList> publications = readPath(
 			spark, inputPath + "/publication", ResultOrcidList.class);
 		final Dataset<ResultOrcidList> resultOrcidAssoc = publications
 			.union(readPath(spark, inputPath + "/dataset", ResultOrcidList.class))
 			.union(readPath(spark, inputPath + "/otherresearchproduct", ResultOrcidList.class))
 			.union(readPath(spark, inputPath + "/software", ResultOrcidList.class));
 		resultOrcidAssoc
 			.toJavaRDD()
 			.mapToPair(entry -> new Tuple2<>(entry.getResultId(), entry))
 			.reduceByKey(PrepareResultOrcidAssociationStep2::mergeAuthorLists)
 			.map(keyed -> keyed._2())
 			.map(merged -> OBJECT_MAPPER.writeValueAsString(merged))
 			.saveAsTextFile(outputPath, GzipCodec.class);
 	}
 	/**
 	 * Reducer for two entries of the same result: the authors of {@code right} whose ORCID is not
 	 * yet present are appended to the author list of {@code left}, which is then returned.
 	 * A null argument yields the other one unchanged.
 	 *
 	 * @param left first entry, extended in place
 	 * @param right second entry, only read
 	 * @return {@code left} after the merge, or the non-null argument when the other is null
 	 */
 	private static ResultOrcidList mergeAuthorLists(ResultOrcidList left, ResultOrcidList right) {
 		if (left == null) {
 			return right;
 		}
 		if (right == null) {
 			return left;
 		}
 		final Set<String> seenOrcids = new HashSet<>();
 		for (AutoritativeAuthor known : left.getAuthorList()) {
 			seenOrcids.add(known.getOrcid());
 		}
 		for (AutoritativeAuthor candidate : right.getAuthorList()) {
 			// Set.add reports whether the ORCID was new, replacing the contains/add pair
 			if (seenOrcids.add(candidate.getOrcid())) {
 				left.getAuthorList().add(candidate);
 			}
 		}
 		return left;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java
@ -0,0 +1,27 @@
 package eu.dnetlib.dhp.orcidtoresultfromsemrel;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;
 /**
  * Bean pairing a result identifier with the authors (each carrying an ORCID) that may be
  * used to enrich that result.
  */
 public class ResultOrcidList implements Serializable {
 	String resultId;
 	List<AutoritativeAuthor> authorList = new ArrayList<>();
 	/** @return the identifier of the result, possibly null */
 	public String getResultId() {
 		return this.resultId;
 	}
 	/** @param resultId the identifier of the result */
 	public void setResultId(final String resultId) {
 		this.resultId = resultId;
 	}
 	/** @return the authors associated to the result; an empty list until one is set */
 	public List<AutoritativeAuthor> getAuthorList() {
 		return this.authorList;
 	}
 	/** @param authorList the authors associated to the result */
 	public void setAuthorList(final List<AutoritativeAuthor> authorList) {
 		this.authorList = authorList;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java
@ -0,0 +1,199 @@
 package eu.dnetlib.dhp.orcidtoresultfromsemrel;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.List;
 import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.Lists;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import scala.Tuple2;
 /**
  * Spark job that adds ORCID pids to the authors of a result, taking them from the
  * ResultOrcidList prepared for the same result identifier.
  */
 public class SparkOrcidToResultFromSemRelJob {
 	private static final Logger log = LoggerFactory.getLogger(SparkOrcidToResultFromSemRelJob.class);
 	/**
 	 * Parses the job arguments and, when saveGraph is enabled (the default), runs the
 	 * propagation inside a Hive-enabled Spark session.
 	 *
 	 * @param args command line arguments, as described by input_orcidtoresult_parameters.json
 	 * @throws Exception if argument parsing, class loading or the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				SparkOrcidToResultFromSemRelJob.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String possibleUpdates = parser.get("possibleUpdatesPath");
 		log.info("possibleUpdatesPath: {}", possibleUpdates);
 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);
 		final Boolean saveGraph = Optional
 			.ofNullable(parser.get("saveGraph"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("saveGraph: {}", saveGraph);
 		// the cast cannot be checked at compile time: the class name comes from configuration
 		@SuppressWarnings("unchecked")
 		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
 		SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				if (saveGraph) {
 					execPropagation(spark, possibleUpdates, inputPath, outputPath, resultClazz);
 				}
 			});
 	}
 	/**
 	 * Left-joins the results with the possible updates on the result identifier, enriches the
 	 * authors of the matched results and writes every result as gzipped JSON to
 	 * {@code outputPath}, overwriting it.
 	 *
 	 * @param spark the active Spark session
 	 * @param possibleUpdatesPath location of the ResultOrcidList entries
 	 * @param inputPath location of the results of type {@code resultClazz}
 	 * @param outputPath destination of the enriched results
 	 * @param resultClazz class used to read and encode the results
 	 * @param <R> concrete result type
 	 */
 	private static <R extends Result> void execPropagation(
 		SparkSession spark,
 		String possibleUpdatesPath,
 		String inputPath,
 		String outputPath,
 		Class<R> resultClazz) {
 		// read possible updates (resultId and list of possible orcid to add)
 		Dataset<ResultOrcidList> possible_updates = readPath(spark, possibleUpdatesPath, ResultOrcidList.class);
 		// read the result we have been considering
 		Dataset<R> result = readPath(spark, inputPath, resultClazz);
 		// make join result left_outer with possible updates
 		result
 			.joinWith(
 				possible_updates,
 				result.col("id").equalTo(possible_updates.col("resultId")),
 				"left_outer")
 			.map(authorEnrichFn(), Encoders.bean(resultClazz))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(outputPath);
 	}
 	/**
 	 * Builds the function applied to each joined pair: when a ResultOrcidList is present, every
 	 * author of the result that does not already carry a pid of the propagated type is matched
 	 * against the authoritative authors. Results without authors are returned untouched.
 	 *
 	 * @param <R> concrete result type
 	 * @return the function producing the (possibly enriched) result
 	 */
 	private static <R extends Result> MapFunction<Tuple2<R, ResultOrcidList>, R> authorEnrichFn() {
 		return (MapFunction<Tuple2<R, ResultOrcidList>, R>) value -> {
 			R ret = value._1();
 			ResultOrcidList rol = value._2();
 			if (rol == null) {
 				return ret;
 			}
 			List<Author> toenrich_author = ret.getAuthor();
 			List<AutoritativeAuthor> autoritativeAuthors = rol.getAuthorList();
 			// nothing to enrich, or nothing to enrich with
 			if (toenrich_author == null || autoritativeAuthors == null) {
 				return ret;
 			}
 			for (Author author : toenrich_author) {
 				if (author != null && !containsAllowedPid(author)) {
 					enrichAuthor(author, autoritativeAuthors);
 				}
 			}
 			return ret;
 		};
 	}
 	/**
 	 * Tries the authoritative authors in order and stops at the first one that enriches
 	 * {@code a}, so that at most one ORCID is added per author.
 	 *
 	 * @param a the author to enrich
 	 * @param au the candidate authoritative authors
 	 */
 	private static void enrichAuthor(Author a, List<AutoritativeAuthor> au) {
 		for (AutoritativeAuthor aa : au) {
 			if (aa != null && enrichAuthor(aa, a)) {
 				return;
 			}
 		}
 	}
 	/**
 	 * Adds the ORCID of {@code autoritative_author} to the pids of {@code author} when the two
 	 * have the same surname and a compatible name (see {@link #haveCompatibleNames}).
 	 *
 	 * @param autoritative_author the author providing the ORCID
 	 * @param author the author to enrich
 	 * @return true when the pid has been added, false otherwise
 	 */
 	private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
 		final boolean toaddpid = haveSameSurname(autoritative_author.getSurname(), author.getSurname())
 			&& haveCompatibleNames(autoritative_author.getName(), author.getName());
 		if (toaddpid) {
 			StructuredProperty p = new StructuredProperty();
 			p.setValue(autoritative_author.getOrcid());
 			p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID));
 			p
 				.setDataInfo(
 					getDataInfo(
 						PROPAGATION_DATA_INFO_TYPE,
 						PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
 						PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME));
 			List<StructuredProperty> authorPid = author.getPid();
 			if (authorPid != null) {
 				authorPid.add(p);
 			} else {
 				author.setPid(Lists.newArrayList(p));
 			}
 		}
 		return toaddpid;
 	}
 	/**
 	 * Tells whether two surnames are both non-blank and equal, ignoring case and surrounding
 	 * whitespace.
 	 */
 	private static boolean haveSameSurname(String first, String second) {
 		if (first == null || second == null) {
 			return false;
 		}
 		final String left = first.trim();
 		final String right = second.trim();
 		return !left.isEmpty() && left.equalsIgnoreCase(right);
 	}
 	/**
 	 * Tells whether two given names are compatible: both non-blank and either equal ignoring
 	 * case, or starting with the same letter (one of the two could be written as initial only).
 	 * The original check compared {@code substring(0, 0)}, i.e. two empty strings, and therefore
 	 * accepted any pair of names; the first character is what has to be compared.
 	 */
 	private static boolean haveCompatibleNames(String first, String second) {
 		if (first == null || second == null) {
 			return false;
 		}
 		final String left = first.trim();
 		final String right = second.trim();
 		// guard before substring(0, 1): a whitespace-only name is empty after trimming
 		if (left.isEmpty() || right.isEmpty()) {
 			return false;
 		}
 		return left.equalsIgnoreCase(right)
 			|| left.substring(0, 1).equalsIgnoreCase(right.substring(0, 1));
 	}
 	/**
 	 * Tells whether the author already has a pid whose qualifier classid is
 	 * {@code PROPAGATION_AUTHOR_PID}. Pids without a qualifier are ignored.
 	 *
 	 * @param a the author to inspect
 	 * @return true when such a pid exists
 	 */
 	private static boolean containsAllowedPid(Author a) {
 		List<StructuredProperty> pids = a.getPid();
 		if (pids == null) {
 			return false;
 		}
 		for (StructuredProperty pid : pids) {
 			if (pid != null
 				&& pid.getQualifier() != null
 				&& PROPAGATION_AUTHOR_PID.equals(pid.getQualifier().getClassid())) {
 				return true;
 			}
 		}
 		return false;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java
@ -0,0 +1,126 @@
 package eu.dnetlib.dhp.projecttoresult;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.PropagationConstant.getConstraintList;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.Arrays;
 import java.util.List;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 /**
  * Spark job that prepares the data for the propagation of result-project links: the sets of
  * projects that could be added to each result and the sets of projects each result already has.
  */
 public class PrepareProjectResultsAssociation {
 	// bound to this class: it was previously bound to PrepareDatasourceCountryAssociation,
 	// which attributed the log lines of this job to another one
 	private static final Logger log = LoggerFactory.getLogger(PrepareProjectResultsAssociation.class);
 	/**
 	 * Parses the job arguments and runs the preparation inside a Hive-enabled Spark session.
 	 *
 	 * @param args command line arguments, as described by input_prepareprojecttoresult_parameters.json
 	 * @throws Exception if argument parsing or the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				PrepareProjectResultsAssociation.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String potentialUpdatePath = parser.get("potentialUpdatePath");
 		log.info("potentialUpdatePath: {}", potentialUpdatePath);
 		String alreadyLinkedPath = parser.get("alreadyLinkedPath");
 		log.info("alreadyLinkedPath: {}", alreadyLinkedPath);
 		final List<String> allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";"));
 		log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
 		SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				prepareResultProjProjectResults(
 					spark,
 					inputPath,
 					potentialUpdatePath,
 					alreadyLinkedPath,
 					allowedsemrel);
 			});
 	}
 	/**
 	 * Reads the relations and writes two gzipped JSON outputs, both as ResultProjectSet beans:
 	 * under {@code potentialUpdatePath} the projects reachable from each result through the
 	 * allowed relations, under {@code alreadyLinkedPath} the projects each result is already
 	 * linked to. Both outputs are overwritten.
 	 *
 	 * @param spark the active Spark session
 	 * @param inputPath location of the relations
 	 * @param potentialUpdatePath destination of the candidate result-project sets
 	 * @param alreadyLinkedPath destination of the existing result-project sets
 	 * @param allowedsemrel relation classes handed to {@code getConstraintList}
 	 */
 	private static void prepareResultProjProjectResults(
 		SparkSession spark,
 		String inputPath,
 		String potentialUpdatePath,
 		String alreadyLinkedPath,
 		List<String> allowedsemrel) {
 		Dataset<Relation> relation = readPath(spark, inputPath, Relation.class);
 		relation.createOrReplaceTempView("relation");
 		// non-deleted relations of class RELATION_RESULT_PROJECT_REL_CLASS
 		String resproj_relation_query = "SELECT source, target "
 			+ "       FROM relation "
 			+ "       WHERE datainfo.deletedbyinference = false "
 			+ "       AND relClass = '"
 			+ RELATION_RESULT_PROJECT_REL_CLASS
 			+ "'";
 		Dataset<Row> resproj_relation = spark.sql(resproj_relation_query);
 		resproj_relation.createOrReplaceTempView("resproj_relation");
 		// projects of the relation source, collected per relation target
 		String potential_update_query = "SELECT resultId, collect_set(projectId) projectSet "
 			+ "FROM ( "
 			+ "SELECT r1.target resultId, r2.target projectId "
 			+ "      FROM (SELECT source, target "
 			+ "            FROM relation "
 			+ "            WHERE datainfo.deletedbyinference = false  "
 			+ getConstraintList(" relClass = '", allowedsemrel)
 			+ "            ) r1"
 			+ "      JOIN resproj_relation r2 "
 			+ "      ON r1.source = r2.source "
 			+ "      ) tmp "
 			+ "GROUP BY resultId ";
 		spark
 			.sql(potential_update_query)
 			.as(Encoders.bean(ResultProjectSet.class))
 			.write()
 			.option("compression", "gzip")
 			.mode(SaveMode.Overwrite)
 			.json(potentialUpdatePath);
 		// projects each result is already linked to
 		String result_projectset_query = "SELECT source resultId, collect_set(target) projectSet "
 			+ "FROM resproj_relation "
 			+ "GROUP BY source";
 		spark
 			.sql(result_projectset_query)
 			.as(Encoders.bean(ResultProjectSet.class))
 			.write()
 			.option("compression", "gzip")
 			.mode(SaveMode.Overwrite)
 			.json(alreadyLinkedPath);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/ResultProjectSet.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/ResultProjectSet.java
@ -0,0 +1,26 @@
 package eu.dnetlib.dhp.projecttoresult;
 import java.io.Serializable;
 import java.util.ArrayList;
 /**
  * Bean pairing a result identifier with a collection of project identifiers.
  */
 public class ResultProjectSet implements Serializable {
 	private String resultId;
 	private ArrayList<String> projectSet;
 	/** @return the identifier of the result, possibly null */
 	public String getResultId() {
 		return this.resultId;
 	}
 	/** @param resultId the identifier of the result */
 	public void setResultId(final String resultId) {
 		this.resultId = resultId;
 	}
 	/** @return the project identifiers, null until they are set */
 	public ArrayList<String> getProjectSet() {
 		return this.projectSet;
 	}
 	/** @param project the project identifiers; the list is stored as is, not copied */
 	public void setProjectSet(final ArrayList<String> project) {
 		this.projectSet = project;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java
@ -0,0 +1,147 @@
 package eu.dnetlib.dhp.projecttoresult;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.sql.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import scala.Tuple2;
 public class SparkResultToProjectThroughSemRelJob {
 	private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class);
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				SparkResultToProjectThroughSemRelJob.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath {}: ", outputPath);
 		final String potentialUpdatePath = parser.get("potentialUpdatePath");
 		log.info("potentialUpdatePath {}: ", potentialUpdatePath);
 		final String alreadyLinkedPath = parser.get("alreadyLinkedPath");
 		log.info("alreadyLinkedPath {}: ", alreadyLinkedPath);
 		final Boolean saveGraph = Boolean.valueOf(parser.get("saveGraph"));
 		log.info("saveGraph: {}", saveGraph);
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				execPropagation(
 					spark, outputPath, alreadyLinkedPath, potentialUpdatePath, saveGraph);
 			});
 	}
 	private static void execPropagation(
 		SparkSession spark,
 		String outputPath,
 		String alreadyLinkedPath,
 		String potentialUpdatePath,
 		Boolean saveGraph) {
 		Dataset<ResultProjectSet> toaddrelations = readPath(spark, potentialUpdatePath, ResultProjectSet.class);
 		Dataset<ResultProjectSet> alreadyLinked = readPath(spark, alreadyLinkedPath, ResultProjectSet.class);
 		if (saveGraph) {
 			toaddrelations
 				.joinWith(
 					alreadyLinked,
 					toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")),
 					"left_outer")
 				.flatMap(mapRelationRn(), Encoders.bean(Relation.class))
 				.write()
 				.mode(SaveMode.Append)
 				.option("compression", "gzip")
 				.json(outputPath);
 		}
 	}
 	private static FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation> mapRelationRn() {
 		return (FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation>) value -> {
 			List<Relation> new_relations = new ArrayList<>();
 			ResultProjectSet potential_update = value._1();
 			Optional<ResultProjectSet> already_linked = Optional.ofNullable(value._2());
 			if (already_linked.isPresent()) {
 				already_linked
 					.get()
 					.getProjectSet()
 					.stream()
 					.forEach(
 						(p -> {
 							if (potential_update
 								.getProjectSet()
 								.contains(p)) {
 								potential_update.getProjectSet().remove(p);
 							}
 						}));
 			}
 			String resId = potential_update.getResultId();
 			potential_update
 				.getProjectSet()
 				.stream()
 				.forEach(
 					projectId -> {
 						new_relations
 							.add(
 								getRelation(
 									resId,
 									projectId,
 									RELATION_RESULT_PROJECT_REL_CLASS,
 									RELATION_RESULTPROJECT_REL_TYPE,
 									RELATION_RESULTPROJECT_SUBREL_TYPE,
 									PROPAGATION_DATA_INFO_TYPE,
 									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID,
 									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME));
 						new_relations
 							.add(
 								getRelation(
 									projectId,
 									resId,
 									RELATION_PROJECT_RESULT_REL_CLASS,
 									RELATION_RESULTPROJECT_REL_TYPE,
 									RELATION_RESULTPROJECT_SUBREL_TYPE,
 									PROPAGATION_DATA_INFO_TYPE,
 									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID,
 									PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME));
 					});
 			return new_relations.iterator();
 		};
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/OrganizationMap.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/OrganizationMap.java
@ -0,0 +1,21 @@
 package eu.dnetlib.dhp.resulttocommunityfromorganization;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 /**
  * Map from a String key to a list of Strings whose String-typed lookup never returns null.
  */
 public class OrganizationMap extends HashMap<String, List<String>> {
 	/** Creates an empty map. */
 	public OrganizationMap() {
 		super();
 	}
 	/**
 	 * Looks up the list stored for {@code key}.
 	 * This is an overload taking a String, not an override of {@code HashMap.get(Object)}:
 	 * it is selected only when the receiver is statically typed as OrganizationMap.
 	 *
 	 * @param key the key to look up
 	 * @return the stored list, or a new empty list (not stored in the map) when the key is
 	 *         absent or mapped to null
 	 */
 	public List<String> get(String key) {
 		final List<String> stored = super.get(key);
 		return stored == null ? new ArrayList<>() : stored;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java
@ -0,0 +1,130 @@
 package eu.dnetlib.dhp.resulttocommunityfromorganization;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 /**
  * Spark job that computes, for each result, the communities derived from the organizations
  * it is related to, according to a configured organization-to-communities map.
  */
 public class PrepareResultCommunitySet {
 	private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySet.class);
 	/**
 	 * Parses the job arguments, including the organization map given as JSON, and runs the
 	 * preparation inside a Hive-enabled Spark session.
 	 *
 	 * @param args command line arguments, as described by input_preparecommunitytoresult_parameters.json
 	 * @throws Exception if argument parsing or the Spark job fails
 	 */
 	public static void main(String[] args) throws Exception {
 		final String parameterSpec = IOUtils
 			.toString(
 				PrepareResultCommunitySet.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(parameterSpec);
 		parser.parseArgument(args);
 		final Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String organizationMapJson = parser.get("organizationtoresultcommunitymap");
 		final OrganizationMap organizationMap = new Gson().fromJson(organizationMapJson, OrganizationMap.class);
 		log.info("organizationMap: {}", new Gson().toJson(organizationMap));
 		final SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				prepareInfo(spark, inputPath, outputPath, organizationMap);
 			});
 	}
 	/**
 	 * Reads the relations, associates each result with its organization and the organizations
 	 * grouped under it, maps them to communities and writes the non-null outcomes as gzipped
 	 * JSON to {@code outputPath}, overwriting it.
 	 *
 	 * @param spark the active Spark session
 	 * @param inputPath location of the relations
 	 * @param outputPath destination of the ResultCommunityList entries
 	 * @param organizationMap communities configured for each organization identifier
 	 */
 	private static void prepareInfo(
 		SparkSession spark,
 		String inputPath,
 		String outputPath,
 		OrganizationMap organizationMap) {
 		final Dataset<Relation> relations = readPath(spark, inputPath, Relation.class);
 		relations.createOrReplaceTempView("relation");
 		final Dataset<ResultOrganizations> resultOrganizations = spark
 			.sql(resultOrganizationQuery())
 			.as(Encoders.bean(ResultOrganizations.class));
 		resultOrganizations
 			.map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class))
 			.filter(Objects::nonNull)
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(outputPath);
 	}
 	/**
 	 * Builds the SQL statement that left-joins the non-deleted relations of class
 	 * RELATION_RESULT_ORGANIZATION_REL_CLASS with, per source, the set of targets of the
 	 * non-deleted relations of class RELATION_REPRESENTATIVERESULT_RESULT_CLASS.
 	 *
 	 * @return the SQL text to be run against the "relation" view
 	 */
 	private static String resultOrganizationQuery() {
 		return "SELECT result_organization.source resultId, result_organization.target orgId, org_set merges "
 			+ "FROM (SELECT source, target "
 			+ "      FROM relation "
 			+ "      WHERE datainfo.deletedbyinference = false "
 			+ "      AND relClass = '"
 			+ RELATION_RESULT_ORGANIZATION_REL_CLASS
 			+ "') result_organization "
 			+ "LEFT JOIN (SELECT source, collect_set(target) org_set "
 			+ "      FROM relation "
 			+ "      WHERE datainfo.deletedbyinference = false "
 			+ "      AND relClass = '"
 			+ RELATION_REPRESENTATIVERESULT_RESULT_CLASS
 			+ "' "
 			+ "      GROUP BY source) organization_organization "
 			+ "ON result_organization.target = organization_organization.source ";
 	}
 	/**
 	 * Builds the function collecting the communities configured for the organization of the
 	 * entry and for each of its merged organizations.
 	 *
 	 * @param organizationMap communities configured for each organization identifier
 	 * @return a function yielding the ResultCommunityList of the entry, or null when no
 	 *         community has been found
 	 */
 	private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn(
 		OrganizationMap organizationMap) {
 		return (MapFunction<ResultOrganizations, ResultCommunityList>) association -> {
 			final Set<String> communities = new HashSet<>();
 			final String organization = association.getOrgId();
 			if (organizationMap.containsKey(organization)) {
 				communities.addAll(organizationMap.get(organization));
 			}
 			final List<String> mergedOrganizations = association.getMerges();
 			if (mergedOrganizations != null) {
 				for (String merged : mergedOrganizations) {
 					if (organizationMap.containsKey(merged)) {
 						communities.addAll(organizationMap.get(merged));
 					}
 				}
 			}
 			if (communities.isEmpty()) {
 				return null;
 			}
 			final ResultCommunityList resultCommunities = new ResultCommunityList();
 			resultCommunities.setResultId(association.getResultId());
 			resultCommunities.setCommunityList(new ArrayList<>(communities));
 			return resultCommunities;
 		};
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultCommunityList.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultCommunityList.java
@ -0,0 +1,26 @@
 package eu.dnetlib.dhp.resulttocommunityfromorganization;
 import java.io.Serializable;
 import java.util.ArrayList;
 /**
  * Bean pairing a result identifier with a list of community identifiers.
  */
 public class ResultCommunityList implements Serializable {
 	private String resultId;
 	private ArrayList<String> communityList;
 	/** @return the identifier of the result, possibly null */
 	public String getResultId() {
 		return this.resultId;
 	}
 	/** @param resultId the identifier of the result */
 	public void setResultId(final String resultId) {
 		this.resultId = resultId;
 	}
 	/** @return the community identifiers, null until they are set */
 	public ArrayList<String> getCommunityList() {
 		return this.communityList;
 	}
 	/** @param communityList the community identifiers; the list is stored as is, not copied */
 	public void setCommunityList(final ArrayList<String> communityList) {
 		this.communityList = communityList;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultOrganizations.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultOrganizations.java
@ -0,0 +1,35 @@
 package eu.dnetlib.dhp.resulttocommunityfromorganization;
 import java.io.Serializable;
 import java.util.ArrayList;
 /**
  * Bean holding a result identifier, an organization identifier and a list of further
  * identifiers stored under the name "merges".
  */
 public class ResultOrganizations implements Serializable {
 	private String resultId;
 	private String orgId;
 	private ArrayList<String> merges;
 	/** @return the identifier of the result, possibly null */
 	public String getResultId() {
 		return this.resultId;
 	}
 	/** @param resultId the identifier of the result */
 	public void setResultId(final String resultId) {
 		this.resultId = resultId;
 	}
 	/** @return the identifier of the organization, possibly null */
 	public String getOrgId() {
 		return this.orgId;
 	}
 	/** @param orgId the identifier of the organization */
 	public void setOrgId(final String orgId) {
 		this.orgId = orgId;
 	}
 	/** @return the merges list, null until it is set */
 	public ArrayList<String> getMerges() {
 		return this.merges;
 	}
 	/** @param merges the merges list; it is stored as is, not copied */
 	public void setMerges(final ArrayList<String> merges) {
 		this.merges = merges;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
@ -0,0 +1,137 @@
 package eu.dnetlib.dhp.resulttocommunityfromorganization;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.*;
 import java.util.stream.Collectors;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.*;
 import scala.Tuple2;
 /**
  * Spark job that adds to the results of a given type the community contexts listed for them in a
  * previously prepared {@link ResultCommunityList} dataset. Every context added by this job carries a
  * data info built from the {@code PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_*} constants.
  */
 public class SparkResultToCommunityFromOrganizationJob {
 	private static final Logger log = LoggerFactory.getLogger(SparkResultToCommunityFromOrganizationJob.class);
 	/**
 	 * Job entry point: parses the arguments declared in {@code input_communitytoresult_parameters.json},
 	 * logs them and, when {@code saveGraph} is not explicitly {@code false}, runs the propagation within
 	 * the session supplied by {@code runWithSparkHiveSession}.
 	 *
 	 * @param args the command line arguments of the job
 	 * @throws Exception if the parameter descriptor cannot be read, the arguments cannot be parsed or the
 	 *                   class named by {@code resultTableName} cannot be loaded
 	 */
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				SparkResultToCommunityFromOrganizationJob.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String possibleupdatespath = parser.get("preparedInfoPath");
 		log.info("preparedInfoPath: {}", possibleupdatespath);
 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);
 		// saveGraph is optional: when it is not provided the propagated graph is written
 		final Boolean saveGraph = Optional
 			.ofNullable(parser.get("saveGraph"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("saveGraph: {}", saveGraph);
 		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
 		SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				if (saveGraph) {
 					execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
 				}
 			});
 	}
 	/**
 	 * Left-joins the results read from {@code inputPath} with the prepared community lists (result
 	 * {@code id} = {@code resultId}), enriches every result through {@link #resultCommunityFn()} and
 	 * writes the outcome as gzip-compressed JSON, overwriting {@code outputPath}.
 	 *
 	 * @param spark               the active Spark session
 	 * @param inputPath           the path of the results to enrich
 	 * @param outputPath          the path the enriched results are written to
 	 * @param resultClazz         the concrete result class to read and write
 	 * @param possibleUpdatesPath the path of the prepared {@link ResultCommunityList} records
 	 */
 	private static <R extends Result> void execPropagation(
 		SparkSession spark,
 		String inputPath,
 		String outputPath,
 		Class<R> resultClazz,
 		String possibleUpdatesPath) {
 		Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class);
 		Dataset<R> result = readPath(spark, inputPath, resultClazz);
 		result
 			.joinWith(
 				possibleUpdates,
 				result.col("id").equalTo(possibleUpdates.col("resultId")),
 				"left_outer")
 			.map(resultCommunityFn(), Encoders.bean(resultClazz))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(outputPath);
 	}
 	/**
 	 * Builds the function applied to every (result, community list) pair produced by the left outer join.
 	 * When a community list is present, a new context is created for each community id the result is not
 	 * already associated with, and the new contexts are merged into the result via {@code mergeFrom}.
 	 * Results without a matching community list are returned untouched.
 	 *
 	 * @return the mapping function returning the (possibly enriched) left-hand result
 	 */
 	private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {
 		return (MapFunction<Tuple2<R, ResultCommunityList>, R>) value -> {
 			R ret = value._1();
 			ResultCommunityList rcl = value._2();
 			// left outer join: the right side is null when nothing has to be propagated to this result
 			if (rcl == null || rcl.getCommunityList() == null) {
 				return ret;
 			}
 			// ids of the contexts already attached to the result; the context list itself may be missing
 			Set<String> knownContextIds = new HashSet<>();
 			if (ret.getContext() != null) {
 				ret.getContext().forEach(con -> knownContextIds.add(con.getId()));
 			}
 			List<Context> propagatedContexts = new ArrayList<>();
 			for (String cId : rcl.getCommunityList()) {
 				// Set.add returns false for an id already seen, so a community is never added twice
 				if (knownContextIds.add(cId)) {
 					Context newContext = new Context();
 					newContext.setId(cId);
 					newContext
 						.setDataInfo(
 							Arrays
 								.asList(
 									getDataInfo(
 										PROPAGATION_DATA_INFO_TYPE,
 										PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID,
 										PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME)));
 					propagatedContexts.add(newContext);
 				}
 			}
 			// carrier object holding only the id and the new contexts, merged into the original result
 			Result res = new Result();
 			res.setId(ret.getId());
 			res.setContext(propagatedContexts);
 			ret.mergeFrom(res);
 			return ret;
 		};
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
@ -0,0 +1,167 @@
 package eu.dnetlib.dhp.resulttocommunityfromsemrel;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.Arrays;
 import java.util.List;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.google.gson.Gson;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 /**
  * First preparation step of the community propagation through semantic relations. For the result type
  * being processed it computes, for every target of a relation with an allowed semantics, the community
  * contexts of the relation source, and stores them as {@link ResultCommunityList} records under
  * {@code <outputPath>/<resultType>}.
  */
 public class PrepareResultCommunitySetStep1 {
 	private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySetStep1.class);
 	/**
 	 * XQuery returning the ids of the contexts of type 'community' or 'ri' whose 'status' parameter is
 	 * different from 'hidden'.
 	 */
 	private static final String COMMUNITY_LIST_XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')"
 		+ "  where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']"
 		+ "  and  $x//CONFIGURATION/context/param[./@name='status']/text() != 'hidden'"
 		+ "  return $x//CONFIGURATION/context/@id/string()";
 	/**
 	 * Associates each (non deleted) result with the set of its context ids, then associates the target of
 	 * each (non deleted) relation with the context set of the relation source, i.e. the contexts the target
 	 * could possibly inherit. The first {@code %s} receives the constraint on the context ids, the second
 	 * one the constraint on the relation classes.
 	 */
 	// TODO
 	private static final String RESULT_CONTEXT_QUERY_TEMPLATE = "select target resultId, community_context  "
 		+ "from (select id, collect_set(co.id) community_context "
 		+ "       from  result "
 		+ "       lateral view explode (context) c as co "
 		+ "       where datainfo.deletedbyinference = false %s group by id) p "
 		+ " JOIN "
 		+ " (select source, target from relation "
 		+ "  where datainfo.deletedbyinference = false %s ) r ON p.id = r.source";
 	/**
 	 * A target (for example a dataset) can be linked to more than one source (for example several
 	 * publications), in which case the previous query produces one row per source. This query collapses
 	 * those rows into a single row per target, holding the set of all the non-empty context ids collected
 	 * for it.
 	 */
 	// TODO
 	private static final String RESULT_COMMUNITY_LIST_QUERY = "select resultId , collect_set(co) communityList "
 		+ "from result_context "
 		+ "lateral view explode (community_context) c as co "
 		+ "where length(co) > 0 "
 		+ "group by resultId";
 	/**
 	 * Job entry point: parses the arguments declared in
 	 * {@code input_preparecommunitytoresult_parameters.json}, fetches the community ids from the lookup
 	 * service and runs the preparation within the session supplied by {@code runWithSparkHiveSession}.
 	 *
 	 * @param args the command line arguments of the job
 	 * @throws Exception if the parameter descriptor cannot be read, the arguments cannot be parsed, the
 	 *                   lookup service fails or the class named by {@code resultTableName} cannot be loaded
 	 */
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				PrepareResultCommunitySetStep1.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);
 		SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		// the allowed relation classes are provided as a single ';'-separated string
 		final List<String> allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";"));
 		log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
 		final String isLookupUrl = parser.get("isLookUpUrl");
 		log.info("isLookupUrl: {}", isLookupUrl);
 		final List<String> communityIdList = getCommunityList(isLookupUrl);
 		log.info("communityIdList: {}", new Gson().toJson(communityIdList));
 		// lower-cased simple class name, used as the sub-directory of both the input and the output path
 		final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase();
 		log.info("resultType: {}", resultType);
 		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				prepareInfo(
 					spark,
 					inputPath,
 					outputPath,
 					allowedsemrel,
 					resultClazz,
 					resultType,
 					communityIdList);
 			});
 	}
 	/**
 	 * Registers the relations and the results of the given type as temporary views, runs the two
 	 * preparation queries and writes the resulting {@link ResultCommunityList} records as gzip-compressed
 	 * JSON, overwriting {@code <outputPath>/<resultType>}.
 	 *
 	 * @param spark           the active Spark session
 	 * @param inputPath       the root path of the graph; results are read from {@code <inputPath>/<resultType>}
 	 *                        and relations from {@code <inputPath>/relation}
 	 * @param outputPath      the root path of the prepared information
 	 * @param allowedsemrel   the relation classes whose targets can inherit the contexts of the source
 	 * @param resultClazz     the concrete result class to read
 	 * @param resultType      the name of the result sub-directory
 	 * @param communityIdList the ids of the contexts that can be propagated
 	 */
 	private static <R extends Result> void prepareInfo(
 		SparkSession spark,
 		String inputPath,
 		String outputPath,
 		List<String> allowedsemrel,
 		Class<R> resultClazz,
 		String resultType,
 		List<String> communityIdList) {
 		final String inputResultPath = inputPath + "/" + resultType;
 		log.info("Reading Graph table from: {}", inputResultPath);
 		final String inputRelationPath = inputPath + "/relation";
 		// report the relation path here (the result path was logged by mistake before)
 		log.info("Reading relation table from: {}", inputRelationPath);
 		Dataset<Relation> relation = readPath(spark, inputRelationPath, Relation.class);
 		relation.createOrReplaceTempView("relation");
 		Dataset<R> result = readPath(spark, inputResultPath, resultClazz);
 		result.createOrReplaceTempView("result");
 		final String outputResultPath = outputPath + "/" + resultType;
 		log.info("writing output results to: {}", outputResultPath);
 		String resultContextQuery = String
 			.format(
 				RESULT_CONTEXT_QUERY_TEMPLATE,
 				getConstraintList(" co.id = '", communityIdList),
 				getConstraintList(" relClass = '", allowedsemrel));
 		Dataset<Row> result_context = spark.sql(resultContextQuery);
 		// the second query reads its input from this temporary view
 		result_context.createOrReplaceTempView("result_context");
 		spark
 			.sql(RESULT_COMMUNITY_LIST_QUERY)
 			.as(Encoders.bean(ResultCommunityList.class))
 			.write()
 			.option("compression", "gzip")
 			.mode(SaveMode.Overwrite)
 			.json(outputResultPath);
 	}
 	/**
 	 * Asks the lookup service for the ids selected by {@link #COMMUNITY_LIST_XQUERY}.
 	 *
 	 * @param isLookupUrl the URL of the lookup service
 	 * @return the list returned by the service for the query
 	 * @throws ISLookUpException if the lookup service reports an error
 	 */
 	public static List<String> getCommunityList(final String isLookupUrl) throws ISLookUpException {
 		ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
 		return isLookUp.quickSearchProfile(COMMUNITY_LIST_XQUERY);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java
@ -0,0 +1,101 @@
 package eu.dnetlib.dhp.resulttocommunityfromsemrel;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
 import scala.Tuple2;
 /**
  * Second preparation step of the community propagation through semantic relations: it puts together the
  * {@link ResultCommunityList} records prepared for the four result types and keeps a single record per
  * result id, whose community list is the duplicate-free concatenation of the merged lists.
  */
 public class PrepareResultCommunitySetStep2 {
 	private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySetStep2.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	/**
 	 * Job entry point: parses the arguments declared in
 	 * {@code input_preparecommunitytoresult2_parameters.json} and merges the prepared information within
 	 * the session supplied by {@code runWithSparkSession}.
 	 *
 	 * @param args the command line arguments of the job
 	 * @throws Exception if the parameter descriptor cannot be read or the arguments cannot be parsed
 	 */
 	public static void main(String[] args) throws Exception {
 		final String parametersJson = IOUtils
 			.toString(
 				PrepareResultCommunitySetStep2.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(parametersJson);
 		parser.parseArgument(args);
 		final Boolean sessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", sessionManaged);
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		runWithSparkSession(
 			new SparkConf(),
 			sessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				mergeInfo(spark, inputPath, outputPath);
 			});
 	}
 	/**
 	 * Reads the records stored under the {@code publication}, {@code dataset},
 	 * {@code otherresearchproduct} and {@code software} sub-directories of {@code inputPath}, reduces them
 	 * by result id and saves one JSON document per line, gzip-compressed, in {@code outputPath}.
 	 *
 	 * @param spark      the active Spark session
 	 * @param inputPath  the root path of the records prepared by the first step
 	 * @param outputPath the path the merged records are written to
 	 */
 	private static void mergeInfo(SparkSession spark, String inputPath, String outputPath) {
 		Dataset<ResultCommunityList> publications = readPath(
 			spark, inputPath + "/publication", ResultCommunityList.class);
 		Dataset<ResultCommunityList> allResultTypes = publications
 			.union(readPath(spark, inputPath + "/dataset", ResultCommunityList.class))
 			.union(readPath(spark, inputPath + "/otherresearchproduct", ResultCommunityList.class))
 			.union(readPath(spark, inputPath + "/software", ResultCommunityList.class));
 		allResultTypes
 			.toJavaRDD()
 			.mapToPair(record -> new Tuple2<>(record.getResultId(), record))
 			.reduceByKey(
 				(left, right) -> {
 					if (left == null) {
 						return right;
 					}
 					if (right == null) {
 						return left;
 					}
 					// the left record is extended in place with the communities it does not list yet
 					Set<String> seen = new HashSet<>(left.getCommunityList());
 					for (String communityId : right.getCommunityList()) {
 						if (seen.add(communityId)) {
 							left.getCommunityList().add(communityId);
 						}
 					}
 					return left;
 				})
 			.map(pair -> pair._2())
 			.map(merged -> OBJECT_MAPPER.writeValueAsString(merged))
 			.saveAsTextFile(outputPath, GzipCodec.class);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java
@ -0,0 +1,143 @@
 package eu.dnetlib.dhp.resulttocommunityfromsemrel;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.*;
 import java.util.stream.Collectors;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
 import eu.dnetlib.dhp.schema.oaf.*;
 import scala.Tuple2;
 /**
  * Spark job that adds to the results of a given type the community contexts listed for them in the
  * {@link ResultCommunityList} records prepared from the semantic relations. Every context added by this
  * job carries a data info built from the {@code PROPAGATION_RESULT_COMMUNITY_SEMREL_*} constants.
  */
 public class SparkResultToCommunityThroughSemRelJob {
 	private static final Logger log = LoggerFactory.getLogger(SparkResultToCommunityThroughSemRelJob.class);
 	/**
 	 * Job entry point: parses the arguments declared in {@code input_communitytoresult_parameters.json},
 	 * logs them and, when {@code saveGraph} is not explicitly {@code false}, runs the propagation within
 	 * the session supplied by {@code runWithSparkHiveSession}.
 	 *
 	 * @param args the command line arguments of the job
 	 * @throws Exception if the parameter descriptor cannot be read, the arguments cannot be parsed or the
 	 *                   class named by {@code resultTableName} cannot be loaded
 	 */
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				SparkResultToCommunityThroughSemRelJob.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String preparedInfoPath = parser.get("preparedInfoPath");
 		log.info("preparedInfoPath: {}", preparedInfoPath);
 		SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);
 		// saveGraph is optional: when it is not provided the propagated graph is written
 		final Boolean saveGraph = Optional
 			.ofNullable(parser.get("saveGraph"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("saveGraph: {}", saveGraph);
 		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				if (saveGraph) {
 					execPropagation(
 						spark, inputPath, outputPath, preparedInfoPath, resultClazz);
 				}
 			});
 	}
 	/**
 	 * Left-joins the results read from {@code inputPath} with the prepared community lists (result
 	 * {@code id} = {@code resultId}), enriches every result through {@link #contextUpdaterFn()} and writes
 	 * the outcome as gzip-compressed JSON, overwriting {@code outputPath}.
 	 *
 	 * @param spark            the active Spark session
 	 * @param inputPath        the path of the results to enrich
 	 * @param outputPath       the path the enriched results are written to
 	 * @param preparedInfoPath the path of the prepared {@link ResultCommunityList} records
 	 * @param resultClazz      the concrete result class to read and write
 	 */
 	private static <R extends Result> void execPropagation(
 		SparkSession spark,
 		String inputPath,
 		String outputPath,
 		String preparedInfoPath,
 		Class<R> resultClazz) {
 		Dataset<ResultCommunityList> possibleUpdates = readPath(spark, preparedInfoPath, ResultCommunityList.class);
 		Dataset<R> result = readPath(spark, inputPath, resultClazz);
 		result
 			.joinWith(
 				possibleUpdates,
 				result.col("id").equalTo(possibleUpdates.col("resultId")),
 				"left_outer")
 			.map(contextUpdaterFn(), Encoders.bean(resultClazz))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(outputPath);
 	}
 	/**
 	 * Builds the function applied to every (result, community list) pair produced by the left outer join.
 	 * When a community list is present, a new context is created for each community id the result is not
 	 * already associated with, and the new contexts are merged into the result via {@code mergeFrom}.
 	 * Results without a matching community list are returned untouched.
 	 *
 	 * @return the mapping function returning the (possibly enriched) left-hand result
 	 */
 	private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> contextUpdaterFn() {
 		return (MapFunction<Tuple2<R, ResultCommunityList>, R>) value -> {
 			R ret = value._1();
 			ResultCommunityList rcl = value._2();
 			// left outer join: the right side is null when nothing has to be propagated to this result
 			if (rcl == null || rcl.getCommunityList() == null) {
 				return ret;
 			}
 			// ids of the contexts already attached to the result; the context list itself may be missing
 			Set<String> context_set = new HashSet<>();
 			if (ret.getContext() != null) {
 				ret.getContext().forEach(c -> context_set.add(c.getId()));
 			}
 			List<Context> contextList = new ArrayList<>();
 			for (String communityId : rcl.getCommunityList()) {
 				// Set.add returns false for an id already seen, so a community is never added twice
 				if (context_set.add(communityId)) {
 					contextList.add(newPropagatedContext(communityId));
 				}
 			}
 			// carrier object holding only the id and the new contexts, merged into the original result
 			Result r = new Result();
 			r.setId(ret.getId());
 			r.setContext(contextList);
 			ret.mergeFrom(r);
 			return ret;
 		};
 	}
 	/** Creates the context for the given community id, marked with the semantic relation propagation data info. */
 	private static Context newPropagatedContext(String communityId) {
 		Context newContext = new Context();
 		newContext.setId(communityId);
 		newContext
 			.setDataInfo(
 				Arrays
 					.asList(
 						getDataInfo(
 							PROPAGATION_DATA_INFO_TYPE,
 							PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID,
 							PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME)));
 		return newContext;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/DatasourceOrganization.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/DatasourceOrganization.java
@ -0,0 +1,26 @@
 package eu.dnetlib.dhp.resulttoorganizationfrominstrepo;
 import java.io.Serializable;
 /**
  * Serializable bean pairing the identifier of a datasource with the identifier of an organization.
  * It exposes plain getters and setters so that it can be used as a bean-encoded dataset row.
  */
 public class DatasourceOrganization implements Serializable {
 	/** Identifier of the datasource. */
 	private String datasourceId;
 	/** Identifier of the organization. */
 	private String organizationId;
 	/**
 	 * @param datasourceId the identifier of the datasource
 	 */
 	public void setDatasourceId(final String datasourceId) {
 		this.datasourceId = datasourceId;
 	}
 	/**
 	 * @param organizationId the identifier of the organization
 	 */
 	public void setOrganizationId(final String organizationId) {
 		this.organizationId = organizationId;
 	}
 	/**
 	 * @return the identifier of the datasource, or {@code null} when it was never set
 	 */
 	public String getDatasourceId() {
 		return this.datasourceId;
 	}
 	/**
 	 * @return the identifier of the organization, or {@code null} when it was never set
 	 */
 	public String getOrganizationId() {
 		return this.organizationId;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java
@ -0,0 +1,122 @@
 package eu.dnetlib.dhp.resulttoorganizationfrominstrepo;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Datasource;
 import eu.dnetlib.dhp.schema.oaf.Organization;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 /**
  * Preparation job for the propagation of result-organization relations through institutional
  * repositories. It stores (a) the datasource-organization pairs for the datasources of the institutional
  * repository type and (b), for each result, the set of organizations it is already linked to.
  */
 public class PrepareResultInstRepoAssociation {
 	private static final Logger log = LoggerFactory.getLogger(PrepareResultInstRepoAssociation.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	/**
 	 * Job entry point: parses the arguments declared in {@code input_prepareresultorg_parameters.json},
 	 * logs them and runs the two preparation queries within the session supplied by
 	 * {@code runWithSparkHiveSession}.
 	 *
 	 * @param args the command line arguments of the job
 	 * @throws Exception if the parameter descriptor cannot be read or the arguments cannot be parsed
 	 */
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				PrepareResultInstRepoAssociation.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 		String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String datasourceOrganizationPath = parser.get("datasourceOrganizationPath");
 		// placeholder moved after the colon so that the line reads "name: value" like the other parameters
 		log.info("datasourceOrganizationPath: {}", datasourceOrganizationPath);
 		final String alreadyLinkedPath = parser.get("alreadyLinkedPath");
 		log.info("alreadyLinkedPath: {}", alreadyLinkedPath);
 		SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		runWithSparkHiveSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				// the temporary views must be registered before the two queries are run
 				readNeededResources(spark, inputPath);
 				prepareDatasourceOrganization(spark, datasourceOrganizationPath);
 				prepareAlreadyLinkedAssociation(spark, alreadyLinkedPath);
 			});
 	}
 	/**
 	 * Collects, for each source of a non deleted relation of class
 	 * {@code RELATION_RESULT_ORGANIZATION_REL_CLASS}, the set of its targets and saves the resulting
 	 * {@link ResultOrganizationSet} records as gzip-compressed text, one JSON document per line.
 	 *
 	 * @param spark             the active Spark session, with the {@code relation} view registered
 	 * @param alreadyLinkedPath the path the records are written to
 	 */
 	private static void prepareAlreadyLinkedAssociation(
 		SparkSession spark, String alreadyLinkedPath) {
 		String query = "Select source resultId, collect_set(target) organizationSet "
 			+ "from relation "
 			+ "where datainfo.deletedbyinference = false "
 			+ "and relClass = '"
 			+ RELATION_RESULT_ORGANIZATION_REL_CLASS
 			+ "' "
 			+ "group by source";
 		spark
 			.sql(query)
 			.as(Encoders.bean(ResultOrganizationSet.class))
 			// TODO retry to stick with datasets
 			.toJavaRDD()
 			.map(r -> OBJECT_MAPPER.writeValueAsString(r))
 			.saveAsTextFile(alreadyLinkedPath, GzipCodec.class);
 	}
 	/**
 	 * Reads the datasources, the relations and the organizations found under {@code inputPath} and
 	 * registers them as the temporary views {@code datasource}, {@code relation} and {@code organization}.
 	 *
 	 * @param spark     the active Spark session
 	 * @param inputPath the root path of the graph
 	 */
 	private static void readNeededResources(SparkSession spark, String inputPath) {
 		Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class);
 		datasource.createOrReplaceTempView("datasource");
 		Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class);
 		relation.createOrReplaceTempView("relation");
 		Dataset<Organization> organization = readPath(spark, inputPath + "/organization", Organization.class);
 		organization.createOrReplaceTempView("organization");
 	}
 	/**
 	 * Joins the non deleted datasources whose type class id is {@code INSTITUTIONAL_REPO_TYPE} with the
 	 * non deleted relations of class {@code RELATION_DATASOURCE_ORGANIZATION_REL_CLASS} having the
 	 * datasource as source, and writes the resulting {@link DatasourceOrganization} pairs as
 	 * gzip-compressed JSON, overwriting {@code datasourceOrganizationPath}.
 	 *
 	 * @param spark                      the active Spark session, with the {@code datasource} and
 	 *                                   {@code relation} views registered
 	 * @param datasourceOrganizationPath the path the pairs are written to
 	 */
 	private static void prepareDatasourceOrganization(
 		SparkSession spark, String datasourceOrganizationPath) {
 		String query = "SELECT source datasourceId, target organizationId "
 			+ "FROM ( SELECT id "
 			+ "FROM datasource "
 			+ "WHERE datasourcetype.classid = '"
 			+ INSTITUTIONAL_REPO_TYPE
 			+ "' "
 			+ "AND datainfo.deletedbyinference = false  ) d "
 			+ "JOIN ( SELECT source, target "
 			+ "FROM relation "
 			+ "WHERE relclass = '"
 			+ RELATION_DATASOURCE_ORGANIZATION_REL_CLASS
 			+ "' "
 			+ "AND datainfo.deletedbyinference = false ) rel "
 			+ "ON d.id = rel.source ";
 		spark
 			.sql(query)
 			.as(Encoders.bean(DatasourceOrganization.class))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(datasourceOrganizationPath);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java
@ -0,0 +1,26 @@
 package eu.dnetlib.dhp.resulttoorganizationfrominstrepo;
 import java.io.Serializable;
 import java.util.ArrayList;
 /**
  * Serializable bean pairing the identifier of a result with a collection of organization identifiers.
  * It exposes plain getters and setters so that it can be used as a bean-encoded dataset row.
  */
 public class ResultOrganizationSet implements Serializable {
 	/** Identifier of the result. */
 	private String resultId;
 	/** Identifiers of the organizations associated with the result (held in a list despite the name). */
 	private ArrayList<String> organizationSet;
 	/**
 	 * @param resultId the identifier of the result
 	 */
 	public void setResultId(final String resultId) {
 		this.resultId = resultId;
 	}
 	/**
 	 * @param organizationSet the organization identifiers to associate with the result
 	 */
 	public void setOrganizationSet(final ArrayList<String> organizationSet) {
 		this.organizationSet = organizationSet;
 	}
 	/**
 	 * @return the identifier of the result, or {@code null} when it was never set
 	 */
 	public String getResultId() {
 		return this.resultId;
 	}
 	/**
 	 * @return the very list instance passed to the setter (no copy is made), or {@code null} when it was never set
 	 */
 	public ArrayList<String> getOrganizationSet() {
 		return this.organizationSet;
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java
@ -0,0 +1,193 @@
 package eu.dnetlib.dhp.resulttoorganizationfrominstrepo;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.broadcast.Broadcast;
 import org.apache.spark.sql.*;
 import org.apache.spark.sql.Dataset;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.*;
 import scala.Tuple2;
 /**
  * Spark job that creates result-organization relations (in both directions) for the results associated,
  * through the {@code cfhb} view, with a datasource listed in the prepared datasource-organization pairs.
  * Organizations a result is already linked to are skipped.
  */
 public class SparkResultToOrganizationFromIstRepoJob {
 	private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromIstRepoJob.class);
 	/**
 	 * Collects, for each result id, the set of organization ids reached by joining the {@code rels} view
 	 * with the {@code cfhb} view, once on the {@code cf} column and once on the {@code hb} column.
 	 */
 	private static final String RESULT_ORGANIZATIONSET_QUERY = "SELECT id resultId, collect_set(organizationId) organizationSet "
 		+ "FROM ( SELECT id, organizationId "
 		+ "FROM rels "
 		+ "JOIN cfhb "
 		+ " ON cf = datasourceId     "
 		+ "UNION ALL "
 		+ "SELECT id , organizationId     "
 		+ "FROM rels "
 		+ "JOIN cfhb "
 		+ " ON hb = datasourceId ) tmp "
 		+ "GROUP BY id";
 	/**
 	 * Job entry point: parses the arguments declared in
 	 * {@code input_propagationresulaffiliationfrominstrepo_parameters.json}, logs them and, when
 	 * {@code saveGraph} is not explicitly {@code false}, runs the propagation within the session supplied
 	 * by {@code runWithSparkHiveSession}.
 	 *
 	 * @param args the command line arguments of the job
 	 * @throws Exception if the parameter descriptor cannot be read, the arguments cannot be parsed or the
 	 *                   class named by {@code resultTableName} cannot be loaded
 	 */
 	public static void main(String[] args) throws Exception {
 		final String parametersJson = IOUtils
 			.toString(
 				SparkResultToOrganizationFromIstRepoJob.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(parametersJson);
 		parser.parseArgument(args);
 		final Boolean sessionManaged = isSparkSessionManaged(parser);
 		log.info("isSparkSessionManaged: {}", sessionManaged);
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 		final String datasourceOrganizationPath = parser.get("datasourceOrganizationPath");
 		log.info("datasourceOrganizationPath: {}", datasourceOrganizationPath);
 		final String alreadyLinkedPath = parser.get("alreadyLinkedPath");
 		log.info("alreadyLinkedPath: {}", alreadyLinkedPath);
 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);
 		// saveGraph is optional: when it is not provided the new relations are written
 		final Boolean saveGraph = Optional
 			.ofNullable(parser.get("saveGraph"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("saveGraph: {}", saveGraph);
 		final Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
 		final SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
 		runWithSparkHiveSession(
 			conf,
 			sessionManaged,
 			spark -> {
 				if (isTest(parser)) {
 					removeOutputDir(spark, outputPath);
 				}
 				if (saveGraph) {
 					execPropagation(
 						spark,
 						datasourceOrganizationPath,
 						alreadyLinkedPath,
 						inputPath,
 						outputPath,
 						resultClazz);
 				}
 			});
 	}
 	/**
 	 * Left-joins the candidate result-organization associations with the already existing ones (on
 	 * {@code resultId}), turns every new association into relations through {@link #createRelationFn()}
 	 * and appends them, as gzip-compressed JSON, to {@code outputPath}.
 	 *
 	 * @param spark                  the active Spark session
 	 * @param datasourceorganization the path of the prepared {@link DatasourceOrganization} pairs
 	 * @param alreadyLinkedPath      the path of the prepared {@link ResultOrganizationSet} records describing
 	 *                               the existing links
 	 * @param inputPath              the path of the results to process
 	 * @param outputPath             the path the new relations are appended to
 	 * @param clazz                  the concrete result class to read
 	 */
 	private static void execPropagation(
 		SparkSession spark,
 		String datasourceorganization,
 		String alreadyLinkedPath,
 		String inputPath,
 		String outputPath,
 		Class<? extends Result> clazz) {
 		Dataset<DatasourceOrganization> datasourceOrganizations = readPath(
 			spark, datasourceorganization, DatasourceOrganization.class);
 		Dataset<ResultOrganizationSet> candidates = getPotentialRelations(
 			spark, inputPath, clazz, datasourceOrganizations);
 		Dataset<ResultOrganizationSet> existingLinks = readPath(
 			spark, alreadyLinkedPath, ResultOrganizationSet.class);
 		candidates
 			.joinWith(
 				existingLinks,
 				candidates.col("resultId").equalTo(existingLinks.col("resultId")),
 				"left_outer")
 			.flatMap(createRelationFn(), Encoders.bean(Relation.class))
 			.write()
 			.mode(SaveMode.Append)
 			.option("compression", "gzip")
 			.json(outputPath);
 	}
 	/**
 	 * Builds the function applied to every (candidate, already linked) pair produced by the left outer
 	 * join. The organizations found on the right side are removed from the candidate list; for each
 	 * remaining organization two relations are emitted: organization to result first, then result to
 	 * organization.
 	 *
 	 * @return the flat-mapping function producing the new relations
 	 */
 	private static FlatMapFunction<Tuple2<ResultOrganizationSet, ResultOrganizationSet>, Relation> createRelationFn() {
 		return (FlatMapFunction<Tuple2<ResultOrganizationSet, ResultOrganizationSet>, Relation>) value -> {
 			final ResultOrganizationSet candidate = value._1();
 			// left outer join: null when the result has no organization linked yet
 			final ResultOrganizationSet linked = value._2();
 			final List<String> candidateOrganizations = candidate.getOrganizationSet();
 			if (linked != null) {
 				for (String linkedOrganization : linked.getOrganizationSet()) {
 					// List.remove(Object) drops the first occurrence only, and is a no-op when absent
 					candidateOrganizations.remove(linkedOrganization);
 				}
 			}
 			final String resultId = candidate.getResultId();
 			final List<Relation> relations = new ArrayList<>();
 			for (String orgId : candidateOrganizations) {
 				relations
 					.add(
 						getRelation(
 							orgId,
 							resultId,
 							RELATION_ORGANIZATION_RESULT_REL_CLASS,
 							RELATION_RESULTORGANIZATION_REL_TYPE,
 							RELATION_RESULTORGANIZATION_SUBREL_TYPE,
 							PROPAGATION_DATA_INFO_TYPE,
 							PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID,
 							PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME));
 				relations
 					.add(
 						getRelation(
 							resultId,
 							orgId,
 							RELATION_RESULT_ORGANIZATION_REL_CLASS,
 							RELATION_RESULTORGANIZATION_REL_TYPE,
 							RELATION_RESULTORGANIZATION_SUBREL_TYPE,
 							PROPAGATION_DATA_INFO_TYPE,
 							PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID,
 							PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME));
 			}
 			return relations.iterator();
 		};
 	}
 	/**
 	 * Registers the results as the {@code result} view, lets {@code createCfHbforResult} prepare what it
 	 * derives from it, registers the datasource-organization pairs as the {@code rels} view and runs
 	 * {@link #RESULT_ORGANIZATIONSET_QUERY}.
 	 *
 	 * @param spark       the active Spark session
 	 * @param inputPath   the path of the results to process
 	 * @param resultClazz the concrete result class to read
 	 * @param ds_org      the prepared datasource-organization pairs
 	 * @return for each result id, the organizations it could be linked to
 	 */
 	private static <R extends Result> Dataset<ResultOrganizationSet> getPotentialRelations(
 		SparkSession spark,
 		String inputPath,
 		Class<R> resultClazz,
 		Dataset<DatasourceOrganization> ds_org) {
 		final Dataset<R> results = readPath(spark, inputPath, resultClazz);
 		results.createOrReplaceTempView("result");
 		// NOTE(review): presumably registers the cfhb view read by the query - confirm in PropagationConstant
 		createCfHbforResult(spark);
 		ds_org.createOrReplaceTempView("rels");
 		final Dataset<Row> organizationSets = spark.sql(RESULT_ORGANIZATIONSET_QUERY);
 		return organizationSets.as(Encoders.bean(ResultOrganizationSet.class));
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json
@ -0,0 +1,51 @@
 [
  {
    "paramName":"is",
    "paramLongName":"isLookUpUrl",
    "paramDescription": "URL of the isLookUp Service",
    "paramRequired": true
  },
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "pm",
    "paramLongName":"pathMap",
    "paramDescription": "the json path associated to each selection field",
    "paramRequired": true
  },
  {
    "paramName":"tn",
    "paramLongName":"resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "test",
    "paramLongName": "isTest",
    "paramDescription": "true if the job is executed in test mode, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "tg",
    "paramLongName": "taggingConf",
    "paramDescription": "the tagging configuration to apply",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml
@ -0,0 +1,54 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
@ -0,0 +1,216 @@
 <workflow-app name="bulk_tagging" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>isLookUpUrl</name>
            <description>the isLookup service endpoint</description>
        </property>
        <property>
            <name>pathMap</name>
            <description>the json path associated to each selection field</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
    </parameters>
    <start to="reset_outputpath"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="reset_outputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="copy_entities"/>
        <error to="Kill"/>
    </action>
    <fork name="copy_entities">
        <path start="copy_relation"/>
        <path start="copy_organization"/>
        <path start="copy_projects"/>
        <path start="copy_datasources"/>
    </fork>
    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <join name="copy_wait" to="fork_exec_bulktag"/>
    <fork name="fork_exec_bulktag">
        <path start="join_bulktag_publication"/>
        <path start="join_bulktag_dataset"/>
        <path start="join_bulktag_otherresearchproduct"/>
        <path start="join_bulktag_software"/>
    </fork>
    <!-- Runs SparkBulkTagJob for publications: ${sourcePath}/publication is passed as
         sourcePath and ${outputPath}/publication as outputPath. -->
    <action name="join_bulktag_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- "yarn" together with the <mode> element replaces the "yarn-cluster" master URL,
                 which Spark deprecated in 2.0. -->
            <master>yarn</master>
            <mode>cluster</mode>
            <name>bulkTagging-publication</name>
            <class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
            <arg>--pathMap</arg><arg>${pathMap}</arg>
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Runs SparkBulkTagJob for datasets: ${sourcePath}/dataset is passed as sourcePath
         and ${outputPath}/dataset as outputPath. -->
    <action name="join_bulktag_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- "yarn" together with the <mode> element replaces the "yarn-cluster" master URL,
                 which Spark deprecated in 2.0. -->
            <master>yarn</master>
            <mode>cluster</mode>
            <name>bulkTagging-dataset</name>
            <class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
            <arg>--pathMap</arg><arg>${pathMap}</arg>
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Runs SparkBulkTagJob for other research products: ${sourcePath}/otherresearchproduct is
         passed as sourcePath and ${outputPath}/otherresearchproduct as outputPath. -->
    <action name="join_bulktag_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- "yarn" together with the <mode> element replaces the "yarn-cluster" master URL,
                 which Spark deprecated in 2.0. -->
            <master>yarn</master>
            <mode>cluster</mode>
            <name>bulkTagging-orp</name>
            <class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
            <arg>--pathMap</arg><arg>${pathMap}</arg>
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Runs SparkBulkTagJob for software: ${sourcePath}/software is passed as sourcePath
         and ${outputPath}/software as outputPath. -->
    <action name="join_bulktag_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- "yarn" together with the <mode> element replaces the "yarn-cluster" master URL,
                 which Spark deprecated in 2.0. -->
            <master>yarn</master>
            <mode>cluster</mode>
            <name>bulkTagging-software</name>
            <class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${outputPath}/software</arg>
            <arg>--pathMap</arg><arg>${pathMap}</arg>
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <join name="wait" to="End"/>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json
@ -0,0 +1,44 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": false
  },
  {
    "paramName":"sg",
    "paramLongName":"saveGraph",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName":"tn",
    "paramLongName":"resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "p",
    "paramLongName": "preparedInfoPath",
    "paramDescription": "the path where prepared info have been stored",
    "paramRequired": false
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json
@ -0,0 +1,38 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "w",
    "paramLongName": "whitelist",
    "paramDescription": "the datasources whose type is not among the allowed ones but that must be included anyway",
    "paramRequired": true
  },
  {
    "paramName": "at",
    "paramLongName": "allowedtypes",
    "paramDescription": "the allowed datasource types for country propagation",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json
@ -0,0 +1,38 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"out",
    "paramLongName":"outputPath",
    "paramDescription": "the output path",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName":"tn",
    "paramLongName":"resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "p",
    "paramLongName": "preparedInfoPath",
    "paramDescription": "the path where prepared info have been stored",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
@ -0,0 +1,376 @@
 <workflow-app name="country_propagation" xmlns="uri:oozie:workflow:0.5">
    <!-- Properties without a default value must be supplied when the workflow is submitted. -->
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>whitelist</name>
            <description>the white list</description>
        </property>
        <property>
            <name>allowedtypes</name>
            <description>the allowed types</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <!-- Referenced as ${saveGraph} by the propagation actions; declaring it here makes a
             missing value fail at submission instead of after the prepare steps have run. -->
        <property>
            <name>saveGraph</name>
            <description>true if the new version of the graph must be saved</description>
        </property>
    </parameters>
    <start to="reset_outputpath"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="reset_outputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="copy_entities"/>
        <error to="Kill"/>
    </action>
    <fork name="copy_entities">
        <path start="copy_relation"/>
        <path start="copy_organization"/>
        <path start="copy_projects"/>
        <path start="copy_datasources"/>
    </fork>
    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <join name="copy_wait" to="prepare_datasource_country_association"/>
    <!-- Runs PrepareDatasourceCountryAssociation on ${sourcePath} with the given whitelist and
         allowed types; its outputPath is ${workingDir}/preparedInfo. -->
    <action name="prepare_datasource_country_association">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <!-- Set explicitly (as in this workflow's distcp actions) so the action does not
                 depend on a <global> section or on server-side defaults. -->
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareDatasourceCountryAssociation</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--whitelist</arg><arg>${whitelist}</arg>
            <arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
        </spark>
        <ok to="fork_join_prepare_result_country"/>
        <error to="Kill"/>
    </action>
    <fork name="fork_join_prepare_result_country">
        <path start="join_prepareresult_publication"/>
        <path start="join_prepareresult_dataset"/>
        <path start="join_prepareresult_otherresearchproduct"/>
        <path start="join_prepareresult_software"/>
    </fork>
    <!-- Runs PrepareResultCountrySet for publications: sourcePath is ${sourcePath}/publication,
         preparedInfoPath is ${workingDir}/preparedInfo, outputPath is ${workingDir}/publication. -->
    <action name="join_prepareresult_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <!-- Set explicitly (as in this workflow's distcp actions) so the action does not
                 depend on a <global> section or on server-side defaults. -->
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn</master>
            <mode>cluster</mode>
            <name>prepareResultCountry-Publication</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/publication</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
        </spark>
        <ok to="wait_prepare"/>
        <error to="Kill"/>
    </action>
    <!-- Runs PrepareResultCountrySet for datasets: sourcePath is ${sourcePath}/dataset,
         preparedInfoPath is ${workingDir}/preparedInfo, outputPath is ${workingDir}/dataset. -->
    <action name="join_prepareresult_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <!-- Set explicitly (as in this workflow's distcp actions) so the action does not
                 depend on a <global> section or on server-side defaults. -->
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn</master>
            <mode>cluster</mode>
            <name>prepareResultCountry-Dataset</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
        </spark>
        <ok to="wait_prepare"/>
        <error to="Kill"/>
    </action>
    <!-- Runs PrepareResultCountrySet for other research products: sourcePath is
         ${sourcePath}/otherresearchproduct, preparedInfoPath is ${workingDir}/preparedInfo,
         outputPath is ${workingDir}/otherresearchproduct. -->
    <action name="join_prepareresult_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <!-- Set explicitly (as in this workflow's distcp actions) so the action does not
                 depend on a <global> section or on server-side defaults. -->
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn</master>
            <mode>cluster</mode>
            <name>prepareResultCountry-ORP</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
        </spark>
        <ok to="wait_prepare"/>
        <error to="Kill"/>
    </action>
    <!-- Runs PrepareResultCountrySet for software: sourcePath is ${sourcePath}/software,
         preparedInfoPath is ${workingDir}/preparedInfo, outputPath is ${workingDir}/software. -->
    <action name="join_prepareresult_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <!-- Set explicitly (as in this workflow's distcp actions) so the action does not
                 depend on a <global> section or on server-side defaults. -->
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn</master>
            <mode>cluster</mode>
            <name>prepareResultCountry-Software</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/software</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
        </spark>
        <ok to="wait_prepare"/>
        <error to="Kill"/>
    </action>
    <join name="wait_prepare" to="fork_join_apply_country_propagation"/>
    <fork name="fork_join_apply_country_propagation">
        <path start="join_propagation_publication"/>
        <path start="join_propagation_dataset"/>
        <path start="join_propagation_otherresearchproduct"/>
        <path start="join_propagation_software"/>
    </fork>
    <!-- Runs SparkCountryPropagationJob for publications: preparedInfoPath is the
         ${workingDir}/publication directory written by join_prepareresult_publication;
         outputPath is ${outputPath}/publication. -->
    <action name="join_propagation_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <!-- Set explicitly (as in this workflow's distcp actions) so the action does not
                 depend on a <global> section or on server-side defaults. -->
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn</master>
            <mode>cluster</mode>
            <name>countryPropagationForPublications</name>
            <class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/publication</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Runs SparkCountryPropagationJob for datasets: preparedInfoPath is the
         ${workingDir}/dataset directory written by join_prepareresult_dataset;
         outputPath is ${outputPath}/dataset. -->
    <action name="join_propagation_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <!-- Set explicitly (as in this workflow's distcp actions) so the action does not
                 depend on a <global> section or on server-side defaults. -->
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn</master>
            <mode>cluster</mode>
            <name>countryPropagationForDataset</name>
            <class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/dataset</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Country propagation for other research products (ORP): same job class and Spark
         options as the sibling branches, with resultTableName
         eu.dnetlib.dhp.schema.oaf.OtherResearchProduct and the "otherresearchproduct"
         subfolder used for sourcePath, preparedInfoPath (under workingDir) and outputPath.
         Success moves on to the "wait" join, any failure goes to "Kill". -->
    <action name="join_propagation_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>countryPropagationForORP</name>
            <class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/otherresearchproduct</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Country propagation for software: same job class and Spark options as the sibling
         branches, with resultTableName eu.dnetlib.dhp.schema.oaf.Software and the
         "software" subfolder used for sourcePath, preparedInfoPath (under workingDir) and
         outputPath.
         Success moves on to the "wait" join, any failure goes to "Kill". -->
    <action name="join_propagation_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>countryPropagationForSoftware</name>
            <class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/software</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${outputPath}/software</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <join name="wait" to="End"/>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json
@ -0,0 +1,50 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"sg",
    "paramLongName":"saveGraph",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName":"tn",
    "paramLongName":"resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName":"pu",
    "paramLongName":"possibleUpdatesPath",
    "paramDescription": "the path where the association resultId orcid author list can be found",
    "paramRequired": true
  },
  {
    "paramName":"test",
    "paramLongName":"isTest",
    "paramDescription": "true if it is executing a test",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json
@ -0,0 +1,38 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"as",
    "paramLongName":"allowedsemrels",
    "paramDescription": "the allowed semantic relations for propagation",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName":"tn",
    "paramLongName":"resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json
@ -0,0 +1,20 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <!-- Default job properties for the orcidtoresultfromsemrel workflow.
      The history server address and the Hive metastore URI point at hosts named
      iis-cdh5-test-*; NOTE(review): these look like test cluster endpoints, confirm they
      are overridden for other environments.
      sparkExecutorNumber is defined here but the workflow.xml of this oozie_app never
      references it. -->
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
@ -0,0 +1,372 @@
 <workflow-app name="orcid_to_result_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
    <!-- Formal parameters of the workflow. None of them has a default value, so each one
         must be supplied at submission time. saveGraph is declared because the four
         join_propagate_* actions forward it to the Spark job; declaring it lets Oozie
         reject a submission that lacks it instead of failing after the copy and prepare
         phases have already run. -->
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>allowedsemrels</name>
            <description>the semantic relationships allowed for propagation</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <property>
            <name>saveGraph</name>
            <description>true if the new version of the graph must be saved</description>
        </property>
    </parameters>
    <start to="reset_outputpath"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <!-- First node of the workflow: deletes outputPath and recreates it as an empty
         directory, then hands over to the copy_entities fork. Failure goes to "Kill". -->
    <action name="reset_outputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="copy_entities"/>
        <error to="Kill"/>
    </action>
    <!-- Starts the four distcp actions (relation, organization, project, datasource) in
         parallel; each of them transitions to the copy_wait join on success. -->
    <fork name="copy_entities">
        <path start="copy_relation"/>
        <path start="copy_organization"/>
        <path start="copy_projects"/>
        <path start="copy_datasources"/>
    </fork>
    <!-- distcp of the "relation" subfolder from sourcePath to outputPath; both locations
         are prefixed with nameNode. Success goes to the copy_wait join, failure to "Kill". -->
    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <!-- distcp of the "organization" subfolder from sourcePath to outputPath; both
         locations are prefixed with nameNode. Success goes to the copy_wait join, failure
         to "Kill". -->
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <!-- distcp of the "project" subfolder from sourcePath to outputPath (note the folder
         name is singular although the action name is plural); both locations are prefixed
         with nameNode. Success goes to the copy_wait join, failure to "Kill". -->
    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <!-- distcp of the "datasource" subfolder from sourcePath to outputPath (note the folder
         name is singular although the action name is plural); both locations are prefixed
         with nameNode. Success goes to the copy_wait join, failure to "Kill". -->
    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <join name="copy_wait" to="fork_prepare_assoc_step1"/>
    <!-- Runs step 1 of the preparation once per result type (publication, dataset,
         otherresearchproduct, software) in parallel; the branches meet at the "wait" join. -->
    <fork name="fork_prepare_assoc_step1">
        <path start="join_prepare_publication"/>
        <path start="join_prepare_dataset"/>
        <path start="join_prepare_otherresearchproduct"/>
        <path start="join_prepare_software"/>
    </fork>
    <!-- Preparation step 1 for publications: runs PrepareResultOrcidAssociationStep1 with
         resultTableName eu.dnetlib.dhp.schema.oaf.Publication, the whole sourcePath (not a
         per-type subfolder), the Hive metastore URI and the allowedsemrels list.
         NOTE(review): all four join_prepare_* actions run in parallel and receive the same
         outputPath (preparedInfo/targetOrcidAssoc under workingDir); presumably the job
         writes into a per-type subfolder, confirm against the job implementation.
         Unlike the other three join_prepare_* actions, this one also disables speculation
         and sets 3840 SQL shuffle partitions.
         Success goes to the "wait" join, failure to "Kill". -->
    <action name="join_prepare_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase1-Publications</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.sql.shuffle.partitions=3840
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Preparation step 1 for datasets: runs PrepareResultOrcidAssociationStep1 with
         resultTableName eu.dnetlib.dhp.schema.oaf.Dataset; all other arguments are the same
         as in the sibling join_prepare_* actions, including the shared outputPath.
         No speculation or shuffle partition settings are passed here, in contrast to
         join_prepare_publication. NOTE(review): confirm this difference is intentional.
         Success goes to the "wait" join, failure to "Kill". -->
    <action name="join_prepare_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase1-Dataset</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Preparation step 1 for other research products (ORP): runs
         PrepareResultOrcidAssociationStep1 with resultTableName
         eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; all other arguments are the same as
         in the sibling join_prepare_* actions, including the shared outputPath.
         No speculation or shuffle partition settings are passed here, in contrast to
         join_prepare_publication. NOTE(review): confirm this difference is intentional.
         Success goes to the "wait" join, failure to "Kill". -->
    <action name="join_prepare_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase1-ORP</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- Preparation step 1 for software: runs PrepareResultOrcidAssociationStep1 with
         resultTableName eu.dnetlib.dhp.schema.oaf.Software; all other arguments are the
         same as in the sibling join_prepare_* actions, including the shared outputPath.
         No speculation or shuffle partition settings are passed here, in contrast to
         join_prepare_publication. NOTE(review): confirm this difference is intentional.
         Success goes to the "wait" join, failure to "Kill". -->
    <action name="join_prepare_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase1-Software</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <join name="wait" to="prepare_assoc_step2"/>
    <!-- Preparation step 2: runs PrepareResultOrcidAssociationStep2 once, after all step 1
         branches have joined. Its sourcePath is the step 1 output folder
         (preparedInfo/targetOrcidAssoc under workingDir) and its outputPath is
         preparedInfo/mergedOrcidAssoc under workingDir, which the propagation actions
         later receive as possibleUpdatesPath.
         Success goes to the fork-join-exec-propagation fork, failure to "Kill". -->
    <action name="prepare_assoc_step2">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase2</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
        </spark>
        <ok to="fork-join-exec-propagation"/>
        <error to="Kill"/>
    </action>
    <!-- Runs the ORCID propagation job once per result type (publication, dataset,
         otherresearchproduct, software) in parallel; the branches meet at the "wait2" join. -->
    <fork name="fork-join-exec-propagation">
        <path start="join_propagate_publication"/>
        <path start="join_propagate_dataset"/>
        <path start="join_propagate_otherresearchproduct"/>
        <path start="join_propagate_software"/>
    </fork>
    <!-- ORCID propagation for publications: runs SparkOrcidToResultFromSemRelJob with
         resultTableName eu.dnetlib.dhp.schema.oaf.Publication. possibleUpdatesPath is the
         step 2 output (preparedInfo/mergedOrcidAssoc under workingDir); sourcePath and
         outputPath use the "publication" subfolder; saveGraph and the Hive metastore URI
         are forwarded from the workflow properties with the same names.
         Speculation is disabled; this is the only join_propagate_* action that also sets
         3840 SQL shuffle partitions.
         Success goes to the "wait2" join, failure to "Kill". -->
    <action name="join_propagate_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-Publication</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <!-- ORCID propagation for datasets: runs SparkOrcidToResultFromSemRelJob with
         resultTableName eu.dnetlib.dhp.schema.oaf.Dataset. possibleUpdatesPath is the
         step 2 output (preparedInfo/mergedOrcidAssoc under workingDir); sourcePath and
         outputPath use the "dataset" subfolder. Speculation is disabled; no shuffle
         partition count is set here.
         Success goes to the "wait2" join, failure to "Kill". -->
    <action name="join_propagate_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-Dataset</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <!-- ORCID propagation for other research products (ORP): runs
         SparkOrcidToResultFromSemRelJob with resultTableName
         eu.dnetlib.dhp.schema.oaf.OtherResearchProduct. possibleUpdatesPath is the step 2
         output (preparedInfo/mergedOrcidAssoc under workingDir); sourcePath and outputPath
         use the "otherresearchproduct" subfolder. Speculation is disabled; no shuffle
         partition count is set here.
         Success goes to the "wait2" join, failure to "Kill". -->
    <action name="join_propagate_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-ORP</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <!-- ORCID propagation for software: runs SparkOrcidToResultFromSemRelJob with
         resultTableName eu.dnetlib.dhp.schema.oaf.Software. possibleUpdatesPath is the
         step 2 output (preparedInfo/mergedOrcidAssoc under workingDir); sourcePath and
         outputPath use the "software" subfolder. Speculation is disabled; no shuffle
         partition count is set here.
         Success goes to the "wait2" join, failure to "Kill". -->
    <action name="join_propagate_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-Software</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${outputPath}/software</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <join name="wait2" to="End"/>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json
@ -0,0 +1,33 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"asr",
    "paramLongName":"allowedsemrels",
    "paramDescription": "the allowed semantic relations for propagation. Split by ;",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName":"pu",
    "paramLongName":"potentialUpdatePath",
    "paramDescription": "the path of the potential updates ",
    "paramRequired": true
  },
  {
    "paramName":"al",
    "paramLongName":"alreadyLinkedPath",
    "paramDescription": "the path of the already linked project result_set",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json
@ -0,0 +1,44 @@
 [
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName":"sg",
    "paramLongName":"saveGraph",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName":"pu",
    "paramLongName":"potentialUpdatePath",
    "paramDescription": "the path of the potential updates ",
    "paramRequired": true
  },
  {
    "paramName":"al",
    "paramLongName":"alreadyLinkedPath",
    "paramDescription": "the path of the already linked project result_set",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "test",
    "paramLongName": "isTest",
    "paramDescription": "true if it is a test running",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml
@ -0,0 +1,63 @@
 <configuration>
    <!-- Default properties for the project-to-result propagation Oozie application.
         The workflow.xml in this same oozie_app directory refers to them by name. -->

    <!-- Cluster endpoints: handed to every distcp action as job-tracker / name-node;
         nameNode is also used as the prefix of the copied paths and of the Spark event log dir.
         NOTE(review): the commented-out values look like endpoints of another cluster kept
         for reference; confirm whether they are still needed. -->
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
 <!--        <value>hadoop-rm3.garr-pa1.d4science.org:8032</value>-->
    </property>
    <property>
        <name>nameNode</name>
 <!--        <value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>-->
        <value>hdfs://nameservice1</value>
    </property>

    <!-- Oozie library settings: use the system libpath and the spark2 sharelib for Spark actions. -->
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>

    <!-- Passed to both Spark jobs of the workflow as their hive_metastore_uris argument. -->
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
 <!--        <value>thrift://hadoop-edge3.garr-pa1.d4science.org:9083</value>-->
    </property>

    <!-- Values substituted into the spark-opts of the workflow:
         history server address, event log dir, extra listeners, SQL query execution listeners. -->
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>

    <!-- Spark sizing.
         NOTE(review): sparkExecutorNumber is not referenced by the workflow.xml of this
         application; confirm whether it is still needed. -->
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <!-- Upper bound used for spark.dynamicAllocation.maxExecutors in the apply_propagation action. -->
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml
@ -0,0 +1,187 @@
 <workflow-app name="project_to_result_propagation" xmlns="uri:oozie:workflow:0.5">
    <!--
        Project-to-result propagation.
        Flow: reset the output path, copy every graph table from sourcePath to outputPath
        in parallel (distcp), then run two Spark jobs in sequence:
        PrepareProjectResultsAssociation followed by SparkResultToProjectThroughSemRelJob.
        Properties used below but not declared under parameters (jobTracker, nameNode,
        hive_metastore_uris, the spark settings, workingDir, projectVersion) are expected
        from config-default.xml or from the job properties supplied at submission.
    -->
    <!-- Formal parameters: none has a default value, so each one must be supplied at submission. -->
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>allowedsemrels</name>
            <description>the allowed semantic relations for propagation</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <!-- Forwarded to the apply_propagation job. Declared here so that a missing value is
             reported when the job is submitted instead of when that action starts, i.e. after
             the copies and the preparation job have already run. -->
        <property>
            <name>saveGraph</name>
            <description>true if the new version of the graph must be saved</description>
        </property>
    </parameters>
    <start to="reset_outputpath"/>
    <!-- Common failure sink: every action routes its error transition here. -->
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <!-- Start from an empty output directory: delete what is there and recreate it. -->
    <action name="reset_outputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="copy_entities"/>
        <error to="Kill"/>
    </action>
    <!-- Copy the eight graph tables in parallel; all branches meet again at the "wait" join. -->
    <fork name="copy_entities">
        <path start="copy_relation"/>
        <path start="copy_publication"/>
        <path start="copy_dataset"/>
        <path start="copy_orp"/>
        <path start="copy_software"/>
        <path start="copy_organization"/>
        <path start="copy_projects"/>
        <path start="copy_datasources"/>
    </fork>
    <!-- The relation table is copied here and is also the output location of apply_propagation.
         NOTE(review): presumably that job adds the new relations to the copied ones; confirm
         the save mode in SparkResultToProjectThroughSemRelJob. -->
    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_publication">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/publication</arg>
            <arg>${nameNode}/${outputPath}/publication</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_dataset">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/dataset</arg>
            <arg>${nameNode}/${outputPath}/dataset</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_orp">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
            <arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_software">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/software</arg>
            <arg>${nameNode}/${outputPath}/software</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <!-- All copies must have finished before the preparation job starts. -->
    <join name="wait" to="prepare_project_results_association"/>
    <!-- Step 1: reads the source relation table and writes two intermediate data sets under the
         working directory (preparedInfo/potentialUpdates and preparedInfo/alreadyLinked). -->
    <action name="prepare_project_results_association">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareProjectResultsAssociation</name>
            <class>eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--potentialUpdatePath</arg><arg>${workingDir}/preparedInfo/potentialUpdates</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
        </spark>
        <ok to="apply_propagation"/>
        <error to="Kill"/>
    </action>
    <!-- Step 2: consumes the two intermediate data sets produced by step 1 and writes to the
         relation directory of the output path. Unlike step 1 it runs with dynamic allocation. -->
    <action name="apply_propagation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ProjectToResultPropagation</name>
            <class>eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--outputPath</arg><arg>${outputPath}/relation</arg>
            <arg>--potentialUpdatePath</arg><arg>${workingDir}/preparedInfo/potentialUpdates</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json
@ -0,0 +1,51 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName":"sg",
    "paramLongName":"saveGraph",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName":"test",
    "paramLongName":"isTest",
    "paramDescription": "true if it is executing a test",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName":"tn",
    "paramLongName":"resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "p",
    "paramLongName": "preparedInfoPath",
    "paramDescription": "the path where the prepared info has been stored",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json
@ -0,0 +1,33 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"ocm",
    "paramLongName":"organizationtoresultcommunitymap",
    "paramDescription": "the map associating organizations with communities",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <!-- Default properties for the community-to-result (from organization) propagation Oozie
         application. The workflow.xml in this same oozie_app directory refers to them by name. -->

    <!-- Cluster endpoints: handed to every distcp action as job-tracker / name-node;
         nameNode is also used as the prefix of the copied paths and of the Spark event log dir. -->
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>

    <!-- Oozie library settings: use the system libpath and the spark2 sharelib for Spark actions. -->
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>

    <!-- Passed to every Spark job of the workflow as its hive_metastore_uris argument. -->
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>

    <!-- Values substituted into the spark-opts of the workflow:
         history server address, event log dir, extra listeners, SQL query execution listeners. -->
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>

    <!-- Spark sizing.
         NOTE(review): sparkExecutorNumber is not referenced by the workflow.xml of this
         application; confirm whether it is still needed. -->
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <!-- Upper bound used for spark.dynamicAllocation.maxExecutors in the Spark actions. -->
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml
@ -0,0 +1,239 @@
 <workflow-app name="community_to_result_propagation" xmlns="uri:oozie:workflow:0.5">
    <!--
        Community-to-result propagation through organizations.
        Flow: reset the output path, copy the relation, organization, project and datasource
        tables from sourcePath to outputPath in parallel (distcp), run PrepareResultCommunitySet,
        then run SparkResultToCommunityFromOrganizationJob once per result type in parallel.
        The four result tables are not copied: each one is written to the output path by its
        own propagation action.
        Properties used below but not declared under parameters (jobTracker, nameNode,
        hive_metastore_uris, the spark settings, workingDir, projectVersion) are expected
        from config-default.xml or from the job properties supplied at submission.
    -->
    <!-- Formal parameters: none has a default value, so each one must be supplied at submission. -->
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>organizationtoresultcommunitymap</name>
            <description>organization community map</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <!-- Forwarded to the four propagation jobs. Declared here so that a missing value is
             reported when the job is submitted instead of when those actions start, i.e. after
             the copies and the preparation job have already run. -->
        <property>
            <name>saveGraph</name>
            <description>true if the new version of the graph must be saved</description>
        </property>
    </parameters>
    <start to="reset_outputpath"/>
    <!-- Common failure sink: every action routes its error transition here. -->
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <!-- Start from an empty output directory: delete what is there and recreate it. -->
    <action name="reset_outputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="copy_entities"/>
        <error to="Kill"/>
    </action>
    <!-- Copy the four non-result tables in parallel; the branches meet at the "copy_wait" join. -->
    <fork name="copy_entities">
        <path start="copy_relation"/>
        <path start="copy_organization"/>
        <path start="copy_projects"/>
        <path start="copy_datasources"/>
    </fork>
    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <!-- All copies must have finished before the preparation job starts. -->
    <join name="copy_wait" to="prepare_result_communitylist"/>
    <!-- Preparation: reads the source relation table plus the organization/community map and
         writes preparedInfo/resultCommunityList under the working directory. -->
    <action name="prepare_result_communitylist">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Prepare-Community-Result-Organization</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--organizationtoresultcommunitymap</arg><arg>${organizationtoresultcommunitymap}</arg>
        </spark>
        <ok to="fork-join-exec-propagation"/>
        <error to="Kill"/>
    </action>
    <!-- Propagation: the same job class runs once per result type. The four actions differ only
         in application name, source/output sub-directory and resultTableName class. -->
    <fork name="fork-join-exec-propagation">
        <path start="join_propagate_publication"/>
        <path start="join_propagate_dataset"/>
        <path start="join_propagate_otherresearchproduct"/>
        <path start="join_propagate_software"/>
    </fork>
    <action name="join_propagate_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>community2resultfromorganization-Publication</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagate_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>community2resultfromorganization-Dataset</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagate_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>community2resultfromorganization-ORP</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagate_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>community2resultfromorganization-Software</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${outputPath}/software</arg>
            <arg>--saveGraph</arg><arg>${saveGraph}</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <!-- The workflow ends only after all four propagation branches have succeeded. -->
    <join name="wait2" to="End"/>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json
@ -0,0 +1,52 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"sg",
    "paramLongName":"saveGraph",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName":"tn",
    "paramLongName":"resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "p",
    "paramLongName": "preparedInfoPath",
    "paramDescription": "the path where the prepared info has been stored",
    "paramRequired": true
  },
  {
    "paramName":"test",
    "paramLongName":"isTest",
    "paramDescription": "true if it is executing a test",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json
@ -0,0 +1,20 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json
@ -0,0 +1,44 @@
 [
  {
    "paramName":"is",
    "paramLongName":"isLookUpUrl",
    "paramDescription": "URL of the isLookUp Service",
    "paramRequired": true
  },
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName":"as",
    "paramLongName":"allowedsemrels",
    "paramDescription": "the allowed semantic relations for propagation",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
  "paramName":"tn",
  "paramLongName":"resultTableName",
  "paramDescription": "the name of the result table we are currently working on",
  "paramRequired": true
 }
 ]
--- a/Show More
+++ b/Show More