38 changed files with 2863 additions and 19 deletions
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
@ -167,4 +167,9 @@ public class Utils implements Serializable {
 			});
 		return projectMap;
 	}
 	public static List<String> getCommunityIdList(String baseURL) throws IOException {
 		return getValidCommunities(baseURL).stream()
 				.map(community -> community.getId()).collect(Collectors.toList());
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
@ -151,7 +151,13 @@ public class SparkBulkTagJob {
 					.write()
 					.mode(SaveMode.Overwrite)
 					.option("compression", "gzip")
-					.json(outputPath + e.name());
+					.json(outputPath + e.name());//writing the tagging in the working dir for entity
 				readPath(spark, outputPath + e.name(), resultClazz) //copy the tagging in the actual result output path
 						.write()
 						.mode(SaveMode.Overwrite)
 						.option("compression","gzip")
 						.json(inputPath + e.name());
 			});
 	}
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java
@ -66,7 +66,7 @@ public class PrepareDatasourceCountryAssociation {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				removeOutputDir(spark, outputPath);
+				//removeOutputDir(spark, outputPath);
 				prepareDatasourceCountryAssociation(
 					spark,
 					Arrays.asList(parser.get("whitelist").split(";")),
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java
@ -97,6 +97,12 @@ public class SparkCountryPropagationJob {
 			.mode(SaveMode.Overwrite)
 			.json(outputPath);
 		readPath(spark, outputPath, resultClazz)
 				.write()
 				.mode(SaveMode.Overwrite)
 				.option("compression","gzip")
 				.json(sourcePath);
 	}
 	private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
@ -92,6 +92,12 @@ public class SparkResultToCommunityFromOrganizationJob {
 						.mode(SaveMode.Overwrite)
 						.option("compression", "gzip")
 						.json(outputPath + e.name());
 					readPath(spark, outputPath + e.name(), resultClazz)
 							.write()
 							.mode(SaveMode.Overwrite)
 							.option("compression","gzip")
 							.json(inputPath + e.name());
 				}
 			});
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java
@ -53,7 +53,7 @@ public class PrepareResultCommunitySet {
 		log.info("outputPath: {}", outputPath);
 		final String baseURL = parser.get("baseURL");
-		log.info("baseUEL: {}", baseURL);
+		log.info("baseURL: {}", baseURL);
 		final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL);
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java
@ -102,6 +102,12 @@ public class SparkResultToCommunityFromProject implements Serializable {
 						.mode(SaveMode.Overwrite)
 						.option("compression", "gzip")
 						.json(outputPath + e.name());
 					readPath(spark, outputPath + e.name(), resultClazz)
 							.write()
 							.mode(SaveMode.Overwrite)
 							.option("compression","gzip")
 							.json(inputPath + e.name());
 				}
 			});
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
@ -4,9 +4,11 @@ package eu.dnetlib.dhp.resulttocommunityfromsemrel;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import eu.dnetlib.dhp.api.Utils;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.*;
@ -26,11 +28,6 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 public class PrepareResultCommunitySetStep1 {
 	private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySetStep1.class);
 	private static final String COMMUNITY_LIST_XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')"
 		+ "  where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']"
 		+ "  and  $x//CONFIGURATION/context/param[./@name='status']/text() != 'hidden'"
 		+ "  return $x//CONFIGURATION/context/@id/string()";
 	/**
 	 * associates to each result the set of community contexts they are associated to; associates to each target of a
 	 * relation with allowed semantics the set of community context it could possibly inherit from the source of the
@ -88,10 +85,10 @@ public class PrepareResultCommunitySetStep1 {
 		final List<String> allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";"));
 		log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
-		final String isLookupUrl = parser.get("isLookUpUrl");
+		final String baseURL = parser.get("baseURL");
-		log.info("isLookupUrl: {}", isLookupUrl);
+		log.info("baseURL: {}", baseURL);
-		final List<String> communityIdList = getCommunityList(isLookupUrl);
+		final List<String> communityIdList = getCommunityList(baseURL);
 		log.info("communityIdList: {}", new Gson().toJson(communityIdList));
 		final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase();
@ -159,9 +156,8 @@ public class PrepareResultCommunitySetStep1 {
 			.json(outputResultPath);
 	}
-	public static List<String> getCommunityList(final String isLookupUrl) throws ISLookUpException {
+	public static List<String> getCommunityList(final String baseURL) throws IOException {
-		ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
+		return Utils.getCommunityIdList(baseURL);
 		return isLookUp.quickSearchProfile(COMMUNITY_LIST_XQUERY);
 	}
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java
@ -100,6 +100,12 @@ public class SparkResultToCommunityThroughSemRelJob {
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(outputPath);
 		readPath(spark, outputPath, resultClazz)
 				.write()
 				.mode(SaveMode.Overwrite)
 				.option("compression","gzip")
 				.json(inputPath);
 	}
 	private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> contextUpdaterFn() {
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json
@ -1,7 +1,7 @@
 [
  {
-    "paramName":"is",
+    "paramName":"bu",
-    "paramLongName":"isLookUpUrl",
+    "paramLongName":"baseURL",
    "paramDescription": "URL of the isLookUp Service",
    "paramRequired": true
  },
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
@ -9,8 +9,8 @@
            <description>the semantic relationships allowed for propagation</description>
        </property>
        <property>
-            <name>isLookUpUrl</name>
+            <name>baseURL</name>
-            <description>the isLookup service endpoint</description>
+            <description>the baseurl for the comminity APIs</description>
        </property>
        <property>
            <name>outputPath</name>
@ -116,7 +116,7 @@
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--baseURL</arg><arg>${baseURL}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml
@ -0,0 +1,30 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>hiveMetastoreUris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>hiveJdbcUrl</name>
        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
    </property>
    <property>
        <name>hiveDbName</name>
        <value>openaire</value>
    </property>
    <property>
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt
@ -0,0 +1,10 @@
 ## This is a classpath-based import file (this header is required)
 orcid_propagation classpath eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app
 bulk_tagging classpath  eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app
 affiliation_inst_repo classpath  eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app
 entity_semantic_relation classpath eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app
 community_organization classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app
 result_project classpath eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app
 community_project classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app
 community_sem_rel classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app
 country_propagation classpath eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml
@ -0,0 +1,324 @@
 <workflow-app name="enrichment_main" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>allowedsemrelsorcidprop</name>
            <description>the semantic relationships allowed for propagation</description>
        </property>
        <property>
            <name>allowedsemrelsresultproject</name>
            <description>the allowed semantics </description>
        </property>
        <property>
            <name>allowedsemrelscommunitysemrel</name>
            <description>the semantic relationships allowed for propagation</description>
        </property>
        <property>
            <name>datasourceWhitelistForCountryPropagation</name>
            <description>the white list</description>
        </property>
        <property>
            <name>allowedtypes</name>
            <description>the allowed types</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <property>
            <name>organizationtoresultcommunitymap</name>
            <description>organization community map</description>
        </property>
        <property>
            <name>pathMap</name>
            <description>the json path associated to each selection field</description>
        </property>
        <property>
            <name>blacklist</name>
            <description>list of datasources in blacklist for the affiliation from instrepo propagation</description>
        </property>
        <property>
            <name>hiveDbName</name>
            <description>the target hive database name</description>
        </property>
        <property>
            <name>hiveJdbcUrl</name>
            <description>hive server jdbc url</description>
        </property>
        <property>
            <name>hiveMetastoreUris</name>
            <description>hive server metastore URIs</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>oozieActionShareLibForSpark2</name>
            <description>oozie action sharelib for spark 2.*</description>
        </property>
        <property>
            <name>spark2ExtraListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
            <description>spark 2.* extra listeners classname</description>
        </property>
        <property>
            <name>spark2SqlQueryExecutionListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
            <description>spark 2.* sql query execution listeners classname</description>
        </property>
        <property>
            <name>spark2YarnHistoryServerAddress</name>
            <description>spark 2.* yarn history server address</description>
        </property>
        <property>
            <name>spark2EventLogDir</name>
            <description>spark 2.* event log dir location</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapreduce.job.queuename</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.queue.name</name>
                <value>${oozieLauncherQueueName}</value>
            </property>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="resumeFrom"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <decision name="resumeFrom">
        <switch>
            <case to="bulk_tagging">${wf:conf('resumeFrom') eq 'BulkTagging'}</case>
            <case to="affiliation_inst_repo">${wf:conf('resumeFrom') eq 'AffiliationInstitutionalRepository'}</case>
            <case to="affiliation_semantic_relation">${wf:conf('resumeFrom') eq 'AffiliationSemanticRelation'}</case>
            <case to="community_organization">${wf:conf('resumeFrom') eq 'CommunityOrganization'}</case>
            <case to="result_project">${wf:conf('resumeFrom') eq 'ResultProject'}</case>
            <case to="community_project">${wf:conf('resumeFrom') eq 'CommunityProject'}</case>
            <case to="community_sem_rel">${wf:conf('resumeFrom') eq 'CommunitySemanticRelation'}</case>
            <case to="country_propagation">${wf:conf('resumeFrom') eq 'CountryPropagation'}</case>
            <default to="orcid_propagation"/>
        </switch>
    </decision>
    <action name="orcid_propagation">
        <sub-workflow>
            <app-path>${wf:appPath()}/orcid_propagation
            </app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>sourcePath</name>
                    <value>${sourcePath}</value>
                </property>
                <property>
                    <name>allowedsemrels</name>
                    <value>${allowedsemrelsorcidprop}</value>
                </property>
                <property>
                    <name>outputPath</name>
                    <value>${outputPath}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="bulk_tagging" />
        <error to="Kill" />
    </action>
    <action name="bulk_tagging">
        <sub-workflow>
            <app-path>${wf:appPath()}/bulk_tagging
            </app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>sourcePath</name>
                    <value>${outputPath}</value>
                </property>
                <property>
                    <name>baseURL</name>
                    <value>${baseURL}</value>
                </property>
                <property>
                    <name>pathMap</name>
                    <value>${pathMap}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="affiliation_inst_repo" />
        <error to="Kill" />
    </action>
    <action name="affiliation_inst_repo">
        <sub-workflow>
            <app-path>${wf:appPath()}/affiliation_inst_repo
            </app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>sourcePath</name>
                    <value>${outputPath}</value>
                </property>
                <property>
                    <name>blacklist</name>
                    <value>${blacklist}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="affiliation_semantic_relation" />
        <error to="Kill" />
    </action>
    <action name="affiliation_semantic_relation">
        <sub-workflow>
            <app-path>${wf:appPath()}/affiliation_semantic_relation
            </app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>sourcePath</name>
                    <value>${outputPath}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="community_organization" />
        <error to="Kill" />
    </action>
    <action name="community_organization">
        <sub-workflow>
            <app-path>${wf:appPath()}/community_organization
            </app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>sourcePath</name>
                    <value>${outputPath}</value>
                </property>
                <property>
                    <name>baseURL</name>
                    <value>${baseURL}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="result_project" />
        <error to="Kill" />
    </action>
    <action name="result_project">
        <sub-workflow>
            <app-path>${wf:appPath()}/result_project
            </app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>sourcePath</name>
                    <value>${outputPath}</value>
                </property>
                <property>
                    <name>allowedsemrels</name>
                    <value>${allowedsemrelsresultproject}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="community_project" />
        <error to="Kill" />
    </action>
    <action name="community_project">
        <sub-workflow>
            <app-path>${wf:appPath()}/community_project
            </app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>sourcePath</name>
                    <value>${outputPath}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="community_sem_rel" />
        <error to="Kill" />
    </action>
    <action name="community_sem_rel">
        <sub-workflow>
            <app-path>${wf:appPath()}/community_sem_rel
            </app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>sourcePath</name>
                    <value>${outputPath}</value>
                </property>
                <property>
                    <name>allowedsemrels</name>
                    <value>${allowedsemrelscommunitysemrel}</value>
                </property>
                <property>
                    <name>baseURL</name>
                    <value>${baseURL}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="country_propagation" />
        <error to="Kill" />
    </action>
    <action name="country_propagation">
        <sub-workflow>
            <app-path>${wf:appPath()}/country_propagation
            </app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>sourcePath</name>
                    <value>${outputPath}</value>
                </property>
                <property>
                    <name>whitelist</name>
                    <value>${datasourceWhitelistForCountryPropagation}</value>
                </property>
                <property>
                    <name>allowedtypes</name>
                    <value>${allowedtupes}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="End" />
        <error to="Kill" />
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml
@ -0,0 +1,54 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml
@ -0,0 +1,66 @@
 <workflow-app name="bulk_tagging" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>pathMap</name>
            <description>the json path associated to each selection field</description>
        </property>
        <property>
            <name>baseURL</name>
            <description>The URL to access the community APIs</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="exec_bulktag"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="exec_bulktag">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>bulkTagging-publication</name>
            <class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
            <arg>--workingPath</arg><arg>${workingDir}/bulktag/</arg>
            <arg>--pathMap</arg><arg>${pathMap}</arg>
            <arg>--baseURL</arg><arg>${baseURL}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml
@ -0,0 +1,316 @@
 <workflow-app name="country_propagation" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>whitelist</name>
            <description>the white list</description>
        </property>
        <property>
            <name>allowedtypes</name>
            <description>the allowed types</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="prepare_datasource_country_association"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="prepare_datasource_country_association">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareDatasourceCountryAssociation</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--whitelist</arg><arg>${whitelist}</arg>
            <arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
            <arg>--workingPath</arg><arg>${workingDir}/country</arg>
        </spark>
        <ok to="fork_prepare_result_country"/>
        <error to="Kill"/>
    </action>
    <fork name="fork_prepare_result_country">
        <path start="prepareresult_publication"/>
        <path start="prepareresult_dataset"/>
        <path start="prepareresult_otherresearchproduct"/>
        <path start="prepareresult_software"/>
    </fork>
    <action name="prepareresult_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>prepareResultCountry-Publication</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--workingPath</arg><arg>${workingDir}/country</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
        </spark>
        <ok to="wait_prepare"/>
        <error to="Kill"/>
    </action>
    <action name="prepareresult_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>prepareResultCountry-Dataset</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--workingPath</arg><arg>${workingDir}/country</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
        </spark>
        <ok to="wait_prepare"/>
        <error to="Kill"/>
    </action>
    <action name="prepareresult_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>prepareResultCountry-ORP</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--workingPath</arg><arg>${workingDir}/country</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
        </spark>
        <ok to="wait_prepare"/>
        <error to="Kill"/>
    </action>
    <action name="prepareresult_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>prepareResultCountry-Software</name>
            <class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--workingPath</arg><arg>${workingDir}/country</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
        </spark>
        <ok to="wait_prepare"/>
        <error to="Kill"/>
    </action>
    <join name="wait_prepare" to="fork_join_apply_country_propagation"/>
    <fork name="fork_apply_country_propagation">
        <path start="propagation_publication"/>
        <path start="propagation_dataset"/>
        <path start="propagation_otherresearchproduct"/>
        <path start="propagation_software"/>
    </fork>
    <action name="propagation_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>countryPropagationForPublications</name>
            <class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--workingPath</arg><arg>${workingDir}/country</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="propagation_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>countryPropagationForDataset</name>
            <class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--workingPath</arg><arg>${workingDir}/country</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="propagation_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>countryPropagationForORP</name>
            <class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--workingPath</arg><arg>${workingDir}/country</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="propagation_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>countryPropagationForSoftware</name>
            <class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--workingPath</arg><arg>${workingDir}/country</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
                    </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <join name="wait" to="reset_workingDir"/>
    <action name="reset_workingDir">
        <fs>
            <delete path="${workingDir}"/>
            <mkdir path="${workingDir}"/>
        </fs>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json
@ -0,0 +1,50 @@
 [
  {
    "paramName":"gp",
    "paramLongName":"graphPath",
    "paramDescription": "the path of the sequencial file to read",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName":"lp",
    "paramLongName":"leavesPath",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName":"cp",
    "paramLongName":"childParentPath",
    "paramDescription": "path where to store/find association from datasource and organization",
    "paramRequired": true
  },
  {
    "paramName":"rp",
    "paramLongName":"resultOrgPath",
    "paramDescription": "path where to store/find already linked results and organizations",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "the path where prepared info have been stored",
    "paramRequired": false
  },
  {
    "paramName": "rep",
    "paramLongName": "relationPath",
    "paramDescription": "the path where to store the selected subset of relations",
    "paramRequired": false
  },
  {
    "paramName": "pop",
    "paramLongName": "projectOrganizationPath",
    "paramDescription": "the number of iterations to be computed",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json
@ -0,0 +1,62 @@
 [
  {
    "paramName":"rep",
    "paramLongName":"relationPath",
    "paramDescription": "the path of the sequencial file to read",
    "paramRequired": true
  },
  {
    "paramName":"h",
    "paramLongName":"hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName":"lp",
    "paramLongName":"leavesPath",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName":"cp",
    "paramLongName":"childParentPath",
    "paramDescription": "path where to store/find association from datasource and organization",
    "paramRequired": true
  },
  {
    "paramName":"rp",
    "paramLongName":"resultOrgPath",
    "paramDescription": "path where to store/find already linked results and organizations",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "the path where prepared info have been stored",
    "paramRequired": false
  },
  {
    "paramName": "wd",
    "paramLongName": "workingDir",
    "paramDescription": "true if it is a test running",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "it",
    "paramLongName": "iterations",
    "paramDescription": "the number of iterations to be computed",
    "paramRequired": false
  },
  {
    "paramName": "pop",
    "paramLongName": "projectOrganizationPath",
    "paramDescription": "the number of iterations to be computed",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml
@ -0,0 +1,93 @@
 <workflow-app name="affiliation_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="prepare_info"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="prepare_info">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareResultOrganizationAssociation</name>
            <class>eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--graphPath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--leavesPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/leavesPath</arg>
            <arg>--childParentPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/childParentPath</arg>
            <arg>--resultOrgPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/resultOrgPath</arg>
            <arg>--projectOrganizationPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/projectOrganizationPath</arg>
            <arg>--relationPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/relation</arg>
        </spark>
        <ok to="apply_resulttoorganization_propagation"/>
        <error to="Kill"/>
    </action>
    <action name="apply_resulttoorganization_propagation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>resultToOrganizationFromSemRel</name>
            <class>eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--relationPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/relation</arg>
            <arg>--outputPath</arg><arg>${sourcePath}/relation</arg>
            <arg>--leavesPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/leavesPath</arg>
            <arg>--childParentPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/childParentPath</arg>
            <arg>--resultOrgPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/resultOrgPath</arg>
            <arg>--projectOrganizationPath</arg><arg>${workingDir}/entitiesSemanticRelation/preparedInfo/projectOrganizationPath</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--workingDir</arg><arg>${workingDir}/entitiesSemanticRelation/working</arg>
            <arg>--iterations</arg><arg>${iterations}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
@ -0,0 +1,369 @@
 <workflow-app name="orcid_to_result_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>allowedsemrels</name>
            <description>the semantic relationships allowed for propagation</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
    </parameters>
    <start to="reset_outputpath"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="reset_outputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="copy_entities"/>
        <error to="Kill"/>
    </action>
    <fork name="copy_entities">
        <path start="copy_relation"/>
        <path start="copy_organization"/>
        <path start="copy_projects"/>
        <path start="copy_datasources"/>
    </fork>
    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>
    <join name="copy_wait" to="prepare_relations"/>
    <action name="join_prepare_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase1-Publications</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.sql.shuffle.partitions=3840
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="join_prepare_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase1-Dataset</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="join_prepare_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase1-ORP</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="join_prepare_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase1-Software</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <join name="wait" to="prepare_assoc_step2"/>
    <action name="prepare_assoc_step2">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-PreparePhase2</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/orcidprop</arg>
            <arg>--outputPath</arg><arg>${workingDir}/orcidprop/mergedOrcidAssoc</arg>
        </spark>
        <ok to="fork-join-exec-propagation"/>
        <error to="Kill"/>
    </action>
    <fork name="fork-join-exec-propagation">
        <path start="join_propagate_publication"/>
        <path start="join_propagate_dataset"/>
        <path start="join_propagate_otherresearchproduct"/>
        <path start="join_propagate_software"/>
    </fork>
    <action name="join_propagate_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-Publication</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcidprop/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagate_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-Dataset</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcidprop/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagate_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-ORP</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcidprop/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagate_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ORCIDPropagation-Software</name>
            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.speculation=false
                --conf spark.hadoop.mapreduce.map.speculative=false
                --conf spark.hadoop.mapreduce.reduce.speculative=false
            </spark-opts>
            <arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcidprop/mergedOrcidAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${outputPath}/software</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <join name="wait2" to="reset_workingDir"/>
    <action name="reset_workingDir">
        <fs>
            <delete path="${workingDir}"/>
            <mkdir path="${workingDir}"/>
        </fs>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml
@ -0,0 +1,63 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
 <!--        <value>hadoop-rm3.garr-pa1.d4science.org:8032</value>-->
    </property>
    <property>
        <name>nameNode</name>
 <!--        <value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>-->
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
 <!--        <value>thrift://hadoop-edge3.garr-pa1.d4science.org:9083</value>-->
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml
@ -0,0 +1,94 @@
 <workflow-app name="project_to_result_propagation" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>allowedsemrels</name>
            <description>the allowed semantics </description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="prepare_project_results_association"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="prepare_project_results_association">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareProjectResultsAssociation</name>
            <class>eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--potentialUpdatePath</arg><arg>${workingDir}/resultproject/preparedInfo/potentialUpdates</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/resultproject/preparedInfo/alreadyLinked</arg>
        </spark>
        <ok to="apply_propagation"/>
        <error to="Kill"/>
    </action>
    <action name="apply_propagation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ProjectToResultPropagation</name>
            <class>eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--outputPath</arg><arg>${sourcePath}/relation</arg>
            <arg>--potentialUpdatePath</arg><arg>${workingDir}/resultproject/preparedInfo/potentialUpdates</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/resultproject/preparedInfo/alreadyLinked</arg>
        </spark>
        <ok to="reset_workingDir"/>
        <error to="Kill"/>
    </action>
    <action name="reset_workingDir">
        <fs>
            <delete path="${workingDir}"/>
            <mkdir path="${workingDir}"/>
        </fs>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml
@ -0,0 +1,88 @@
 <workflow-app name="community_to_result_propagation" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>baseURL</name>
            <description>the baseURL from where to reach the community APIs</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="prepare_result_communitylist"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="prepare_result_communitylist">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Prepare-Community-Result-Organization</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=6
                --executor-memory=5G
                --conf spark.executor.memoryOverhead=3g
                --conf spark.sql.shuffle.partitions=3284
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communityorganization/preparedInfo/resultCommunityList</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--baseURL</arg><arg>${baseURL}</arg>
        </spark>
        <ok to="exec-propagation"/>
        <error to="Kill"/>
    </action>
    <action name="exec-propagation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>community2resultfromorganization-Publication</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=6
                --executor-memory=5G
                --conf spark.executor.memoryOverhead=3g
                --conf spark.sql.shuffle.partitions=3284
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
            <arg>--outputPath</arg><arg>${workingDir}/resulttocommunityfromorganization/</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json
@ -0,0 +1,28 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequencial file to read",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "p",
    "paramLongName": "preparedInfoPath",
    "paramDescription": "the path where prepared info have been stored",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json
@ -0,0 +1,28 @@
 [
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequencial file to read",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "bu",
    "paramLongName": "baseURL",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml
@ -0,0 +1,90 @@
 <workflow-app name="community_to_result_propagation_project" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>baseURL</name>
            <description>the base URL to use to select the right community APIs</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="prepare_result_communitylist"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="prepare_result_communitylist">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Prepare-Community-Result-Organization</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromproject.PrepareResultCommunitySet</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=6
                --executor-memory=5G
                --conf spark.executor.memoryOverhead=3g
                --conf spark.sql.shuffle.partitions=3284
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitythroughproject/preparedInfo/resultCommunityList</arg>
            <arg>--baseURL</arg><arg>${baseURL}</arg>
        </spark>
        <ok to="exec-propagation"/>
        <error to="Kill"/>
    </action>
    <action name="exec-propagation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>community2resultfromproject</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromproject.SparkResultToCommunityFromProject</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=6
                --executor-memory=5G
                --conf spark.executor.memoryOverhead=3g
                --conf spark.sql.shuffle.partitions=3284
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/communitythroughproject/preparedInfo/resultCommunityList</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitythroughproject/</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml
@ -0,0 +1,305 @@
 <workflow-app name="result_to_community_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>allowedsemrels</name>
            <description>the semantic relationships allowed for propagation</description>
        </property>
        <property>
            <name>baseURL</name>
            <description>the isLookup service endpoint</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
    </parameters>
    <start to="fork_prepare_assoc_step1"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <fork name="fork_prepare_assoc_step1">
        <path start="join_prepare_publication"/>
        <path start="join_prepare_dataset"/>
        <path start="join_prepare_otherresearchproduct"/>
        <path start="join_prepare_software"/>
    </fork>
    <action name="join_prepare_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ResultToCommunitySemRel-PreparePhase1-Publications</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
            <arg>--baseURL</arg><arg>${baseURL}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="join_prepare_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ResultToCommunitySemRel-PreparePhase1-Dataset</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
            <arg>--baseURL</arg><arg>${baseURL}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="join_prepare_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ResultToCommunitySemRel-PreparePhase1-ORP</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
            <arg>--baseURL</arg><arg>${baseURL}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="join_prepare_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ResultToCommunitySemRel-PreparePhase1-Software</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
            <arg>--baseURL</arg><arg>${baseURL}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <join name="wait" to="prepare_assoc_step2"/>
    <action name="prepare_assoc_step2">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>ResultToCommunityEmRelPropagation-PreparePhase2</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
        </spark>
        <ok to="fork-join-exec-propagation"/>
        <error to="Kill"/>
    </action>
    <fork name="fork-join-exec-propagation">
        <path start="join_propagate_publication"/>
        <path start="join_propagate_dataset"/>
        <path start="join_propagate_otherresearchproduct"/>
        <path start="join_propagate_software"/>
    </fork>
    <action name="join_propagate_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Result2CommunitySemRelPropagation-Publication</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitysemrel/publication</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagate_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Result2CommunitySemRelPropagation-Dataset</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitysemrel/dataset</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagate_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Result2CommunitySemRelPropagation-ORP</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitysemrel/otherresearchproduct</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagate_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Result2CommunitySemRelPropagation-Software</name>
            <class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/communitysemrel/software</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
@ -0,0 +1,182 @@
 <workflow-app name="affiliation_from_instrepo_propagation" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>blacklist</name>
            <description>The list of institutional repositories that should not be used for the propagation</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="prepare_result_organization_association"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="prepare_result_organization_association">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareResultOrganizationAssociation</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.PrepareResultInstRepoAssociation</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--workingPath</arg><arg>${workingDir}/affiliationInstRepo</arg>
            <arg>--blacklist</arg><arg>${blacklist}</arg>
        </spark>
        <ok to="fork_join_apply_resulttoorganization_propagation"/>
        <error to="Kill"/>
    </action>
    <fork name="fork_join_apply_resulttoorganization_propagation">
        <path start="join_propagation_publication"/>
        <path start="join_propagation_dataset"/>
        <path start="join_propagation_otherresearchproduct"/>
        <path start="join_propagation_software"/>
    </fork>
    <action name="join_propagation_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>resultToOrganizationFromInstRepoPropagationForPublications</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--outputPath</arg><arg>${sourcePath}/relation</arg>
            <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagation_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>resultToOrganizationFromInstRepoPropagationForDataset</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--outputPath</arg><arg>${sourcePath}/relation</arg>
            <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagation_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>resultToOrganizationFromInstRepoPropagationForORP</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--outputPath</arg><arg>${sourcePath}/relation</arg>
            <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <action name="join_propagation_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>resultToOrganizationFromInstRepoPropagationForSoftware</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--outputPath</arg><arg>${sourcePath}/relation</arg>
            <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
            <arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
        </spark>
        <ok to="wait2"/>
        <error to="Kill"/>
    </action>
    <join name="wait2" to="End"/>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml
@ -0,0 +1,58 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>spark2ExtraListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
    </property>
    <property>
        <name>spark2SqlQueryExecutionListeners</name>
        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>4</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
    <property>
        <name>spark2MaxExecutors</name>
        <value>50</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml
@ -0,0 +1,97 @@
 <workflow-app name="affiliation_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="prepare_info"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="prepare_info">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareResultOrganizationAssociation</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--graphPath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--leavesPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath</arg>
            <arg>--childParentPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath</arg>
            <arg>--resultOrgPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath</arg>
            <arg>--relationPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/relation</arg>
        </spark>
        <ok to="apply_resulttoorganization_propagation"/>
        <error to="Kill"/>
    </action>
    <action name="apply_resulttoorganization_propagation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>resultToOrganizationFromSemRel</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--relationPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/relation</arg>
            <arg>--outputPath</arg><arg>${sourcePath}</arg>
            <arg>--leavesPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath</arg>
            <arg>--childParentPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath</arg>
            <arg>--resultOrgPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--workingDir</arg><arg>${workingDir}/affiliationSemanticRelation/working</arg>
            <arg>--iterations</arg><arg>${iterations}</arg>
        </spark>
        <ok to="reset_workingDir"/>
        <error to="Kill"/>
    </action>
    <action name="reset_workingDir">
        <fs>
            <delete path="${workingDir}"/>
            <mkdir path="${workingDir}"/>
        </fs>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>