forked from D-Net/dnet-hadoop
parent d410ea8a41
commit 4740c808f7
@@ -90,7 +90,7 @@ public class PrepareDatasourceCountryAssociation {
 				(FilterFunction<Datasource>) ds -> !ds.getDataInfo().getDeletedbyinference() &&
 					Optional.ofNullable(ds.getDatasourcetype()).isPresent() &&
 					Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent() &&
-					(allowedtypes.contains(ds.getDatasourcetype().getClassid()) ||
+					(allowedtypes.contains(ds.getJurisdiction().getClassid()) ||
 						whitelist.contains(ds.getId())));

 		// filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass
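The only functional change in this hunk replaces the datasource type classid with the jurisdiction classid in the selection predicate. Below is a minimal, self-contained sketch of the resulting rule; the DatasourceInfo record, its field names, and the example classid/id values are illustrative assumptions, not the real eu.dnetlib.dhp.schema.oaf.Datasource model.

import java.util.List;
import java.util.Set;

public class DatasourceCountryFilterSketch {

	// Hypothetical flattened view of the fields the real filter inspects.
	record DatasourceInfo(String id, boolean deletedByInference, String jurisdictionClassid) {}

	// Keep non-deleted datasources whose jurisdiction classid is allowed,
	// or whose id appears in the explicit whitelist.
	static boolean keep(DatasourceInfo ds, Set<String> allowedtypes, Set<String> whitelist) {
		return !ds.deletedByInference()
			&& ds.jurisdictionClassid() != null
			&& (allowedtypes.contains(ds.jurisdictionClassid()) || whitelist.contains(ds.id()));
	}

	public static void main(String[] args) {
		Set<String> allowedtypes = Set.of("National");                  // example value only
		Set<String> whitelist = Set.of("10|example____::whitelisted");  // example id only

		List<DatasourceInfo> datasources = List.of(
			new DatasourceInfo("10|example____::a", false, "National"),
			new DatasourceInfo("10|example____::b", false, "Other"),
			new DatasourceInfo("10|example____::whitelisted", false, "Other"),
			new DatasourceInfo("10|example____::c", true, "National"));

		datasources.forEach(ds -> System.out.println(ds.id() + " -> " + keep(ds, allowedtypes, whitelist)));
	}
}

In the real job the same predicate is applied as a Spark FilterFunction over the Datasource dataset, as the context lines in the hunk above show.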
@@ -1,32 +0,0 @@
[
  {
    "paramName": "p",
    "paramLongName": "hdfsPath",
    "paramDescription": "the path where storing the sequential file",
    "paramRequired": true
  },
  {
    "paramName": "nn",
    "paramLongName": "hdfsNameNode",
    "paramDescription": "the name node on hdfs",
    "paramRequired": true
  },
  {
    "paramName": "pgurl",
    "paramLongName": "postgresUrl",
    "paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
    "paramRequired": true
  },
  {
    "paramName": "pguser",
    "paramLongName": "postgresUser",
    "paramDescription": "postgres user",
    "paramRequired": false
  },
  {
    "paramName": "pgpasswd",
    "paramLongName": "postgresPassword",
    "paramDescription": "postgres password",
    "paramRequired": false
  }
]
@@ -1,38 +0,0 @@
[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "pm",
    "paramLongName": "pathMap",
    "paramDescription": "the json path associated to each selection field",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "tg",
    "paramLongName": "taggingConf",
    "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
    "paramRequired": false
  },
  {
    "paramName": "bu",
    "paramLongName": "baseURL",
    "paramDescription": "this parameter is to specify the api to be queried (beta or production)",
    "paramRequired": false
  }
]
@@ -1,21 +0,0 @@
[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "wp",
    "paramLongName": "workingPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  }
]
@@ -1,41 +0,0 @@
[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "dmp",
    "paramLongName": "datasourceMapPath",
    "paramDescription": "the path where the association datasource master has been stored",
    "paramRequired": true
  },
  {
    "paramName": "tn",
    "paramLongName": "resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "wp",
    "paramLongName": "workingPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "rt",
    "paramLongName": "resultType",
    "paramDescription": "the result type",
    "paramRequired": true
  }
]
@@ -1,54 +0,0 @@
<configuration>
    <property><name>jobTracker</name><value>yarnRM</value></property>
    <property><name>nameNode</name><value>hdfs://nameservice1</value></property>
    <property><name>oozie.use.system.libpath</name><value>true</value></property>
    <property><name>oozie.action.sharelib.for.spark</name><value>spark2</value></property>
    <property><name>hive_metastore_uris</name><value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value></property>
    <property><name>spark2YarnHistoryServerAddress</name><value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value></property>
    <property><name>spark2ExtraListeners</name><value>com.cloudera.spark.lineage.NavigatorAppListener</value></property>
    <property><name>spark2SqlQueryExecutionListeners</name><value>com.cloudera.spark.lineage.NavigatorQueryListener</value></property>
    <property><name>sparkExecutorNumber</name><value>4</value></property>
    <property><name>spark2EventLogDir</name><value>/user/spark/spark2ApplicationHistory</value></property>
    <property><name>sparkDriverMemory</name><value>15G</value></property>
    <property><name>sparkExecutorMemory</name><value>6G</value></property>
    <property><name>sparkExecutorCores</name><value>1</value></property>
</configuration>
@@ -1,120 +0,0 @@
<workflow-app name="bulk_tagging" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property><name>sourcePath</name><description>the source path</description></property>
        <property><name>pathMap</name><description>the json path associated to each selection field</description></property>
        <property><name>outputPath</name><description>the output path</description></property>
        <property><name>baseURL</name><description>the community API base URL</description></property>
    </parameters>

    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property><name>oozie.action.sharelib.for.spark</name><value>${oozieActionShareLibForSpark2}</value></property>
        </configuration>
    </global>

    <start to="exec_bulktag"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="reset_outputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="copy_entities"/>
        <error to="Kill"/>
    </action>

    <fork name="copy_entities">
        <path start="copy_relation"/>
        <path start="copy_organization"/>
        <path start="copy_projects"/>
        <path start="copy_datasources"/>
    </fork>

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${outputPath}/relation</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>

    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${sourcePath}/organization</arg>
            <arg>${nameNode}/${outputPath}/organization</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>

    <action name="copy_projects">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${sourcePath}/project</arg>
            <arg>${nameNode}/${outputPath}/project</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>

    <action name="copy_datasources">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${sourcePath}/datasource</arg>
            <arg>${nameNode}/${outputPath}/datasource</arg>
        </distcp>
        <ok to="copy_wait"/>
        <error to="Kill"/>
    </action>

    <join name="copy_wait" to="exec_bulktag"/>

    <action name="exec_bulktag">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>bulkTagging-result</name>
            <class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
            <arg>--outputPath</arg><arg>${outputPath}/</arg>
            <arg>--pathMap</arg><arg>${pathMap}</arg>
            <arg>--baseURL</arg><arg>${baseURL}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>

</workflow-app>
@@ -1,32 +0,0 @@
[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "tn",
    "paramLongName": "resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "p",
    "paramLongName": "preparedInfoPath",
    "paramDescription": "the path where prepared info has been stored",
    "paramRequired": false
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  }
]
@@ -1,32 +0,0 @@
[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "w",
    "paramLongName": "whitelist",
    "paramDescription": "the datasources having a type different from the allowed ones but that we want to add anyway",
    "paramRequired": true
  },
  {
    "paramName": "at",
    "paramLongName": "allowedtypes",
    "paramDescription": "the allowed datasource types for country propagation",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  }
]
@@ -1,38 +0,0 @@
[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the output path",
    "paramRequired": true
  },
  {
    "paramName": "w",
    "paramLongName": "workingPath",
    "paramDescription": "the working path",
    "paramRequired": true
  },
  {
    "paramName": "tn",
    "paramLongName": "resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "p",
    "paramLongName": "preparedInfoPath",
    "paramDescription": "the path where prepared info has been stored",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  }
]
@@ -1,58 +0,0 @@
<configuration>
    <property><name>jobTracker</name><value>yarnRM</value></property>
    <property><name>nameNode</name><value>hdfs://nameservice1</value></property>
    <property><name>oozie.use.system.libpath</name><value>true</value></property>
    <property><name>oozie.action.sharelib.for.spark</name><value>spark2</value></property>
    <property><name>hive_metastore_uris</name><value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value></property>
    <property><name>spark2YarnHistoryServerAddress</name><value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value></property>
    <property><name>spark2EventLogDir</name><value>/user/spark/spark2ApplicationHistory</value></property>
    <property><name>spark2ExtraListeners</name><value>com.cloudera.spark.lineage.NavigatorAppListener</value></property>
    <property><name>spark2SqlQueryExecutionListeners</name><value>com.cloudera.spark.lineage.NavigatorQueryListener</value></property>
    <property><name>sparkExecutorNumber</name><value>4</value></property>
    <property><name>sparkDriverMemory</name><value>15G</value></property>
    <property><name>sparkExecutorMemory</name><value>6G</value></property>
    <property><name>sparkExecutorCores</name><value>1</value></property>
    <property><name>spark2MaxExecutors</name><value>50</value></property>
</configuration>
@ -1,375 +0,0 @@
|
||||||
<workflow-app name="country_propagation" xmlns="uri:oozie:workflow:0.5">
|
|
||||||
<parameters>
|
|
||||||
<property>
|
|
||||||
<name>sourcePath</name>
|
|
||||||
<description>the source path</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>whitelist</name>
|
|
||||||
<description>the white list</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>allowedtypes</name>
|
|
||||||
<description>the allowed types</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>outputPath</name>
|
|
||||||
<description>the output path</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
</parameters>
|
|
||||||
|
|
||||||
<global>
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
|
||||||
<value>${oozieActionShareLibForSpark2}</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
</global>
|
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
|
||||||
|
|
||||||
<kill name="Kill">
|
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
|
||||||
</kill>
|
|
||||||
|
|
||||||
<action name="reset_outputpath">
|
|
||||||
<fs>
|
|
||||||
<delete path="${outputPath}"/>
|
|
||||||
<mkdir path="${outputPath}"/>
|
|
||||||
</fs>
|
|
||||||
<ok to="copy_entities"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="copy_entities">
|
|
||||||
<path start="copy_relation"/>
|
|
||||||
<path start="copy_organization"/>
|
|
||||||
<path start="copy_projects"/>
|
|
||||||
<path start="copy_datasources"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="copy_relation">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_organization">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_projects">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_datasources">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="copy_wait" to="prepare_datasource_country_association"/>
|
|
||||||
|
|
||||||
<action name="prepare_datasource_country_association">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>PrepareDatasourceCountryAssociation</name>
|
|
||||||
<class>eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--whitelist</arg><arg>${whitelist}</arg>
|
|
||||||
<arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="fork_join_prepare_result_country"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="fork_join_prepare_result_country">
|
|
||||||
<path start="join_prepareresult_publication"/>
|
|
||||||
<path start="join_prepareresult_dataset"/>
|
|
||||||
<path start="join_prepareresult_otherresearchproduct"/>
|
|
||||||
<path start="join_prepareresult_software"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="join_prepareresult_publication">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>prepareResultCountry-Publication</name>
|
|
||||||
<class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
|
|
||||||
<arg>--workingPath</arg><arg>${workingDir}/workingP</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait_prepare"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_prepareresult_dataset">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>prepareResultCountry-Dataset</name>
|
|
||||||
<class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
|
|
||||||
<arg>--workingPath</arg><arg>${workingDir}/workingD</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait_prepare"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_prepareresult_otherresearchproduct">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>prepareResultCountry-ORP</name>
|
|
||||||
<class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
|
||||||
<arg>--workingPath</arg><arg>${workingDir}/workingO</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait_prepare"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_prepareresult_software">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>prepareResultCountry-Software</name>
|
|
||||||
<class>eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
|
|
||||||
<arg>--workingPath</arg><arg>${workingDir}/workingS</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait_prepare"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait_prepare" to="fork_join_apply_country_propagation"/>
|
|
||||||
|
|
||||||
<fork name="fork_join_apply_country_propagation">
|
|
||||||
<path start="join_propagation_publication"/>
|
|
||||||
<path start="join_propagation_dataset"/>
|
|
||||||
<path start="join_propagation_otherresearchproduct"/>
|
|
||||||
<path start="join_propagation_software"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="join_propagation_publication">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>countryPropagationForPublications</name>
|
|
||||||
<class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/publication</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagation_dataset">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>countryPropagationForDataset</name>
|
|
||||||
<class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/dataset</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagation_otherresearchproduct">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>countryPropagationForORP</name>
|
|
||||||
<class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagation_software">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>countryPropagationForSoftware</name>
|
|
||||||
<class>eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/software</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait" to="End"/>
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
|
|
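The deleted country_propagation workflow above chains three Spark stages: PrepareDatasourceCountryAssociation (datasource-to-country associations for allowed or whitelisted datasources), PrepareResultCountrySet per result type, and SparkCountryPropagationJob per result type. The sketch below is a rough, in-memory illustration of that data flow under the assumption that results inherit the countries of the datasources that provide them (the IsProvidedBy relations mentioned in the Java hunk); the toy ids and country codes are made up and the real jobs operate on Spark datasets, not maps.

import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

public class CountryPropagationSketch {

	public static void main(String[] args) {
		// Stage 1 (PrepareDatasourceCountryAssociation): selected datasource id -> country.
		Map<String, String> datasourceCountry = Map.of(
			"10|ds::1", "NL",
			"10|ds::2", "IT");

		// IsProvidedBy relations: result id -> datasources providing it (toy data).
		Map<String, Set<String>> resultProvidedBy = Map.of(
			"50|res::a", Set.of("10|ds::1"),
			"50|res::b", Set.of("10|ds::1", "10|ds::2"),
			"50|res::c", Set.of("10|ds::3")); // datasource not selected -> no country

		// Stage 2 (PrepareResultCountrySet): result id -> set of candidate countries.
		Map<String, Set<String>> resultCountrySet = resultProvidedBy.entrySet().stream()
			.collect(Collectors.toMap(Map.Entry::getKey,
				e -> e.getValue().stream()
					.map(datasourceCountry::get)
					.filter(Objects::nonNull)
					.collect(Collectors.toSet())));

		// Stage 3 (SparkCountryPropagationJob): enrich each result with the prepared countries.
		resultCountrySet.forEach((result, countries) ->
			System.out.println(result + " <- " + countries));
	}
}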
@@ -1,50 +0,0 @@
[
  {
    "paramName": "gp",
    "paramLongName": "graphPath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "h",
    "paramLongName": "hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "lp",
    "paramLongName": "leavesPath",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName": "cp",
    "paramLongName": "childParentPath",
    "paramDescription": "path where to store/find association from datasource and organization",
    "paramRequired": true
  },
  {
    "paramName": "rp",
    "paramLongName": "resultOrgPath",
    "paramDescription": "path where to store/find already linked results and organizations",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "the path where prepared info have been stored",
    "paramRequired": false
  },
  {
    "paramName": "rep",
    "paramLongName": "relationPath",
    "paramDescription": "the path where to store the selected subset of relations",
    "paramRequired": false
  },
  {
    "paramName": "pop",
    "paramLongName": "projectOrganizationPath",
    "paramDescription": "the number of iterations to be computed",
    "paramRequired": true
  }
]
@@ -1,62 +0,0 @@
[
  {
    "paramName": "rep",
    "paramLongName": "relationPath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "h",
    "paramLongName": "hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "lp",
    "paramLongName": "leavesPath",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName": "cp",
    "paramLongName": "childParentPath",
    "paramDescription": "path where to store/find association from datasource and organization",
    "paramRequired": true
  },
  {
    "paramName": "rp",
    "paramLongName": "resultOrgPath",
    "paramDescription": "path where to store/find already linked results and organizations",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "the path where prepared info have been stored",
    "paramRequired": false
  },
  {
    "paramName": "wd",
    "paramLongName": "workingDir",
    "paramDescription": "true if it is a test running",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "it",
    "paramLongName": "iterations",
    "paramDescription": "the number of iterations to be computed",
    "paramRequired": false
  },
  {
    "paramName": "pop",
    "paramLongName": "projectOrganizationPath",
    "paramDescription": "the number of iterations to be computed",
    "paramRequired": true
  }
]
@@ -1,58 +0,0 @@
<configuration>
    <property><name>jobTracker</name><value>yarnRM</value></property>
    <property><name>nameNode</name><value>hdfs://nameservice1</value></property>
    <property><name>oozie.use.system.libpath</name><value>true</value></property>
    <property><name>oozie.action.sharelib.for.spark</name><value>spark2</value></property>
    <property><name>hive_metastore_uris</name><value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value></property>
    <property><name>spark2YarnHistoryServerAddress</name><value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value></property>
    <property><name>spark2EventLogDir</name><value>/user/spark/spark2ApplicationHistory</value></property>
    <property><name>spark2ExtraListeners</name><value>com.cloudera.spark.lineage.NavigatorAppListener</value></property>
    <property><name>spark2SqlQueryExecutionListeners</name><value>com.cloudera.spark.lineage.NavigatorQueryListener</value></property>
    <property><name>sparkExecutorNumber</name><value>4</value></property>
    <property><name>sparkDriverMemory</name><value>15G</value></property>
    <property><name>sparkExecutorMemory</name><value>6G</value></property>
    <property><name>sparkExecutorCores</name><value>1</value></property>
    <property><name>spark2MaxExecutors</name><value>50</value></property>
</configuration>
@ -1,105 +0,0 @@
|
||||||
<workflow-app name="affiliation_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
|
|
||||||
<parameters>
|
|
||||||
<property>
|
|
||||||
<name>sourcePath</name>
|
|
||||||
<description>the source path</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>iterations</name>
|
|
||||||
<description>the number of hops to be done up on the hierarchy</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
</parameters>
|
|
||||||
|
|
||||||
<global>
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
|
||||||
<value>${oozieActionShareLibForSpark2}</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
</global>
|
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
|
||||||
|
|
||||||
<kill name="Kill">
|
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
|
||||||
</kill>
|
|
||||||
|
|
||||||
<action name="reset_outputpath">
|
|
||||||
<fs>
|
|
||||||
<delete path="${workingDir}"/>
|
|
||||||
<mkdir path="${workingDir}"/>
|
|
||||||
</fs>
|
|
||||||
<ok to="prepare_info"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="prepare_info">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>PrepareResultProjectOrganizationAssociation</name>
|
|
||||||
<class>eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--graphPath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--leavesPath</arg><arg>${workingDir}/preparedInfo/leavesPath</arg>
|
|
||||||
<arg>--childParentPath</arg><arg>${workingDir}/preparedInfo/childParentPath</arg>
|
|
||||||
<arg>--resultOrgPath</arg><arg>${workingDir}/preparedInfo/resultOrgPath</arg>
|
|
||||||
<arg>--projectOrganizationPath</arg><arg>${workingDir}/preparedInfo/projectOrganizationPath</arg>
|
|
||||||
<arg>--relationPath</arg><arg>${workingDir}/preparedInfo/relation</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="apply_resulttoorganization_propagation"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="apply_resulttoorganization_propagation">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>resultProjectToOrganizationFromSemRel</name>
|
|
||||||
<class>eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--relationPath</arg><arg>${workingDir}/preparedInfo/relation</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${sourcePath}/relation</arg>
|
|
||||||
<arg>--leavesPath</arg><arg>${workingDir}/preparedInfo/leavesPath</arg>
|
|
||||||
<arg>--childParentPath</arg><arg>${workingDir}/preparedInfo/childParentPath</arg>
|
|
||||||
<arg>--resultOrgPath</arg><arg>${workingDir}/preparedInfo/resultOrgPath</arg>
|
|
||||||
<arg>--projectOrganizationPath</arg><arg>${workingDir}/preparedInfo/projectOrganizationPath</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--workingDir</arg><arg>${workingDir}/working</arg>
|
|
||||||
<arg>--iterations</arg><arg>${iterations}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="End"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
|
|
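In the deleted affiliation_from_semrel_propagation workflow above, PrepareInfo extracts leaf organizations, child-parent links and existing result/project-to-organization links, and SparkEntityToOrganizationFromSemRel then adds relations by walking up the organization hierarchy for at most ${iterations} hops. The sketch below illustrates that reading of the iterations parameter with plain in-memory maps; it is an assumption for illustration, not the actual implementation.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class HierarchyHopSketch {

	// For each organization already linked to a result, also link the parent
	// organizations reachable within `iterations` child -> parent hops.
	static Set<String> propagateUp(Set<String> linkedOrgs, Map<String, String> childParent, int iterations) {
		Set<String> result = new HashSet<>(linkedOrgs);
		Set<String> frontier = new HashSet<>(linkedOrgs);
		for (int i = 0; i < iterations; i++) {
			Set<String> next = new HashSet<>();
			for (String org : frontier) {
				String parent = childParent.get(org);
				if (parent != null && result.add(parent)) {
					next.add(parent);
				}
			}
			if (next.isEmpty()) break; // reached the top of the hierarchy
			frontier = next;
		}
		return result;
	}

	public static void main(String[] args) {
		Map<String, String> childParent = new HashMap<>();
		childParent.put("org::dept", "org::faculty");
		childParent.put("org::faculty", "org::university");

		// A result linked to the department; with 2 hops it also reaches the university.
		System.out.println(propagateUp(Set.of("org::dept"), childParent, 2));
	}
}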
@@ -1,44 +0,0 @@
[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "sg",
    "paramLongName": "saveGraph",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": false
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "tn",
    "paramLongName": "resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "pu",
    "paramLongName": "possibleUpdatesPath",
    "paramDescription": "the path where the association resultId orcid author list can be found",
    "paramRequired": true
  },
  {
    "paramName": "test",
    "paramLongName": "isTest",
    "paramDescription": "true if it is executing a test",
    "paramRequired": false
  }
]
@@ -1,38 +0,0 @@
[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "as",
    "paramLongName": "allowedsemrels",
    "paramDescription": "the allowed semantic relations for propagation",
    "paramRequired": true
  },
  {
    "paramName": "h",
    "paramLongName": "hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "tn",
    "paramLongName": "resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  }
]
@@ -1,20 +0,0 @@
[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  }
]
@@ -1,58 +0,0 @@
<configuration>
    <property><name>jobTracker</name><value>yarnRM</value></property>
    <property><name>nameNode</name><value>hdfs://nameservice1</value></property>
    <property><name>oozie.use.system.libpath</name><value>true</value></property>
    <property><name>oozie.action.sharelib.for.spark</name><value>spark2</value></property>
    <property><name>spark2YarnHistoryServerAddress</name><value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value></property>
    <property><name>hive_metastore_uris</name><value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value></property>
    <property><name>spark2EventLogDir</name><value>/user/spark/spark2ApplicationHistory</value></property>
    <property><name>spark2ExtraListeners</name><value>com.cloudera.spark.lineage.NavigatorAppListener</value></property>
    <property><name>spark2SqlQueryExecutionListeners</name><value>com.cloudera.spark.lineage.NavigatorQueryListener</value></property>
    <property><name>sparkExecutorNumber</name><value>4</value></property>
    <property><name>sparkDriverMemory</name><value>15G</value></property>
    <property><name>sparkExecutorMemory</name><value>6G</value></property>
    <property><name>sparkExecutorCores</name><value>1</value></property>
    <property><name>spark2MaxExecutors</name><value>50</value></property>
</configuration>
@ -1,371 +0,0 @@
|
||||||
<workflow-app name="orcid_to_result_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
|
|
||||||
<parameters>
|
|
||||||
<property>
|
|
||||||
<name>sourcePath</name>
|
|
||||||
<description>the source path</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>allowedsemrels</name>
|
|
||||||
<description>the semantic relationships allowed for propagation</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>outputPath</name>
|
|
||||||
<description>the output path</description>
|
|
||||||
</property>
|
|
||||||
</parameters>
|
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
|
||||||
|
|
||||||
<kill name="Kill">
|
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
|
||||||
</kill>
|
|
||||||
|
|
||||||
<action name="reset_outputpath">
|
|
||||||
<fs>
|
|
||||||
<delete path="${outputPath}"/>
|
|
||||||
<mkdir path="${outputPath}"/>
|
|
||||||
</fs>
|
|
||||||
<ok to="copy_entities"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="copy_entities">
|
|
||||||
<path start="copy_relation"/>
|
|
||||||
<path start="copy_organization"/>
|
|
||||||
<path start="copy_projects"/>
|
|
||||||
<path start="copy_datasources"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="copy_relation">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_organization">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_projects">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_datasources">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="copy_wait" to="fork_prepare_assoc_step1"/>
|
|
||||||
|
|
||||||
<fork name="fork_prepare_assoc_step1">
|
|
||||||
<path start="join_prepare_publication"/>
|
|
||||||
<path start="join_prepare_dataset"/>
|
|
||||||
<path start="join_prepare_otherresearchproduct"/>
|
|
||||||
<path start="join_prepare_software"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="join_prepare_publication">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ORCIDPropagation-PreparePhase1-Publications</name>
|
|
||||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
|
||||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_prepare_dataset">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ORCIDPropagation-PreparePhase1-Dataset</name>
|
|
||||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
|
||||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_prepare_otherresearchproduct">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ORCIDPropagation-PreparePhase1-ORP</name>
|
|
||||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
|
||||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_prepare_software">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ORCIDPropagation-PreparePhase1-Software</name>
|
|
||||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
|
||||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait" to="prepare_assoc_step2"/>
|
|
||||||
|
|
||||||
<action name="prepare_assoc_step2">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ORCIDPropagation-PreparePhase2</name>
|
|
||||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="fork-join-exec-propagation"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="fork-join-exec-propagation">
|
|
||||||
<path start="join_propagate_publication"/>
|
|
||||||
<path start="join_propagate_dataset"/>
|
|
||||||
<path start="join_propagate_otherresearchproduct"/>
|
|
||||||
<path start="join_propagate_software"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="join_propagate_publication">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ORCIDPropagation-Publication</name>
|
|
||||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagate_dataset">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ORCIDPropagation-Dataset</name>
|
|
||||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagate_otherresearchproduct">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ORCIDPropagation-ORP</name>
|
|
||||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagate_software">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ORCIDPropagation-Software</name>
|
|
||||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
--conf spark.speculation=false
|
|
||||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
|
||||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait2" to="End"/>
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
|
|
@ -1,33 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName":"s",
|
|
||||||
"paramLongName":"sourcePath",
|
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
|
|
||||||
{
|
|
||||||
"paramName":"asr",
|
|
||||||
"paramLongName":"allowedsemrels",
|
|
||||||
"paramDescription": "the types of the allowed datasources. Split by ;",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"h",
|
|
||||||
"paramLongName":"hive_metastore_uris",
|
|
||||||
"paramDescription": "the hive metastore uris",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"pu",
|
|
||||||
"paramLongName":"potentialUpdatePath",
|
|
||||||
"paramDescription": "the path of the potential updates ",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"al",
|
|
||||||
"paramLongName":"alreadyLinkedPath",
|
|
||||||
"paramDescription": "the path of the already linked project result_set",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
]
|
|
|
@ -1,44 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName":"h",
|
|
||||||
"paramLongName":"hive_metastore_uris",
|
|
||||||
"paramDescription": "the hive metastore uris",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"sg",
|
|
||||||
"paramLongName":"saveGraph",
|
|
||||||
"paramDescription": "true if the new version of the graph must be saved",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"pu",
|
|
||||||
"paramLongName":"potentialUpdatePath",
|
|
||||||
"paramDescription": "the path of the potential updates ",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"al",
|
|
||||||
"paramLongName":"alreadyLinkedPath",
|
|
||||||
"paramDescription": "the path of the already linked project result_set",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "ssm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "the path where prepared info have been stored",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "out",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "the path used to store temporary output files",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "test",
|
|
||||||
"paramLongName": "isTest",
|
|
||||||
"paramDescription": "true if it is a test running",
|
|
||||||
"paramRequired": false
|
|
||||||
}
|
|
||||||
]
|
|
|
@ -1,63 +0,0 @@
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>jobTracker</name>
|
|
||||||
<value>yarnRM</value>
|
|
||||||
<!-- <value>hadoop-rm3.garr-pa1.d4science.org:8032</value>-->
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>nameNode</name>
|
|
||||||
<!-- <value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>-->
|
|
||||||
<value>hdfs://nameservice1</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.use.system.libpath</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
|
||||||
<value>spark2</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>hive_metastore_uris</name>
|
|
||||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
|
||||||
<!-- <value>thrift://hadoop-edge3.garr-pa1.d4science.org:9083</value>-->
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2YarnHistoryServerAddress</name>
|
|
||||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2EventLogDir</name>
|
|
||||||
<value>/user/spark/spark2ApplicationHistory</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2ExtraListeners</name>
|
|
||||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2SqlQueryExecutionListeners</name>
|
|
||||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorNumber</name>
|
|
||||||
<value>4</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkDriverMemory</name>
|
|
||||||
<value>15G</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorMemory</name>
|
|
||||||
<value>6G</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorCores</name>
|
|
||||||
<value>1</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2MaxExecutors</name>
|
|
||||||
<value>50</value>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
|
|
||||||
</configuration>
|
|
|
@ -1,184 +0,0 @@
|
||||||
<workflow-app name="project_to_result_propagation" xmlns="uri:oozie:workflow:0.5">
|
|
||||||
<parameters>
|
|
||||||
<property>
|
|
||||||
<name>sourcePath</name>
|
|
||||||
<description>the source path</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>allowedsemrels</name>
|
|
||||||
<description>the allowed semantics </description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>outputPath</name>
|
|
||||||
<description>the output path</description>
|
|
||||||
</property>
|
|
||||||
</parameters>
|
|
||||||
|
|
||||||
<global>
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
|
||||||
<value>${oozieActionShareLibForSpark2}</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
</global>
|
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
|
||||||
|
|
||||||
<kill name="Kill">
|
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
|
||||||
</kill>
|
|
||||||
|
|
||||||
<action name="reset_outputpath">
|
|
||||||
<fs>
|
|
||||||
<delete path="${outputPath}"/>
|
|
||||||
<mkdir path="${outputPath}"/>
|
|
||||||
</fs>
|
|
||||||
<ok to="copy_entities"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="copy_entities">
|
|
||||||
<path start="copy_relation"/>
|
|
||||||
<path start="copy_publication"/>
|
|
||||||
<path start="copy_dataset"/>
|
|
||||||
<path start="copy_orp"/>
|
|
||||||
<path start="copy_software"/>
|
|
||||||
<path start="copy_organization"/>
|
|
||||||
<path start="copy_projects"/>
|
|
||||||
<path start="copy_datasources"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="copy_relation">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_publication">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/publication</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/publication</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_dataset">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/dataset</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_orp">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_software">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/software</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/software</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_organization">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_projects">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_datasources">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait" to="prepare_project_results_association"/>
|
|
||||||
|
|
||||||
<action name="prepare_project_results_association">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>PrepareProjectResultsAssociation</name>
|
|
||||||
<class>eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
|
||||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--potentialUpdatePath</arg><arg>${workingDir}/preparedInfo/potentialUpdates</arg>
|
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="apply_propagation"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="apply_propagation">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ProjectToResultPropagation</name>
|
|
||||||
<class>eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
|
||||||
<arg>--potentialUpdatePath</arg><arg>${workingDir}/preparedInfo/potentialUpdates</arg>
|
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="End"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
|
|
@ -1,28 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName":"s",
|
|
||||||
"paramLongName":"sourcePath",
|
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
|
|
||||||
{
|
|
||||||
"paramName": "out",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "the path used to store temporary output files",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "ssm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "p",
|
|
||||||
"paramLongName": "preparedInfoPath",
|
|
||||||
"paramDescription": "the path where prepared info have been stored",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
|
|
||||||
]
|
|
|
@ -1,33 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName":"s",
|
|
||||||
"paramLongName":"sourcePath",
|
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"h",
|
|
||||||
"paramLongName":"hive_metastore_uris",
|
|
||||||
"paramDescription": "the hive metastore uris",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "ssm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "out",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "the path used to store temporary output files",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "bu",
|
|
||||||
"paramLongName": "baseURL",
|
|
||||||
"paramDescription": "the base URL to the community API to use",
|
|
||||||
"paramRequired": false
|
|
||||||
}
|
|
||||||
|
|
||||||
]
|
|
|
@ -1,58 +0,0 @@
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>jobTracker</name>
|
|
||||||
<value>yarnRM</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>nameNode</name>
|
|
||||||
<value>hdfs://nameservice1</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.use.system.libpath</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
|
||||||
<value>spark2</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>hive_metastore_uris</name>
|
|
||||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2YarnHistoryServerAddress</name>
|
|
||||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2EventLogDir</name>
|
|
||||||
<value>/user/spark/spark2ApplicationHistory</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2ExtraListeners</name>
|
|
||||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2SqlQueryExecutionListeners</name>
|
|
||||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorNumber</name>
|
|
||||||
<value>4</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkDriverMemory</name>
|
|
||||||
<value>15G</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorMemory</name>
|
|
||||||
<value>6G</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorCores</name>
|
|
||||||
<value>1</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2MaxExecutors</name>
|
|
||||||
<value>50</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
|
@ -1,147 +0,0 @@
|
||||||
<workflow-app name="community_to_result_propagation" xmlns="uri:oozie:workflow:0.5">
|
|
||||||
<parameters>
|
|
||||||
<property>
|
|
||||||
<name>sourcePath</name>
|
|
||||||
<description>the source path</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>outputPath</name>
|
|
||||||
<description>the output path</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>baseURL</name>
|
|
||||||
<description>the community API base URL</description>
|
|
||||||
</property>
|
|
||||||
</parameters>
|
|
||||||
|
|
||||||
<global>
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
|
||||||
<value>${oozieActionShareLibForSpark2}</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
</global>
|
|
||||||
|
|
||||||
<start to="prepare_result_communitylist"/>
|
|
||||||
|
|
||||||
<kill name="Kill">
|
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
|
||||||
</kill>
|
|
||||||
|
|
||||||
<action name="reset_outputpath">
|
|
||||||
<fs>
|
|
||||||
<delete path="${outputPath}"/>
|
|
||||||
<mkdir path="${outputPath}"/>
|
|
||||||
</fs>
|
|
||||||
<ok to="copy_entities"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="copy_entities">
|
|
||||||
<path start="copy_relation"/>
|
|
||||||
<path start="copy_organization"/>
|
|
||||||
<path start="copy_projects"/>
|
|
||||||
<path start="copy_datasources"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="copy_relation">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_organization">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_projects">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_datasources">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="copy_wait" to="prepare_result_communitylist"/>
|
|
||||||
|
|
||||||
<action name="prepare_result_communitylist">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>Prepare-Community-Result-Organization</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=6
|
|
||||||
--executor-memory=5G
|
|
||||||
--conf spark.executor.memoryOverhead=3g
|
|
||||||
--conf spark.sql.shuffle.partitions=3284
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--baseURL</arg><arg>${baseURL}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="exec-propagation"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="exec-propagation">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>community2resultfromorganization</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=6
|
|
||||||
--executor-memory=5G
|
|
||||||
--conf spark.executor.memoryOverhead=3g
|
|
||||||
--conf spark.sql.shuffle.partitions=3284
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="End"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
|
|
@ -1,28 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName":"s",
|
|
||||||
"paramLongName":"sourcePath",
|
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
|
|
||||||
{
|
|
||||||
"paramName": "out",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "the path used to store temporary output files",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "ssm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "p",
|
|
||||||
"paramLongName": "preparedInfoPath",
|
|
||||||
"paramDescription": "the path where prepared info have been stored",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
|
|
||||||
]
|
|
|
@ -1,28 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName":"s",
|
|
||||||
"paramLongName":"sourcePath",
|
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
|
|
||||||
{
|
|
||||||
"paramName": "ssm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "out",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "the path used to store temporary output files",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "bu",
|
|
||||||
"paramLongName": "baseURL",
|
|
||||||
"paramDescription": "the path used to store temporary output files",
|
|
||||||
"paramRequired": false
|
|
||||||
}
|
|
||||||
|
|
||||||
]
|
|
|
@ -1,58 +0,0 @@
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>jobTracker</name>
|
|
||||||
<value>yarnRM</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>nameNode</name>
|
|
||||||
<value>hdfs://nameservice1</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.use.system.libpath</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
|
||||||
<value>spark2</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>hive_metastore_uris</name>
|
|
||||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2YarnHistoryServerAddress</name>
|
|
||||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2EventLogDir</name>
|
|
||||||
<value>/user/spark/spark2ApplicationHistory</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2ExtraListeners</name>
|
|
||||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2SqlQueryExecutionListeners</name>
|
|
||||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorNumber</name>
|
|
||||||
<value>4</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkDriverMemory</name>
|
|
||||||
<value>15G</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorMemory</name>
|
|
||||||
<value>6G</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorCores</name>
|
|
||||||
<value>1</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2MaxExecutors</name>
|
|
||||||
<value>50</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
|
@ -1,144 +0,0 @@
|
||||||
<workflow-app name="community_to_result_propagation_project" xmlns="uri:oozie:workflow:0.5">
|
|
||||||
<parameters>
|
|
||||||
<property>
|
|
||||||
<name>sourcePath</name>
|
|
||||||
<description>the source path</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>outputPath</name>
|
|
||||||
<description>the output path</description>
|
|
||||||
</property>
|
|
||||||
</parameters>
|
|
||||||
|
|
||||||
<global>
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
|
||||||
<value>${oozieActionShareLibForSpark2}</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
||||||
</global>
|
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
|
||||||
|
|
||||||
<kill name="Kill">
|
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
|
||||||
</kill>
|
|
||||||
|
|
||||||
<action name="reset_outputpath">
|
|
||||||
<fs>
|
|
||||||
<delete path="${outputPath}"/>
|
|
||||||
<mkdir path="${outputPath}"/>
|
|
||||||
</fs>
|
|
||||||
<ok to="copy_entities"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="copy_entities">
|
|
||||||
<path start="copy_relation"/>
|
|
||||||
<path start="copy_organization"/>
|
|
||||||
<path start="copy_projects"/>
|
|
||||||
<path start="copy_datasources"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="copy_relation">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_organization">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_projects">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_datasources">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="copy_wait" to="prepare_result_communitylist"/>
|
|
||||||
|
|
||||||
<action name="prepare_result_communitylist">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>Prepare-Community-Result-Organization</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromproject.PrepareResultCommunitySet</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=6
|
|
||||||
--executor-memory=5G
|
|
||||||
--conf spark.executor.memoryOverhead=3g
|
|
||||||
--conf spark.sql.shuffle.partitions=3284
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
|
||||||
<arg>--production</arg><arg>${production}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="exec-propagation"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="exec-propagation">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>community2resultfromproject</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromproject.SparkResultToCommunityFromProject</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=6
|
|
||||||
--executor-memory=5G
|
|
||||||
--conf spark.executor.memoryOverhead=3g
|
|
||||||
--conf spark.sql.shuffle.partitions=3284
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="End"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
|
|
@ -1,52 +0,0 @@
|
||||||
[
|
|
||||||
|
|
||||||
{
|
|
||||||
"paramName":"s",
|
|
||||||
"paramLongName":"sourcePath",
|
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"sg",
|
|
||||||
"paramLongName":"saveGraph",
|
|
||||||
"paramDescription": "true if the new version of the graph must be saved",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"h",
|
|
||||||
"paramLongName":"hive_metastore_uris",
|
|
||||||
"paramDescription": "the hive metastore uris",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "ssm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "out",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "the path used to store temporary output files",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"tn",
|
|
||||||
"paramLongName":"resultTableName",
|
|
||||||
"paramDescription": "the name of the result table we are currently working on",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "p",
|
|
||||||
"paramLongName": "preparedInfoPath",
|
|
||||||
"paramDescription": "the path where prepared info have been stored",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"test",
|
|
||||||
"paramLongName":"isTest",
|
|
||||||
"paramDescription": "true if it is executing a test",
|
|
||||||
"paramRequired": false
|
|
||||||
}
|
|
||||||
|
|
||||||
]
|
|
|
@ -1,20 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName":"s",
|
|
||||||
"paramLongName":"sourcePath",
|
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "ssm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "out",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "the path used to store temporary output files",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
]
|
|
|
@ -1,44 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName":"bu",
|
|
||||||
"paramLongName":"baseURL",
|
|
||||||
"paramDescription": "URL of the isLookUp Service",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"s",
|
|
||||||
"paramLongName":"sourcePath",
|
|
||||||
"paramDescription": "the path of the sequencial file to read",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"as",
|
|
||||||
"paramLongName":"allowedsemrels",
|
|
||||||
"paramDescription": "the allowed semantic relations for propagation",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"h",
|
|
||||||
"paramLongName":"hive_metastore_uris",
|
|
||||||
"paramDescription": "the hive metastore uris",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "ssm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "out",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "the path used to store temporary output files",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName":"tn",
|
|
||||||
"paramLongName":"resultTableName",
|
|
||||||
"paramDescription": "the name of the result table we are currently working on",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
]
|
|
|
@ -1,58 +0,0 @@
|
||||||
<configuration>
|
|
||||||
<property>
|
|
||||||
<name>jobTracker</name>
|
|
||||||
<value>yarnRM</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>nameNode</name>
|
|
||||||
<value>hdfs://nameservice1</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.use.system.libpath</name>
|
|
||||||
<value>true</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
|
||||||
<value>spark2</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>hive_metastore_uris</name>
|
|
||||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2YarnHistoryServerAddress</name>
|
|
||||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2EventLogDir</name>
|
|
||||||
<value>/user/spark/spark2ApplicationHistory</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2ExtraListeners</name>
|
|
||||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2SqlQueryExecutionListeners</name>
|
|
||||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorNumber</name>
|
|
||||||
<value>4</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkDriverMemory</name>
|
|
||||||
<value>15G</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorMemory</name>
|
|
||||||
<value>6G</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorCores</name>
|
|
||||||
<value>1</value>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>spark2MaxExecutors</name>
|
|
||||||
<value>50</value>
|
|
||||||
</property>
|
|
||||||
</configuration>
|
|
|
@ -1,366 +0,0 @@
|
||||||
<workflow-app name="result_to_community_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
|
|
||||||
<parameters>
|
|
||||||
<property>
|
|
||||||
<name>sourcePath</name>
|
|
||||||
<description>the source path</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>allowedsemrels</name>
|
|
||||||
<description>the semantic relationships allowed for propagation</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>baseURL</name>
|
|
||||||
<description>the baseurl for the comminity APIs</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>outputPath</name>
|
|
||||||
<description>the output path</description>
|
|
||||||
</property>
|
|
||||||
</parameters>
|
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
|
||||||
|
|
||||||
<kill name="Kill">
|
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
|
||||||
</kill>
|
|
||||||
|
|
||||||
<action name="reset_outputpath">
|
|
||||||
<fs>
|
|
||||||
<delete path="${outputPath}"/>
|
|
||||||
<mkdir path="${outputPath}"/>
|
|
||||||
</fs>
|
|
||||||
<ok to="copy_entities"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="copy_entities">
|
|
||||||
<path start="copy_relation"/>
|
|
||||||
<path start="copy_organization"/>
|
|
||||||
<path start="copy_projects"/>
|
|
||||||
<path start="copy_datasources"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="copy_relation">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_organization">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_projects">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_datasources">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="copy_wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="copy_wait" to="fork_prepare_assoc_step1"/>
|
|
||||||
|
|
||||||
<fork name="fork_prepare_assoc_step1">
|
|
||||||
<path start="join_prepare_publication"/>
|
|
||||||
<path start="join_prepare_dataset"/>
|
|
||||||
<path start="join_prepare_otherresearchproduct"/>
|
|
||||||
<path start="join_prepare_software"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="join_prepare_publication">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ResultToCommunitySemRel-PreparePhase1-Publications</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
|
||||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
|
||||||
<arg>--baseURL</arg><arg>${baseURL}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_prepare_dataset">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ResultToCommunitySemRel-PreparePhase1-Dataset</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
|
||||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_prepare_otherresearchproduct">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ResultToCommunitySemRel-PreparePhase1-ORP</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
|
||||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_prepare_software">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ResultToCommunitySemRel-PreparePhase1-Software</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
|
||||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait" to="prepare_assoc_step2"/>
|
|
||||||
|
|
||||||
<action name="prepare_assoc_step2">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ResultToCommunityEmRelPropagation-PreparePhase2</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="fork-join-exec-propagation"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="fork-join-exec-propagation">
|
|
||||||
<path start="join_propagate_publication"/>
|
|
||||||
<path start="join_propagate_dataset"/>
|
|
||||||
<path start="join_propagate_otherresearchproduct"/>
|
|
||||||
<path start="join_propagate_software"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="join_propagate_publication">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>Result2CommunitySemRelPropagation-Publication</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagate_dataset">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>Result2CommunitySemRelPropagation-Dataset</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagate_otherresearchproduct">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>Result2CommunitySemRelPropagation-ORP</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagate_software">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>Result2CommunitySemRelPropagation-Software</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait2" to="End"/>
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
|
|
@@ -1,20 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},

{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "the path where prepared info have been stored",
"paramRequired": false
},{
"paramName": "o",
"paramLongName": "outputPath",
"paramDescription": "institutional repositories that should not be considered for the propagation",
"paramRequired": false
}
]

@@ -1,32 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": true
},

{
"paramName":"wp",
"paramLongName":"workingPath",
"paramDescription": "the working path",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "the path where prepared info have been stored",
"paramRequired": false
},{
"paramName": "bl",
"paramLongName": "blacklist",
"paramDescription": "institutional repositories that should not be considered for the propagation",
"paramRequired": false
}
]

@@ -1,56 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": true
},
{
"paramName":"sg",
"paramLongName":"saveGraph",
"paramDescription": "true if the new version of the graph must be saved",
"paramRequired": false
},
{
"paramName":"dop",
"paramLongName":"datasourceOrganizationPath",
"paramDescription": "path where to store/find association from datasource and organization",
"paramRequired": true
},
{
"paramName":"alp",
"paramLongName":"alreadyLinkedPath",
"paramDescription": "path where to store/find already linked results and organizations",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "the path where prepared info have been stored",
"paramRequired": false
},
{
"paramName": "test",
"paramLongName": "isTest",
"paramDescription": "true if it is a test running",
"paramRequired": false
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
}
]

@@ -1,58 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
</property>
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
</property>
<property>
<name>sparkExecutorNumber</name>
<value>4</value>
</property>
<property>
<name>sparkDriverMemory</name>
<value>15G</value>
</property>
<property>
<name>sparkExecutorMemory</name>
<value>6G</value>
</property>
<property>
<name>sparkExecutorCores</name>
<value>1</value>
</property>
<property>
<name>spark2MaxExecutors</name>
<value>50</value>
</property>
</configuration>

@@ -1,277 +0,0 @@
<workflow-app name="affiliation_from_instrepo_propagation" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>sets the outputPath</description>
</property>
</parameters>

<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>

<start to="reset_outputpath"/>

<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>

<action name="reset_outputpath">
<fs>
<delete path="${outputPath}"/>
<mkdir path="${outputPath}"/>
</fs>
<ok to="copy_entities"/>
<error to="Kill"/>
</action>

<fork name="copy_entities">
<path start="copy_relation"/>
<path start="copy_publication"/>
<path start="copy_dataset"/>
<path start="copy_orp"/>
<path start="copy_software"/>
<path start="copy_organization"/>
<path start="copy_projects"/>
<path start="copy_datasources"/>
</fork>

<action name="copy_relation">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_publication">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/publication</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/publication</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_dataset">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/dataset</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_orp">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_software">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/software</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/software</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_organization">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_projects">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="copy_datasources">
|
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
|
||||||
</distcp>
|
|
||||||
<ok to="wait"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait" to="prepare_result_organization_association"/>
|
|
||||||
|
|
||||||
<action name="prepare_result_organization_association">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>PrepareResultOrganizationAssociation</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.PrepareResultInstRepoAssociation</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
|
||||||
<arg>--blacklist</arg><arg>${blacklist}</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="fork_join_apply_resulttoorganization_propagation"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="fork_join_apply_resulttoorganization_propagation">
|
|
||||||
<path start="join_propagation_publication"/>
|
|
||||||
<path start="join_propagation_dataset"/>
|
|
||||||
<path start="join_propagation_otherresearchproduct"/>
|
|
||||||
<path start="join_propagation_software"/>
|
|
||||||
</fork>
|
|
||||||
|
|
||||||
<action name="join_propagation_publication">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>resultToOrganizationFromInstRepoPropagationForPublications</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagation_dataset">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>resultToOrganizationFromInstRepoPropagationForDataset</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagation_otherresearchproduct">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>resultToOrganizationFromInstRepoPropagationForORP</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="join_propagation_software">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>resultToOrganizationFromInstRepoPropagationForSoftware</name>
|
|
||||||
<class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class>
|
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
--conf spark.dynamicAllocation.enabled=true
|
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="wait2"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<join name="wait2" to="End"/>
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
|
|
@@ -1,12 +1,12 @@
sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched
-resumeFrom=AffiliationSemanticRelation
+resumeFrom=CountryPropagation
allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo
allowedsemrelsresultproject=isSupplementedBy;isSupplementTo
allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo
datasourceWhitelistForCountryPropagation=10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14
datasourceWhitelistForCountryPropagation=10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48
-allowedtypes=pubsrepository::institutional
+#allowedtypes=pubsrepository::institutional
+allowedtypes=Institutional
outputPath=/tmp/miriam/enrichment_one_step
organizationtoresultcommunitymap={"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|ukri________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|ukri________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|ukri________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|ukri________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|ukri________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|ukri________::23a79ebdfa59790864e4a485881568c1":["beopen"], 
"20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"], "20|openorgs____::d11f981828c485cd23d93f7f24f24db1":["eut"], "20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a":["eut"], "20|openorgs____::526468206bca24c1c90da6a312295cf4":["eut"], "20|openorgs____::08e311e656e65ccb32e07c66b15b6ff7":["eut"], "20|openorgs____::55a1f889758964b77682904218fdb298":["eut"], "20|openorgs____::530092b6970d60a5329beb9f39e8d7d4":["eut"], "20|openorgs____::aadafa39392b3e200102596a3a4aad9d":["eut"], "20|openorgs____::c3fe999c74fad308132b8a5971367dce":["eut"], "20|openorgs____::1624ff7c01bb641b91f4518539a0c28a":["aurora"], "20|openorgs____::cdda7cfe17c89eb50628ec2eb1f8acd2":["aurora"], "20|openorgs____::818b75030e0e40612d69e049843ede7e":["aurora"], "20|openorgs____::0b0102bae51f4f4ef5ba57fbe1523b92":["aurora"], "20|openorgs____::ed47496b44722f0e9d7b98898189be0d":["aurora"], "20|openorgs____::eb0669daa9efeb898a3090d8aac7c953":["aurora"], "20|openorgs____::eb391317ed0dc684aa81ac16265de041":["aurora"], "20|openorgs____::f7cfcc98245e22c7d6e321cde930e746":["aurora"], "20|openorgs____::f33179d3306ba2599f7a898b056b604f":["aurora"], "20|pending_org_::75c41e6dd18466709ef359323d96fa05":["aurora"]}

pathMap ={"author":"$['author'][*]['fullname']", \
"title":"$['title'][*]['value']",\
"orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\
@@ -29,10 +29,6 @@
<name>outputPath</name>
<description>the output path</description>
</property>
-<property>
-<name>organizationtoresultcommunitymap</name>
-<description>organization community map</description>
-</property>
<property>
<name>pathMap</name>
<description>the json path associated to each selection field</description>
@@ -315,7 +311,7 @@
</property>
<property>
<name>allowedtypes</name>
-<value>${allowedtupes}</value>
+<value>${allowedtypes}</value>
</property>
</configuration>
</sub-workflow>

@@ -26,12 +26,20 @@
</configuration>
</global>

-<start to="exec_bulktag"/>
+<start to="reset_outputpath"/>

<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>

+<action name="reset_outputpath">
+<fs>
+<delete path="${workingDir}"/>
+<mkdir path="${workingDir}"/>
+</fs>
+<ok to="exec_bulktag"/>
+<error to="Kill"/>
+</action>

<action name="exec_bulktag">
<spark xmlns="uri:oozie:spark-action:0.2">

@@ -25,12 +25,20 @@
</configuration>
</global>

-<start to="prepare_datasource_country_association"/>
+<start to="reset_outputpath"/>

<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>

+<action name="reset_outputpath">
+<fs>
+<delete path="${workingDir}"/>
+<mkdir path="${workingDir}"/>
+</fs>
+<ok to="prepare_datasource_country_association"/>
+<error to="Kill"/>
+</action>

<action name="prepare_datasource_country_association">

@@ -18,12 +18,20 @@
</configuration>
</global>

-<start to="prepare_info"/>
+<start to="reset_outputpath"/>

<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>

+<action name="reset_outputpath">
+<fs>
+<delete path="${workingDir}"/>
+<mkdir path="${workingDir}"/>
+</fs>
+<ok to="prepare_info"/>
+<error to="Kill"/>
+</action>

<action name="prepare_info">
<spark xmlns="uri:oozie:spark-action:0.2">

@@ -22,12 +22,20 @@
</configuration>
</global>

-<start to="prepare_project_results_association"/>
+<start to="reset_outputpath"/>

<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>

+<action name="reset_outputpath">
+<fs>
+<delete path="${workingDir}"/>
+<mkdir path="${workingDir}"/>
+</fs>
+<ok to="prepare_project_results_association"/>
+<error to="Kill"/>
+</action>

<action name="prepare_project_results_association">
<spark xmlns="uri:oozie:spark-action:0.2">

@@ -21,12 +21,21 @@
</configuration>
</global>

-<start to="prepare_result_communitylist"/>
+<start to="reset_outputpath"/>

<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>

+<action name="reset_outputpath">
+<fs>
+<delete path="${workingDir}"/>
+<mkdir path="${workingDir}"/>
+</fs>
+<ok to="prepare_result_communitylist"/>
+<error to="Kill"/>
+</action>

<action name="prepare_result_communitylist">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>

@@ -75,9 +84,9 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
-<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
+<arg>--preparedInfoPath</arg><arg>${workingDir}/communityorganization/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
-<arg>--outputPath</arg><arg>${workingDir}/resulttocommunityfromorganization/</arg>
+<arg>--outputPath</arg><arg>${workingDir}/communityorganization/resulttocommunityfromorganization/</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>

@@ -21,12 +21,19 @@
</configuration>
</global>

-<start to="prepare_result_communitylist"/>
+<start to="reset_outputpath"/>

<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
+<action name="reset_outputpath">
+<fs>
+<delete path="${workingDir}"/>
+<mkdir path="${workingDir}"/>
+</fs>
+<ok to="prepare_result_communitylist"/>
+<error to="Kill"/>
+</action>

<action name="prepare_result_communitylist">
<spark xmlns="uri:oozie:spark-action:0.2">

@@ -18,13 +18,20 @@
</property>
</parameters>

-<start to="fork_prepare_assoc_step1"/>
+<start to="reset_outputpath"/>

<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>

+<action name="reset_outputpath">
+<fs>
+<delete path="${workingDir}"/>
+<mkdir path="${workingDir}"/>
+</fs>
+<ok to="fork_prepare_assoc_step1"/>
+<error to="Kill"/>
+</action>

<fork name="fork_prepare_assoc_step1">
<path start="join_prepare_publication"/>

@@ -41,8 +48,10 @@
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
---executor-cores=${sparkExecutorCores}
+--executor-cores=6
---executor-memory=${sparkExecutorMemory}
+--executor-memory=5G
+--conf spark.executor.memoryOverhead=3g
+--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}

@@ -70,8 +79,10 @@
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
---executor-cores=${sparkExecutorCores}
+--executor-cores=6
---executor-memory=${sparkExecutorMemory}
+--executor-memory=5G
+--conf spark.executor.memoryOverhead=3g
+--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}

@@ -99,8 +110,10 @@
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
---executor-cores=${sparkExecutorCores}
+--executor-cores=6
---executor-memory=${sparkExecutorMemory}
+--executor-memory=5G
+--conf spark.executor.memoryOverhead=3g
+--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}

@@ -128,8 +141,10 @@
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
---executor-cores=${sparkExecutorCores}
+--executor-cores=6
---executor-memory=${sparkExecutorMemory}
+--executor-memory=5G
+--conf spark.executor.memoryOverhead=3g
+--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}

@@ -159,8 +174,10 @@
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
---executor-cores=${sparkExecutorCores}
+--executor-cores=6
---executor-memory=${sparkExecutorMemory}
+--executor-memory=5G
+--conf spark.executor.memoryOverhead=3g
+--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}

@@ -191,8 +208,10 @@
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
---executor-cores=${sparkExecutorCores}
+--executor-cores=6
---executor-memory=${sparkExecutorMemory}
+--executor-memory=5G
+--conf spark.executor.memoryOverhead=3g
+--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}

@@ -220,8 +239,10 @@
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
---executor-cores=${sparkExecutorCores}
+--executor-cores=6
---executor-memory=${sparkExecutorMemory}
+--executor-memory=5G
+--conf spark.executor.memoryOverhead=3g
+--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}

@@ -249,8 +270,10 @@
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
---executor-cores=${sparkExecutorCores}
+--executor-cores=6
---executor-memory=${sparkExecutorMemory}
+--executor-memory=5G
+--conf spark.executor.memoryOverhead=3g
+--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}

@@ -278,8 +301,10 @@
<class>eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
---executor-cores=${sparkExecutorCores}
+--executor-cores=6
---executor-memory=${sparkExecutorMemory}
+--executor-memory=5G
+--conf spark.executor.memoryOverhead=3g
+--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}

@@ -295,10 +320,11 @@
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/software</arg>

</spark>
-<ok to="End"/>
+<ok to="wait2"/>
<error to="Kill"/>
</action>

+<join name="wait2" to="End"/>

<end name="End"/>

@@ -21,12 +21,21 @@
</configuration>
</global>

-<start to="prepare_result_organization_association"/>
+<start to="reset_outputpath"/>

<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>

+<action name="reset_outputpath">
+<fs>
+<delete path="${workingDir}"/>
+<mkdir path="${workingDir}"/>
+</fs>
+<ok to="prepare_result_organization_association"/>
+<error to="Kill"/>
+</action>

<action name="prepare_result_organization_association">
<spark xmlns="uri:oozie:spark-action:0.2">

@@ -79,7 +88,7 @@
             --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
         </spark-opts>
         <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
-        <arg>--outputPath</arg><arg>${workingDir}/affiliationinstrepo/publication/relation</arg>
+        <arg>--outputPath</arg><arg>${workingDir}/affiliationInstRepo/publication/relation</arg>
         <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
         <arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
         <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@@ -108,7 +117,7 @@
             --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
         </spark-opts>
         <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
-        <arg>--outputPath</arg><arg>${workingDir}/affiliationinstrepo/dataset/relation</arg>
+        <arg>--outputPath</arg><arg>${workingDir}/affiliationInstRepo/dataset/relation</arg>
         <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
         <arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
         <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@@ -137,7 +146,7 @@
             --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
         </spark-opts>
         <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
-        <arg>--outputPath</arg><arg>${workingDir}/affiliationinstrepo/otherresearchproduct/relation</arg>
+        <arg>--outputPath</arg><arg>${workingDir}/affiliationInstRepo/otherresearchproduct/relation</arg>
         <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
         <arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
         <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@@ -166,7 +175,7 @@
             --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
         </spark-opts>
         <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
-        <arg>--outputPath</arg><arg>${workingDir}/affiliationinstrepo/software/relation</arg>
+        <arg>--outputPath</arg><arg>${workingDir}/affiliationInstRepo/software/relation</arg>
         <arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
         <arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
         <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
@@ -197,7 +206,7 @@
             --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
         </spark-opts>
         <arg>--outputPath</arg><arg>${sourcePath}/relation</arg>
-        <arg>--sourcePath</arg><arg>${workingDir}/affiliationinstrepo/</arg>
+        <arg>--sourcePath</arg><arg>${workingDir}/affiliationInstRepo/</arg>
     </spark>
     <ok to="End"/>
     <error to="Kill"/>
@@ -1,58 +0,0 @@
-<configuration>
-    <property>
-        <name>jobTracker</name>
-        <value>yarnRM</value>
-    </property>
-    <property>
-        <name>nameNode</name>
-        <value>hdfs://nameservice1</value>
-    </property>
-    <property>
-        <name>oozie.use.system.libpath</name>
-        <value>true</value>
-    </property>
-    <property>
-        <name>oozie.action.sharelib.for.spark</name>
-        <value>spark2</value>
-    </property>
-    <property>
-        <name>hive_metastore_uris</name>
-        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
-    </property>
-    <property>
-        <name>spark2YarnHistoryServerAddress</name>
-        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
-    </property>
-    <property>
-        <name>spark2EventLogDir</name>
-        <value>/user/spark/spark2ApplicationHistory</value>
-    </property>
-    <property>
-        <name>spark2ExtraListeners</name>
-        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
-    </property>
-    <property>
-        <name>spark2SqlQueryExecutionListeners</name>
-        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
-    </property>
-    <property>
-        <name>sparkExecutorNumber</name>
-        <value>4</value>
-    </property>
-    <property>
-        <name>sparkDriverMemory</name>
-        <value>15G</value>
-    </property>
-    <property>
-        <name>sparkExecutorMemory</name>
-        <value>6G</value>
-    </property>
-    <property>
-        <name>sparkExecutorCores</name>
-        <value>1</value>
-    </property>
-    <property>
-        <name>spark2MaxExecutors</name>
-        <value>50</value>
-    </property>
-</configuration>
@@ -1,97 +0,0 @@
-<workflow-app name="affiliation_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
-    <parameters>
-        <property>
-            <name>sourcePath</name>
-            <description>the source path</description>
-        </property>
-    </parameters>
-
-    <global>
-        <job-tracker>${jobTracker}</job-tracker>
-        <name-node>${nameNode}</name-node>
-        <configuration>
-            <property>
-                <name>oozie.action.sharelib.for.spark</name>
-                <value>${oozieActionShareLibForSpark2}</value>
-            </property>
-        </configuration>
-    </global>
-
-    <start to="prepare_info"/>
-
-    <kill name="Kill">
-        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-    </kill>
-
-    <action name="prepare_info">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>PrepareResultOrganizationAssociation</name>
-            <class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo</class>
-            <jar>dhp-enrichment-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-cores=${sparkExecutorCores}
-                --executor-memory=${sparkExecutorMemory}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-            </spark-opts>
-            <arg>--graphPath</arg><arg>${sourcePath}</arg>
-            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
-            <arg>--leavesPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath</arg>
-            <arg>--childParentPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath</arg>
-            <arg>--resultOrgPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath</arg>
-            <arg>--relationPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/relation</arg>
-        </spark>
-        <ok to="apply_resulttoorganization_propagation"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="apply_resulttoorganization_propagation">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>resultToOrganizationFromSemRel</name>
-            <class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel</class>
-            <jar>dhp-enrichment-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-cores=${sparkExecutorCores}
-                --executor-memory=${sparkExecutorMemory}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.dynamicAllocation.enabled=true
-                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
-                --conf spark.sql.shuffle.partitions=3840
-            </spark-opts>
-            <arg>--relationPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/relation</arg>
-            <arg>--outputPath</arg><arg>${sourcePath}</arg>
-            <arg>--leavesPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath</arg>
-            <arg>--childParentPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath</arg>
-            <arg>--resultOrgPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath</arg>
-            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
-            <arg>--workingDir</arg><arg>${workingDir}/affiliationSemanticRelation/working</arg>
-            <arg>--iterations</arg><arg>${iterations}</arg>
-        </spark>
-        <ok to="reset_workingDir"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="reset_workingDir">
-        <fs>
-            <delete path="${workingDir}"/>
-            <mkdir path="${workingDir}"/>
-        </fs>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-    <end name="End"/>
-
-</workflow-app>
@@ -1,3 +1,3 @@
 #!/bin/bash
 curl -LSs $1 | hdfs dfs -put - $2/$3
-curl -LSs http://api.crossref.org/works/10.1099/jgv.0.001453 > prova.txt
+#curl -LSs http://api.crossref.org/works/10.1099/jgv.0.001453 > prova.txt