diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json
index 8d2133075..c74496350 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json
@@ -11,18 +11,6 @@
"paramDescription": "the hive metastore uris",
"paramRequired": true
},
- {
- "paramName":"wu",
- "paramLongName":"writeUpdate",
- "paramDescription": "true if the update must be writte. No double check if information is already present",
- "paramRequired": true
- },
- {
- "paramName":"sg",
- "paramLongName":"saveGraph",
- "paramDescription": "true if the new version of the graph must be saved",
- "paramRequired": true
- },
{
"paramName":"dop",
"paramLongName":"datasourceOrganizationPath",
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json
index 215f69fcc..9da25874a 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json
@@ -1,10 +1,4 @@
[
- {
- "paramName":"mt",
- "paramLongName":"master",
- "paramDescription": "should be local or yarn",
- "paramRequired": true
- },
{
"paramName":"s",
"paramLongName":"sourcePath",
@@ -16,5 +10,53 @@
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": true
+ },
+ {
+ "paramName":"wu",
+ "paramLongName":"writeUpdate",
+    "paramDescription": "true if the update must be written. No double check if information is already present",
+ "paramRequired": true
+ },
+ {
+ "paramName":"sg",
+ "paramLongName":"saveGraph",
+ "paramDescription": "true if the new version of the graph must be saved",
+ "paramRequired": true
+ },
+ {
+ "paramName":"dop",
+ "paramLongName":"datasourceOrganizationPath",
+ "paramDescription": "path where to store/find association from datasource and organization",
+ "paramRequired": true
+ },
+ {
+ "paramName":"alp",
+ "paramLongName":"alreadyLinkedPath",
+ "paramDescription": "path where to store/find already linked results and organizations",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ },
+ {
+ "paramName": "test",
+ "paramLongName": "isTest",
+ "paramDescription": "true if it is a test running",
+ "paramRequired": false
+ },
+ {
+ "paramName":"tn",
+ "paramLongName":"resultTableName",
+ "paramDescription": "the name of the result table we are currently working on",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
}
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml
index ea3a4d922..2744ea92b 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml
@@ -19,4 +19,40 @@
hive_metastore_uris
thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
+
+ spark2YarnHistoryServerAddress
+ http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
+
+
+ spark2EventLogDir
+ /user/spark/spark2ApplicationHistory
+
+
+ spark2ExtraListeners
+ com.cloudera.spark.lineage.NavigatorAppListener
+
+
+ spark2SqlQueryExecutionListeners
+ com.cloudera.spark.lineage.NavigatorQueryListener
+
+
+ sparkExecutorNumber
+ 4
+
+
+ sparkDriverMemory
+ 15G
+
+
+ sparkExecutorMemory
+ 6G
+
+
+ sparkExecutorCores
+ 1
+
+
+ spark2MaxExecutors
+ 50
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
index 08a07f497..19e7e6507 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
@@ -5,47 +5,227 @@
the source path
- sparkDriverMemory
- memory for driver process
+ writeUpdate
+ writes the information found for the update. No double check done if the information is already present
- sparkExecutorMemory
- memory for individual executor
-
-
- sparkExecutorCores
- number of cores used by single executor
+ saveGraph
+ writes new version of the graph after the propagation step
-
-
+
+
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
-
- ${jobTracker}
- ${nameNode}
- yarn-cluster
- cluster
- AffiliationPropagation
- eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob
- dhp-propagation-${projectVersion}.jar
- --executor-memory ${sparkExecutorMemory}
- --executor-cores ${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
- --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
-
- -mt yarn-cluster
- --sourcePath${sourcePath}
- --hive_metastore_uris${hive_metastore_uris}
-
-
-
-
+
+
+
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+ ${nameNode}/${sourcePath}/relation
+ ${nameNode}/${workingDir}/resulttoorganization_propagation/relation
+
+
+
+
+
+
+
+ yarn
+ cluster
+ PrepareResultOrganizationAssociation
+ eu.dnetlib.dhp.resulttoorganizationfrominstrepo.PrepareResultInstRepoAssociation
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+
+ --sourcePath${sourcePath}
+ --hive_metastore_uris${hive_metastore_uris}
+ --datasourceOrganizationPath${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization
+ --alreadyLinkedPath${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ resultToOrganizationFromInstRepoPropagationForPublications
+ eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob2
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --sourcePath${sourcePath}/publication
+ --hive_metastore_uris${hive_metastore_uris}
+ --writeUpdate${writeUpdate}
+ --saveGraph${saveGraph}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath${workingDir}/resulttoorganization_propagation/relation
+ --datasourceOrganizationPath${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization
+ --alreadyLinkedPath${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked
+
+
+
+
+
+
+
+ yarn
+ cluster
+ resultToOrganizationFromInstRepoPropagationForDataset
+ eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob2
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --sourcePath${sourcePath}/dataset
+ --hive_metastore_uris${hive_metastore_uris}
+ --writeUpdate${writeUpdate}
+ --saveGraph${saveGraph}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath${workingDir}/resulttoorganization_propagation/relation
+ --datasourceOrganizationPath${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization
+ --alreadyLinkedPath${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked
+
+
+
+
+
+
+
+ yarn
+ cluster
+ resultToOrganizationFromInstRepoPropagationForORP
+ eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob2
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --sourcePath${sourcePath}/otherresearchproduct
+ --hive_metastore_uris${hive_metastore_uris}
+ --writeUpdate${writeUpdate}
+ --saveGraph${saveGraph}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath${workingDir}/resulttoorganization_propagation/relation
+ --datasourceOrganizationPath${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization
+ --alreadyLinkedPath${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked
+
+
+
+
+
+
+
+ yarn
+ cluster
+ resultToOrganizationFromInstRepoPropagationForSoftware
+ eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob2
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --sourcePath${sourcePath}/software
+ --hive_metastore_uris${hive_metastore_uris}
+ --writeUpdate${writeUpdate}
+ --saveGraph${saveGraph}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
+ --outputPath${workingDir}/resulttoorganization_propagation/relation
+ --datasourceOrganizationPath${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization
+ --alreadyLinkedPath${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
\ No newline at end of file