diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json
index ea2f0b96c..41d7905c2 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json
@@ -1,32 +1,38 @@
[
- {
- "paramName":"mt",
- "paramLongName":"master",
- "paramDescription": "should be local or yarn",
- "paramRequired": true
- },
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
- {
- "paramName":"as",
- "paramLongName":"allowedsemrels",
- "paramDescription": "the allowed sematinc relations for propagation",
- "paramRequired": true
- },
- {
- "paramName":"wu",
- "paramLongName":"writeUpdate",
- "paramDescription": "true if the update must be writte. No double check if information is already present",
- "paramRequired": true
- },
{
"paramName":"sg",
"paramLongName":"saveGraph",
"paramDescription": "true if the new version of the graph must be saved",
"paramRequired": true
+ },
+ {
+ "paramName":"h",
+ "paramLongName":"hive_metastore_uris",
+ "paramDescription": "the hive metastore uris",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ },
+ {
+ "paramName":"tn",
+ "paramLongName":"resultTableName",
+ "paramDescription": "the name of the result table we are currently working on",
+ "paramRequired": true
}
]
\ No newline at end of file
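
Note: parameter files like the one above are not read by Oozie itself; each Spark job loads its own JSON definition from the classpath and matches it against the command-line arguments assembled in workflow.xml. Below is a minimal sketch of that consumption pattern, assuming the ArgumentApplicationParser helper from dhp-common; the class name ParameterParsingSketch and the local variables are illustrative, not part of this diff.

    import java.util.Optional;

    import org.apache.commons.io.IOUtils;

    import eu.dnetlib.dhp.application.ArgumentApplicationParser;

    public class ParameterParsingSketch {
        public static void main(String[] args) throws Exception {
            // Load the JSON parameter definition shipped in the jar (the file edited above).
            String jsonConfiguration = IOUtils
                .toString(
                    ParameterParsingSketch.class
                        .getResourceAsStream(
                            "/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json"));

            ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
            parser.parseArgument(args); // args are the <arg> values passed by workflow.xml

            // isSparkSessionManaged is the only optional parameter (paramRequired: false),
            // so fall back to a managed session when it is absent.
            Boolean isSparkSessionManaged = Optional
                .ofNullable(parser.get("isSparkSessionManaged"))
                .map(Boolean::valueOf)
                .orElse(Boolean.TRUE);

            String sourcePath = parser.get("sourcePath"); // -s / --sourcePath
            String resultTableName = parser.get("resultTableName"); // -tn / --resultTableName
        }
    }
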
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json
index ea2f0b96c..08648d61a 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json
@@ -1,10 +1,4 @@
[
- {
- "paramName":"mt",
- "paramLongName":"master",
- "paramDescription": "should be local or yarn",
- "paramRequired": true
- },
{
"paramName":"s",
"paramLongName":"sourcePath",
@@ -18,15 +12,27 @@
"paramRequired": true
},
{
- "paramName":"wu",
- "paramLongName":"writeUpdate",
- "paramDescription": "true if the update must be writte. No double check if information is already present",
+ "paramName":"h",
+ "paramLongName":"hive_metastore_uris",
+ "paramDescription": "the hive metastore uris",
"paramRequired": true
},
{
- "paramName":"sg",
- "paramLongName":"saveGraph",
- "paramDescription": "true if the new version of the graph must be saved",
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ },
+ {
+ "paramName":"tn",
+ "paramLongName":"resultTableName",
+ "paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]
\ No newline at end of file
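
The hive_metastore_uris parameter added in both files exists so the job can point its Spark session at the right Hive metastore before touching the result tables. Below is a minimal sketch of that wiring, assuming the standard SparkSession builder API; parser is the hypothetical ArgumentApplicationParser instance from the sketch above.

    import org.apache.spark.SparkConf;
    import org.apache.spark.sql.SparkSession;

    // ...
    SparkConf conf = new SparkConf();
    // value delivered by workflow.xml as: --hive_metastore_uris ${hive_metastore_uris}
    conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));

    SparkSession spark = SparkSession
        .builder()
        .appName("ORCIDpropagationFromSemRel")
        .config(conf)
        .enableHiveSupport()
        .getOrCreate();
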
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json
index 08648d61a..1a67134a6 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json
@@ -5,18 +5,6 @@
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
- {
- "paramName":"as",
- "paramLongName":"allowedsemrels",
- "paramDescription": "the allowed sematinc relations for propagation",
- "paramRequired": true
- },
- {
- "paramName":"h",
- "paramLongName":"hive_metastore_uris",
- "paramDescription": "the hive metastore uris",
- "paramRequired": true
- },
{
"paramName": "out",
"paramLongName": "outputPath",
@@ -28,11 +16,5 @@
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
- },
- {
- "paramName":"tn",
- "paramLongName":"resultTableName",
- "paramDescription": "the name of the result table we are currently working on",
- "paramRequired": true
}
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
index b4fca5e58..a3c043bf0 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
@@ -8,18 +8,6 @@
            <name>allowedsemrels</name>
            <description>the semantic relationships allowed for propagation</description>
        </property>
-        <property>
-            <name>sparkDriverMemory</name>
-            <description>memory for driver process</description>
-        </property>
-        <property>
-            <name>sparkExecutorMemory</name>
-            <description>memory for individual executor</description>
-        </property>
-        <property>
-            <name>sparkExecutorCores</name>
-            <description>number of cores used by single executor</description>
-        </property>
        <property>
            <name>writeUpdate</name>
            <description>writes the information found for the update. No double check done if the information is already present</description>
@@ -30,12 +18,133 @@
-    <start to="OrcidToResultFromSemRelPropagation"/>
+    <start to="fork_prepare_assoc_step1"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
+    <fork name="fork_prepare_assoc_step1">
+        <path start="join_prepare_publication"/>
+        <path start="join_prepare_dataset"/>
+        <path start="join_prepare_otherresearchproduct"/>
+        <path start="join_prepare_software"/>
+    </fork>
+    <action name="join_prepare_publication">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>ORCIDpropagationFromSemRel</name>
+            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
+            <jar>dhp-propagation-${projectVersion}.jar</jar>
+            <spark-opts>
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
+            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
+        </spark>
+        <ok to="wait"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="join_prepare_dataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>ORCIDpropagationFromSemRel</name>
+            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
+            <jar>dhp-propagation-${projectVersion}.jar</jar>
+            <spark-opts>
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
+            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
+        </spark>
+        <ok to="wait"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="join_prepare_otherresearchproduct">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>ORCIDpropagationFromSemRel</name>
+            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
+            <jar>dhp-propagation-${projectVersion}.jar</jar>
+            <spark-opts>
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
+            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
+        </spark>
+        <ok to="wait"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="join_prepare_software">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>ORCIDpropagationFromSemRel</name>
+            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1</class>
+            <jar>dhp-propagation-${projectVersion}.jar</jar>
+            <spark-opts>
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
+            <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
+        </spark>
+        <ok to="wait"/>
+        <error to="Kill"/>
+    </action>
+
+    <join name="wait" to="prepareorcidtoresult"/>
+
            <job-tracker>${jobTracker}</job-tracker>
@@ -67,5 +176,145 @@
+
+    <action name="prepareorcidtoresult">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>ORCIDpropagationFromSemRel</name>
+            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2</class>
+            <jar>dhp-propagation-${projectVersion}.jar</jar>
+            <spark-opts>
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
+        </spark>
+        <ok to="fork_exec_propagation"/>
+        <error to="Kill"/>
+    </action>
+
+    <fork name="fork_exec_propagation">
+        <path start="join_propagate_publication"/>
+        <path start="join_propagate_dataset"/>
+        <path start="join_propagate_otherresearchproduct"/>
+        <path start="join_propagate_software"/>
+    </fork>
+
+    <action name="join_propagate_publication">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>ORCIDpropagationFromSemRel</name>
+            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob3</class>
+            <jar>dhp-propagation-${projectVersion}.jar</jar>
+            <spark-opts>
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/publication</arg>
+        </spark>
+        <ok to="wait2"/>
+        <error to="Kill"/>
+    </action>
+    <action name="join_propagate_dataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>ORCIDpropagationFromSemRel</name>
+            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob3</class>
+            <jar>dhp-propagation-${projectVersion}.jar</jar>
+            <spark-opts>
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
+        </spark>
+        <ok to="wait2"/>
+        <error to="Kill"/>
+    </action>
+    <action name="join_propagate_otherresearchproduct">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>ORCIDpropagationFromSemRel</name>
+            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob3</class>
+            <jar>dhp-propagation-${projectVersion}.jar</jar>
+            <spark-opts>
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
+        </spark>
+        <ok to="wait2"/>
+        <error to="Kill"/>
+    </action>
+    <action name="join_propagate_software">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>ORCIDpropagationFromSemRel</name>
+            <class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob3</class>
+            <jar>dhp-propagation-${projectVersion}.jar</jar>
+            <spark-opts>
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
+            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/software</arg>
+        </spark>
+        <ok to="wait2"/>
+        <error to="Kill"/>
+    </action>
+    <join name="wait2" to="End"/>
    <end name="End"/>
</workflow-app>
\ No newline at end of file