diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json
index 125a82037..bae6dedc5 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json
@@ -1,21 +1,32 @@
[
{
"paramName":"s",
- "paramLongName":"sourcePath",
+ "paramLongName":"graphPath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
- "paramLongName": "outputPath",
+ "paramLongName": "targetPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
- },
+ }, {
+ "paramName": "o",
+ "paramLongName": "orcidPath",
+ "paramDescription": "the path of the ORCID data to read",
+ "paramRequired": true
+}, {
+ "paramName": "w",
+ "paramLongName": "workingDir",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+},
{
- "paramName": "ssm",
- "paramLongName": "isSparkSessionManaged",
- "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramName": "m",
+ "paramLongName": "matchingSource",
+ "paramDescription": "the source used for the matching (e.g. graph)",
"paramRequired": false
}
+
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
index 8eaa79c53..211ab0200 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
@@ -92,21 +92,14 @@
-
+
-
-
-
-
-
-
-
-
+
yarn
cluster
ORCIDPropagation-PreparePhase1-Publications
- eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1
+ eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkPropagateOrcidAuthor
dhp-enrichment-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
@@ -119,239 +112,17 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=8000
- --sourcePath${sourcePath}
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
- --outputPath${workingDir}/orcid/targetOrcidAssoc
- --allowedsemrels${allowedsemrels}
+ --graphPath${sourcePath}/
+ --orcidPath${sourcePath}/
+ --workingDir${workingDir}/
+ --targetPath${outputPath}/
+ --matchingSourcegraph
-
+
-
-
- yarn
- cluster
- ORCIDPropagation-PreparePhase1-Dataset
- eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1
- dhp-enrichment-${projectVersion}.jar
-
- --executor-cores=${sparkExecutorCores}
- --executor-memory=${sparkExecutorMemory}
- --driver-memory=${sparkDriverMemory}
- --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
- --outputPath${workingDir}/orcid/targetOrcidAssoc
- --allowedsemrels${allowedsemrels}
-
-
-
-
-
-
- yarn
- cluster
- ORCIDPropagation-PreparePhase1-ORP
- eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1
- dhp-enrichment-${projectVersion}.jar
-
- --executor-cores=${sparkExecutorCores}
- --executor-memory=${sparkExecutorMemory}
- --driver-memory=${sparkDriverMemory}
- --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
- --outputPath${workingDir}/orcid/targetOrcidAssoc
- --allowedsemrels${allowedsemrels}
-
-
-
-
-
-
-
- yarn
- cluster
- ORCIDPropagation-PreparePhase1-Software
- eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1
- dhp-enrichment-${projectVersion}.jar
-
- --executor-cores=${sparkExecutorCores}
- --executor-memory=${sparkExecutorMemory}
- --driver-memory=${sparkDriverMemory}
- --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
- --outputPath${workingDir}/orcid/targetOrcidAssoc
- --allowedsemrels${allowedsemrels}
-
-
-
-
-
-
-
-
-
- yarn
- cluster
- ORCIDPropagation-PreparePhase2
- eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2
- dhp-enrichment-${projectVersion}.jar
-
- --executor-cores=${sparkExecutorCores}
- --executor-memory=${sparkExecutorMemory}
- --driver-memory=${sparkDriverMemory}
- --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${workingDir}/orcid/targetOrcidAssoc
- --outputPath${workingDir}/orcid/mergedOrcidAssoc
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- yarn
- cluster
- ORCIDPropagation-Publication
- eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob
- dhp-enrichment-${projectVersion}.jar
-
- --executor-cores=${sparkExecutorCores}
- --executor-memory=${sparkExecutorMemory}
- --driver-memory=${sparkDriverMemory}
- --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=15000
-
- --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc
- --sourcePath${sourcePath}/publication
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
- --outputPath${outputPath}/publication
-
-
-
-
-
-
-
- yarn
- cluster
- ORCIDPropagation-Dataset
- eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob
- dhp-enrichment-${projectVersion}.jar
-
- --executor-cores=${sparkExecutorCores}
- --executor-memory=${sparkExecutorMemory}
- --driver-memory=${sparkDriverMemory}
- --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=8000
-
- --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc
- --sourcePath${sourcePath}/dataset
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
- --outputPath${outputPath}/dataset
-
-
-
-
-
-
-
- yarn
- cluster
- ORCIDPropagation-ORP
- eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob
- dhp-enrichment-${projectVersion}.jar
-
- --executor-cores=${sparkExecutorCores}
- --executor-memory=${sparkExecutorMemory}
- --driver-memory=${sparkDriverMemory}
- --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=8000
-
- --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc
- --sourcePath${sourcePath}/otherresearchproduct
- --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
- --outputPath${outputPath}/otherresearchproduct
-
-
-
-
-
-
-
- yarn
- cluster
- ORCIDPropagation-Software
- eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob
- dhp-enrichment-${projectVersion}.jar
-
- --executor-cores=${sparkExecutorCores}
- --executor-memory=${sparkExecutorMemory}
- --driver-memory=${sparkDriverMemory}
- --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.shuffle.partitions=4000
-
- --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc
- --sourcePath${sourcePath}/software
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
- --outputPath${outputPath}/software
-
-
-
-
-
-