diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json index 125a82037..bae6dedc5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json @@ -1,21 +1,32 @@ [ { "paramName":"s", - "paramLongName":"sourcePath", + "paramLongName":"graphPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true }, { "paramName": "out", - "paramLongName": "outputPath", + "paramLongName": "targetPath", "paramDescription": "the path used to store temporary output files", "paramRequired": true - }, + }, { + "paramName": "o", + "paramLongName": "orcidPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true +}, { + "paramName": "w", + "paramLongName": "workingDir", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true +}, { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", + "paramName": "m", + "paramLongName": "matchingSource", + "paramDescription": "the path used to store temporary output files", "paramRequired": false } + ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 8eaa79c53..211ab0200 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -92,21 +92,14 @@ - + - - - - - - - - + yarn cluster ORCIDPropagation-PreparePhase1-Publications - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkPropagateOrcidAuthor dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -119,239 +112,17 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=8000 - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/orcid/targetOrcidAssoc - --allowedsemrels${allowedsemrels} + --graphPath${sourcePath}/ + --orcidPath${sourcePath}/ + --workingDir${workingDir}/ + --targetPath${outputPath}/ + --matchingSourcegraph - + - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Dataset - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/orcid/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-ORP - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/orcid/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Software - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/orcid/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase2 - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${workingDir}/orcid/targetOrcidAssoc - --outputPath${workingDir}/orcid/mergedOrcidAssoc - - - - - - - - - - - - - - - yarn - cluster - ORCIDPropagation-Publication - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=15000 - - --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - - - - - - - - yarn - cluster - ORCIDPropagation-Dataset - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=8000 - - --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - - - - - - - - yarn - cluster - ORCIDPropagation-ORP - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=8000 - - --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - - - - - - - - yarn - cluster - ORCIDPropagation-Software - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=4000 - - --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - - - - - -