From e94ae0b1de11f31dad6367605538371d9fe13d6b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:18:11 +0200 Subject: [PATCH] Hosted By Map - extention of the workflow to consider also the application of the map to publications and datasources --- .../graph/hostedbymap/oozie_app/workflow.xml | 106 ++++++++++++++++-- 1 file changed, 99 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index ecf6c3b31..30e500da3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -9,6 +9,10 @@ outputPath the output path + + hostedByMapPath + the output path + sparkDriverMemory memory for driver process @@ -65,23 +69,30 @@ - + + + + + ${wf:conf('resumeFrom') eq 'ProduceHBM'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + - - + + - @@ -134,13 +145,94 @@ --datasourcePath${sourcePath}/datasource --workingPath${workingDir} - --outputPath${outputPath} + --outputPath${hostedByMapPath} --masteryarn-cluster - + + + + yarn-cluster + Prepare info to apply the hbm + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --hostedByMapPath${hostedByMapPath} + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + + + + + + yarn-cluster + Apply hbm to result + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/publication + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + yarn-cluster + Apply hbm to datasource + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToDatasource + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/datasource + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + +