diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index 992c2ded1..cd4d79ab7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -24,6 +24,7 @@ + @@ -31,51 +32,100 @@ - eu.dnetlib.dhp.actionmanager.project.GetFile + eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV --hdfsNameNode${nameNode} --fileURL${projectFileURL} - --hdfsPath${workingDir}/projects.csv + --hdfsPath${workingDir}/projects + --classForNameeu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject - + - + - eu.dnetlib.dhp.actionmanager.project.GetFile + eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV --hdfsNameNode${nameNode} --fileURL${programmeFileURL} - --hdfsPath${workingDir}/programme.csv + --hdfsPath${workingDir}/programme + --classForNameeu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme + + + + + + + yarn + cluster + PrepareProgramme + eu.dnetlib.dhp.actionmanager.project.PrepareProgramme + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --programmePath${workingDir}/programme + --outputPath${workingDir}/preparedProgramme + + + + + + + + yarn + cluster + PrepareProgramme + eu.dnetlib.dhp.actionmanager.project.PrepareProjects + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --projectPath${workingDir}/projects + --outputPath${workingDir}/preparedProjects + + + + + + + + yarn + cluster + ProjectProgrammeAS + eu.dnetlib.dhp.actionmanager.project.SparkAtomicActionJob + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --projectPath${workingDir}/preparedProjects + --programmePath${workingDir}/preparedProgramme + --outputPath/tmp/h2020programme + - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file