outputPath
path where the action set will be stored
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${jobTracker}
${nameNode}
mapred.job.queue.name
${queueName}
download.sh
${downloadH2020Projects}
${projectPath}
HADOOP_USER_NAME=${wf:user()}
download.sh
eu.dnetlib.dhp.actionmanager.project.utils.ExtractFromZip
--hdfsNameNode${nameNode}
--inputPath${projectPath}
--outputPath${workingDir}/
eu.dnetlib.dhp.actionmanager.project.utils.ReadProjects
--hdfsNameNode${nameNode}
--inputPath${workingDir}/json/project.json
--outputPath${workingDir}/projects
${jobTracker}
${nameNode}
mapred.job.queue.name
${queueName}
download.sh
${downloadH2020Programme}
${programmePath}
HADOOP_USER_NAME=${wf:user()}
download.sh
eu.dnetlib.dhp.actionmanager.project.utils.ExtractFromZip
--hdfsNameNode${nameNode}
--inputPath${programmePath}
--outputPath${workingDir}/downloadedProgramme/
eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV
--hdfsNameNode${nameNode}
--fileURL${workingDir}/downloadedProgramme/csv/programme.csv
--hdfsPath${workingDir}/programme
--classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme
eu.dnetlib.dhp.actionmanager.project.utils.ReadTopics
--hdfsNameNode${nameNode}
--inputPath${workingDir}/json/topics.json
--outputPath${workingDir}/topic
eu.dnetlib.dhp.actionmanager.project.ReadProjectsFromDB
--hdfsPath${workingDir}/dbProjects
--hdfsNameNode${nameNode}
--postgresUrl${postgresURL}
--postgresUser${postgresUser}
--postgresPassword${postgresPassword}
yarn
cluster
PrepareProgramme
eu.dnetlib.dhp.actionmanager.project.PrepareProgramme
dhp-aggregation-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
--programmePath${workingDir}/programme
--outputPath${workingDir}/preparedProgramme
yarn
cluster
PrepareProjects
eu.dnetlib.dhp.actionmanager.project.PrepareProjects
dhp-aggregation-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
--projectPath${workingDir}/projects
--outputPath${workingDir}/preparedProjects
--dbProjectPath${workingDir}/dbProjects
yarn
cluster
ProjectProgrammeAS
eu.dnetlib.dhp.actionmanager.project.SparkAtomicActionJob
dhp-aggregation-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
--projectPath${workingDir}/preparedProjects
--programmePath${workingDir}/preparedProgramme
--topicPath${workingDir}/topic
--outputPath${outputPath}