workingPath the working dir base path token access token shell_cmd wget -O /tmp/last_modified.csv.tar http://74804fb637bd8e2fba5b-e0a029c2f87486cddec3b416996a6057.r3.cf1.rackcdn.com/last_modified.csv.tar ; hdfs dfs -copyFromLocal /tmp/last_modified.csv.tar /data/orcid_activities_2020/last_modified.csv.tar ; rm -f /tmp/last_modified.csv.tar the shell command that downloads the lambda file from orcid containing last orcid update informations sparkExecutorNumber 20 sparkDriverMemory 7G memory for driver process sparkExecutorMemory 2G memory for individual executor sparkExecutorCores 1 number of cores used by single executor spark2MaxExecutors 20 oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] ${jobTracker} ${nameNode} bash -c ${shell_cmd} ${jobTracker} ${nameNode} eu.dnetlib.doiboost.orcid.OrcidDownloader -w${workingPath}/ -n${nameNode} -flast_modified.csv.tar -odownloads/ -t${token} yarn-cluster cluster GenLastModifiedSeq eu.dnetlib.doiboost.orcid.SparkGenLastModifiedSeq dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} -w${workingPath}/ -n${nameNode} -flast_modified.csv.tar -olast_modified.seq -t- yarn-cluster cluster DownloadOrcidAuthors eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors dhp-doiboost-${projectVersion}.jar --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} -w${workingPath}/ -n${nameNode} -flast_modified.seq -odownloads/updated_authors -t${token}