sparkDriverMemory memory for driver process sparkExecutorMemory memory for an individual executor sparkExecutorCores number of cores used by a single executor inputPathCrossref the Crossref input path crossrefDumpPath the Crossref dump path MAGDumpPath the MAG dump working path inputPathMAG the MAG working path inputPathOrcid the ORCID input path workingPathOrcid the ORCID working path ${jobTracker} ${nameNode} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} ${wf:conf('resumeFrom') eq 'Skip'} ${wf:conf('resumeFrom') eq 'ImportCrossRef'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] ${jobTracker} ${nameNode} mapred.job.queue.name ${queueName} download.sh ${url} ${crossrefDumpPath} ${crossrefdumpfilename} download.sh ${jobTracker} ${nameNode} eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords --hdfsServerUri${nameNode} --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz --workingPath${crossrefDumpPath} --outputPath${crossrefDumpPath}/files/ yarn-cluster cluster SparkUnpackCrossrefEntries eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=3840 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn-cluster --sourcePath${crossrefDumpPath}/files --targetPath${crossrefDumpPath}/crossref_unpack/ yarn-cluster cluster SparkGenerateCrossrefDataset 
eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset dhp-doiboost-${projectVersion}.jar --executor-memory=7G --executor-cores=2 --driver-memory=7G --conf spark.sql.shuffle.partitions=3840 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn-cluster --sourcePath${crossrefDumpPath}/crossref_unpack/ --targetPath${inputPathCrossref}/crossref_ds yarn-cluster cluster Convert Mag to Dataset eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=3840 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${MAGDumpPath} --targetPath${inputPathMAG}/dataset --masteryarn-cluster yarn-cluster cluster Convert ORCID to Dataset eu.dnetlib.doiboost.orcid.SparkPreprocessORCID dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=3840 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathOrcid} --workingPath${workingPathOrcid} --masteryarn-cluster