sourcePath the source path outputPath the output path hostedByMapPath the path of the produced HostedByMap sparkDriverMemory memory for driver process sparkExecutorMemory memory for individual executor sparkExecutorCores number of cores used by single executor oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location ${jobTracker} ${nameNode} mapreduce.job.queuename ${queueName} oozie.launcher.mapred.job.queue.name ${oozieLauncherQueueName} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} ${wf:conf('resumeFrom') eq 'ProduceHBM'} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel --replacetrue yarn-cluster Produce the new HostedByMap eu.dnetlib.dhp.oa.graph.hostedbymap.SparkProduceHostedByMap dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --datasourcePath${sourcePath}/datasource 
--workingPath${workingDir} --outputPath${hostedByMapPath} --masteryarn-cluster yarn-cluster Prepare info to apply the hbm eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --hostedByMapPath${hostedByMapPath} --preparedInfoPath${workingDir}/preparedInfo --graphPath${sourcePath} --masteryarn-cluster yarn-cluster Apply hbm to result eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --outputPath${outputPath}/publication --preparedInfoPath${workingDir}/preparedInfo --graphPath${sourcePath} --masteryarn-cluster yarn-cluster Apply hbm to datasource eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToDatasource dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --outputPath${outputPath}/datasource --preparedInfoPath${workingDir}/preparedInfo --graphPath${sourcePath} --masteryarn-cluster