${jobTracker}
${nameNode}
mapreduce.job.queuename
${queueName}
oozie.launcher.mapred.job.queue.name
${oozieLauncherQueueName}
oozie.action.sharelib.for.spark
${oozieActionShareLibForSpark2}
${wf:conf('resumeFrom') eq 'DownloadDump'}
${wf:conf('resumeFrom') eq 'ExtractContent'}
${wf:conf('resumeFrom') eq 'ReadContent'}
${wf:conf('resumeFrom') eq 'CreateAS'}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${jobTracker}
${nameNode}
mapred.job.queue.name
${queueName}
download.sh
${filelist}
${inputPath}/Original
HADOOP_USER_NAME=${wf:user()}
download.sh
${jobTracker}
${nameNode}
mapred.job.queue.name
${queueName}
download_corr.sh
${filecorrespondence}
${inputPath}/correspondence
HADOOP_USER_NAME=${wf:user()}
download_corr.sh
eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs
--hdfsNameNode${nameNode}
--inputPath${inputPath}/Original
--outputPath${inputPath}/Extracted
eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs
--hdfsNameNode${nameNode}
--inputPath${inputPath}/correspondence
--outputPath${inputPath}/correspondence_extracted
yarn
cluster
Reads the extracted OC content and writes it as JSON
eu.dnetlib.dhp.actionmanager.opencitations.ReadCOCI
dhp-aggregation-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}/Extracted
--outputPath${inputPath}/JSON
--delimiter${delimiter}
--hdfsNameNode${nameNode}
yarn
cluster
Maps the OC identifiers to PIDs
eu.dnetlib.dhp.actionmanager.opencitations.MapOCIdsInPids
dhp-aggregation-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}
--outputPath${outputPathExtraction}
yarn
cluster
Produces the AS for OC
eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
dhp-aggregation-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${outputPathExtraction}
--outputPath${outputPath}