crossrefDumpPath
path of the directory containing the Crossref dump (crossref.tar.gz)
inputPathCrossref
base path under which the Crossref datasets (crossref_ds, crossref_ds_updates) are written
sparkDriverMemory
memory for driver process
sparkExecutorMemory
memory for individual executor
sparkExecutorCores
2
number of cores used by single executor
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${jobTracker}
${nameNode}
eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords
--hdfsServerUri${nameNode}
--crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz
--workingPath${crossrefDumpPath}
--outputPath${crossrefDumpPath}/files/
yarn-cluster
cluster
SparkUnpackCrossrefEntries
eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries
dhp-doiboost-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--masteryarn-cluster
--sourcePath${crossrefDumpPath}/files
--targetPath${inputPathCrossref}/crossref_ds
yarn-cluster
cluster
SparkGenerateCrossrefDataset
eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset
dhp-doiboost-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--masteryarn-cluster
--sourcePath${inputPathCrossref}/crossref_ds
--targetPath${inputPathCrossref}/crossref_ds_updates