hive_db_name
name of the target Hive database
sparkDriverMemory
memory for the driver process
sparkExecutorMemory
memory for each individual executor
sparkExecutorCores
number of cores used by a single executor
spark2YarnHistoryServerAddress
Spark 2.* YARN history server address
spark2EventLogDir
Spark 2.* event log directory location
${wf:conf('reuseRecords') eq false}
${wf:conf('reuseRecords') eq true}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${jobTracker}
${nameNode}
yarn
cluster
build_adjacency_lists
eu.dnetlib.dhp.graph.SparkXmlRecordBuilderJob
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-mt yarn
-is ${isLookupUrl}
--sourcePath${sourcePath}
--outputPath${outputPath}
${jobTracker}
${nameNode}
yarn
cluster
to_solr_index
eu.dnetlib.dhp.graph.SparkXmlIndexingJob
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCoresForIndexing}
--driver-memory=${sparkDriverMemory}
--conf spark.executor.instances=${sparkExecutorInstances}
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-mt yarn
-is ${isLookupUrl}
--sourcePath${outputPath}/xml
--format${format}
--batchSize${batchSize}