graphBasePath
the raw graph base path
workingPath
path of the working directory
dedupGraphPath
path of the dedup graph
sparkDriverMemory
memory for the driver process
sparkExecutorMemory
memory for each individual executor
sparkExecutorCores
number of cores used by a single executor
${jobTracker}
${nameNode}
mapreduce.job.queuename
${queueName}
oozie.launcher.mapred.job.queue.name
${oozieLauncherQueueName}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
yarn-cluster
cluster
Update Entity
eu.dnetlib.dhp.dedup.SparkUpdateEntity
dhp-dedup-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
--conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mtyarn-cluster
--i${graphBasePath}
--w${workingPath}
--o${dedupGraphPath}
yarn-cluster
cluster
Update Relations
eu.dnetlib.dhp.dedup.SparkPropagateRelation
dhp-dedup-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
--conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mtyarn-cluster
--i${graphBasePath}
--o${dedupGraphPath}
--w${workingPath}