graphBasePath
the raw graph base path
isLookUpUrl
the address of the lookUp service
actionSetId
id of the actionSet
workingPath
path of the working directory
dedupGraphPath
path of the dedup graph
sparkDriverMemory
memory for the driver process
sparkExecutorMemory
memory for each individual executor
sparkExecutorCores
number of cores used by a single executor
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${jobTracker}
${nameNode}
yarn-cluster
cluster
Create Merge Relations
eu.dnetlib.dhp.dedup.SparkCreateConnectedComponent
dhp-dedup-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory} --conf
spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf
spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf
spark.sql.warehouse.dir="/user/hive/warehouse"
-mtyarn-cluster
--i${graphBasePath}
--w${workingPath}
--la${isLookUpUrl}
--asi${actionSetId}
${jobTracker}
${nameNode}
yarn-cluster
cluster
Create Dedup Record
eu.dnetlib.dhp.dedup.SparkCreateDedupRecord
dhp-dedup-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory} --conf
spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf
spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf
spark.sql.warehouse.dir="/user/hive/warehouse"
-mtyarn-cluster
--i${graphBasePath}
--w${workingPath}
--la${isLookUpUrl}
--asi${actionSetId}
${jobTracker}
${nameNode}
yarn-cluster
cluster
Update Entity
eu.dnetlib.dhp.dedup.SparkUpdateEntity
dhp-dedup-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory} --conf
spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf
spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf
spark.sql.warehouse.dir="/user/hive/warehouse"
-mtyarn-cluster
--i${graphBasePath}
--w${workingPath}
--la${isLookUpUrl}
--asi${actionSetId}
--o${dedupGraphPath}