postgresURL
the URL of the postgres server to query
postgresUser
the username to access the postgres db
postgresPassword
the postgres password
sourcePath
the source path
outputPath
the graph output path
${jobTracker}
${nameNode}
mapreduce.job.queuename
${queueName}
oozie.launcher.mapred.job.queue.name
${oozieLauncherQueueName}
oozie.action.sharelib.for.spark
${oozieActionShareLibForSpark2}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${nameNode}/${sourcePath}/publication
${nameNode}/${outputPath}/publication
${nameNode}/${sourcePath}/dataset
${nameNode}/${outputPath}/dataset
${nameNode}/${sourcePath}/otherresearchproduct
${nameNode}/${outputPath}/otherresearchproduct
${nameNode}/${sourcePath}/software
${nameNode}/${outputPath}/software
${nameNode}/${sourcePath}/organization
${nameNode}/${outputPath}/organization
${nameNode}/${sourcePath}/project
${nameNode}/${outputPath}/project
${nameNode}/${sourcePath}/datasource
${nameNode}/${outputPath}/datasource
eu.dnetlib.dhp.blacklist.ReadBlacklistFromDB
--hdfsPath ${workingDir}/blacklist
--hdfsNameNode ${nameNode}
--postgresUrl ${postgresURL}
--postgresUser ${postgresUser}
--postgresPassword ${postgresPassword}
yarn
cluster
PrepareMergedRelation
eu.dnetlib.dhp.blacklist.PrepareMergedRelationJob
dhp-blacklist-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
--sourcePath ${sourcePath}/relation
--outputPath ${workingDir}/mergesRelation
--hive_metastore_uris ${hive_metastore_uris}
yarn
cluster
ApplyBlacklist
eu.dnetlib.dhp.blacklist.SparkRemoveBlacklistedRelationJob
dhp-blacklist-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
--sourcePath ${sourcePath}/relation
--outputPath ${outputPath}/relation
--hdfsPath ${workingDir}/blacklist
--mergesPath ${workingDir}/mergesRelation