graphBasePath the raw graph base path isLookUpUrl the address of the lookUp service actionSetId id of the actionSet workingPath path for the working directory cutConnectedComponent max number of elements in a connected component sparkDriverMemory memory for driver process sparkExecutorMemory memory for individual executor sparkExecutorCores number of cores used by single executor oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location ${jobTracker} ${nameNode} mapreduce.job.queuename ${queueName} oozie.launcher.mapred.job.queue.name ${oozieLauncherQueueName} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] yarn cluster Create Similarity Relations eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels dhp-dedup-openaire-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphBasePath${graphBasePath} --isLookUpUrl${isLookUpUrl} --actionSetId${actionSetIdOpenorgs} --workingPath${workingPath} --numPartitions1000 yarn cluster Copy OpenOrgs Sim Rels eu.dnetlib.dhp.oa.dedup.SparkCopyOpenorgsSimRels dhp-dedup-openaire-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphBasePath${graphBasePath} --isLookUpUrl${isLookUpUrl} --workingPath${workingPath} --actionSetId${actionSetIdOpenorgs} --numPartitions1000 yarn cluster Create Merge Relations eu.dnetlib.dhp.oa.dedup.SparkCreateMergeRels dhp-dedup-openaire-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphBasePath${graphBasePath} --workingPath${workingPath} --isLookUpUrl${isLookUpUrl} --actionSetId${actionSetIdOpenorgs} --cutConnectedComponent${cutConnectedComponent} yarn cluster Prepare Organization Relations eu.dnetlib.dhp.oa.dedup.SparkPrepareOrgRels dhp-dedup-openaire-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphBasePath${graphBasePath} --workingPath${workingPath} --isLookUpUrl${isLookUpUrl} --actionSetId${actionSetIdOpenorgs} --dbUrl${dbUrl} --dbTable${dbTable} --dbUser${dbUser} --dbPwd${dbPwd} --numConnections20 yarn cluster Prepare New Organizations eu.dnetlib.dhp.oa.dedup.SparkPrepareNewOrgs dhp-dedup-openaire-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphBasePath${graphBasePath} --workingPath${workingPath} --isLookUpUrl${isLookUpUrl} --actionSetId${actionSetIdOpenorgs} --apiUrl${apiUrl} --dbUrl${dbUrl} --dbTable${dbTable} --dbUser${dbUser} --dbPwd${dbPwd} --numConnections20