graphBasePath the input graph base path workingPath path of the working directory graphOutputPath path of the output graph filterInvisible whether to filter out invisible entities after merge isLookupUrl the URL address of the lookUp service sparkDriverMemory heap memory for driver process sparkExecutorMemory heap memory for individual executor sparkExecutorMemoryOverhead off-heap memory for individual executor sparkExecutorCores number of cores used by a single executor oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location

${jobTracker}

${nameNode}

mapreduce.job.queuename ${queueName} oozie.launcher.mapred.job.queue.name ${oozieLauncherQueueName} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] yarn cluster Propagate Relations eu.dnetlib.dhp.oa.dedup.SparkPropagateRelation dhp-dedup-openaire-${projectVersion}.jar

--executor-memory=${sparkExecutorMemory} --conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=15000

--graphBasePath ${graphBasePath} --graphOutputPath ${graphOutputPath} --workingPath ${workingPath} yarn cluster group graph entities eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar

--graphInputPath ${graphBasePath} --checkpointPath ${workingPath}/grouped_entities --outputPath ${graphOutputPath} --isLookupUrl ${isLookupUrl} --filterInvisible ${filterInvisible}