graphInputPath the graph root input path outputPath the graph root output path sparkDriverMemory memory for driver process sparkExecutorMemory memory for individual executor sparkExecutorCores number of cores used by single executor oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] yarn cluster group graph entities and relations eu.dnetlib.dhp.oa.graph.groupbyid.GroupEntitiesAndRelationsSparkJob dhp-graph-mapper-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --graphInputPath ${graphInputPath} --outputPath ${workingDir}/grouped_entities yarn cluster Dispatch publications eu.dnetlib.dhp.oa.graph.groupbyid.DispatchEntitiesSparkJob dhp-graph-mapper-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --entitiesPath ${workingDir}/grouped_entities 
--outputPath ${outputPath}/publication --graphTableClassName eu.dnetlib.dhp.schema.oaf.Publication yarn cluster Dispatch datasets eu.dnetlib.dhp.oa.graph.groupbyid.DispatchEntitiesSparkJob dhp-graph-mapper-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --entitiesPath ${workingDir}/grouped_entities --outputPath ${outputPath}/dataset --graphTableClassName eu.dnetlib.dhp.schema.oaf.Dataset yarn cluster Dispatch softwares eu.dnetlib.dhp.oa.graph.groupbyid.DispatchEntitiesSparkJob dhp-graph-mapper-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --entitiesPath ${workingDir}/grouped_entities --outputPath ${outputPath}/software --graphTableClassName eu.dnetlib.dhp.schema.oaf.Software yarn cluster Dispatch otherresearchproducts eu.dnetlib.dhp.oa.graph.groupbyid.DispatchEntitiesSparkJob dhp-graph-mapper-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf 
spark.sql.shuffle.partitions=7680 --entitiesPath ${workingDir}/grouped_entities --outputPath ${outputPath}/otherresearchproduct --graphTableClassName eu.dnetlib.dhp.schema.oaf.OtherResearchProduct yarn cluster Dispatch datasources eu.dnetlib.dhp.oa.graph.groupbyid.DispatchEntitiesSparkJob dhp-graph-mapper-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --entitiesPath ${workingDir}/grouped_entities --outputPath ${outputPath}/datasource --graphTableClassName eu.dnetlib.dhp.schema.oaf.Datasource yarn cluster Dispatch organizations eu.dnetlib.dhp.oa.graph.groupbyid.DispatchEntitiesSparkJob dhp-graph-mapper-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --entitiesPath ${workingDir}/grouped_entities --outputPath ${outputPath}/organization --graphTableClassName eu.dnetlib.dhp.schema.oaf.Organization yarn cluster Dispatch project eu.dnetlib.dhp.oa.graph.groupbyid.DispatchEntitiesSparkJob dhp-graph-mapper-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --entitiesPath ${workingDir}/grouped_entities --outputPath ${outputPath}/project --graphTableClassName eu.dnetlib.dhp.schema.oaf.Project yarn cluster Dispatch relations eu.dnetlib.dhp.oa.graph.groupbyid.DispatchEntitiesSparkJob dhp-graph-mapper-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --entitiesPath ${workingDir}/grouped_entities --outputPath ${outputPath}/relation --graphTableClassName eu.dnetlib.dhp.schema.oaf.Relation