sourcePath the source path outputPath the output path communities the communities whose products should be dumped sparkDriverMemory memory for driver process sparkExecutorMemory memory for individual executor sparkExecutorCores number of cores used by single executor oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location ${jobTracker} ${nameNode} mapreduce.job.queuename ${queueName} oozie.launcher.mapred.job.queue.name ${oozieLauncherQueueName} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities --outputPath ${outputPath}/community --nameNode ${nameNode} --communities ${communities} yarn cluster select results ids connected to communities and dump relation eu.dnetlib.dhp.oa.graph.dump.csv.SparkSelectResultsAndDumpRelations dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath ${sourcePath} --workingPath ${outputPath}/workingDir --outputPath ${outputPath} --communities ${communities} yarn cluster select results from publication eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults dump-${projectVersion}.jar --executor-memory=9G --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf 
spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --conf spark.sql.shuffle.partitions=3840 --sourcePath ${sourcePath} --resultTableName eu.dnetlib.dhp.schema.oaf.Publication --workingPath ${outputPath}/workingDir --resultType publication yarn cluster select results from dataset eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath ${sourcePath} --resultTableName eu.dnetlib.dhp.schema.oaf.Dataset --workingPath ${outputPath}/workingDir --resultType dataset yarn cluster select results from other eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath ${sourcePath} --resultTableName eu.dnetlib.dhp.schema.oaf.OtherResearchProduct --workingPath ${outputPath}/workingDir --resultType otherresearchproduct yarn cluster select results from software eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults 
dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath ${sourcePath} --resultTableName eu.dnetlib.dhp.schema.oaf.Software --workingPath ${outputPath}/workingDir --resultType software Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] yarn cluster Dump single results eu.dnetlib.dhp.oa.graph.dump.csv.SparkMoveOnSigleDir dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --workingPath ${outputPath}/workingDir --outputPath ${outputPath} eu.dnetlib.dhp.oa.graph.dump.MakeTar --hdfsPath ${outputPath} --nameNode ${nameNode} --sourcePath ${workingDir}/tar eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS --hdfsPath ${outputPath} --nameNode ${nameNode} --accessToken ${accessToken} --connectionUrl ${connectionUrl} --metadata ${metadata} --conceptRecordId ${conceptRecordId} --depositionType ${depositionType} --depositionId ${depositionId}