graphInputPath the path where the graph is stored outputDir the path where the the generated data will be stored datasourceIdWhitelist - a white list (comma separeted, - for empty list) of datasource ids datasourceTypeWhitelist - a white list (comma separeted, - for empty list) of datasource types datasourceIdBlacklist - a black list (comma separeted, - for empty list) of datasource ids topicWhitelist * a white list (comma separeted, * for all) of topics esEventIndexName the elasticsearch index name for events esNotificationsIndexName the elasticsearch index name for notifications esIndexHost the elasticsearch host esBatchWriteRetryCount 8 an ES configuration property esBatchWriteRetryWait 60s an ES configuration property esBatchSizeEntries 200 an ES configuration property esNodesWanOnly true an ES configuration property maxIndexedEventsForDsAndTopic the max number of events for each couple (ds/topic) brokerApiBaseUrl the url of the broker service api brokerDbUrl the url of the broker database brokerDbUser the user of the broker database brokerDbPassword the password of the broker database sparkDriverMemory memory for driver process sparkExecutorMemory memory for individual executor sparkExecutorCores number of cores used by single executor oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location ${jobTracker} ${nameNode} mapreduce.job.queuename ${queueName} oozie.launcher.mapred.job.queue.name ${oozieLauncherQueueName} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] yarn cluster PrepareSimpleEntititiesJob eu.dnetlib.dhp.broker.oa.PrepareSimpleEntititiesJob dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster PrepareRelatedDatasourcesJob eu.dnetlib.dhp.broker.oa.PrepareRelatedDatasourcesJob dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster PrepareRelatedDatasetsJob eu.dnetlib.dhp.broker.oa.PrepareRelatedDatasetsJob dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster PrepareRelatedProjectsJob eu.dnetlib.dhp.broker.oa.PrepareRelatedProjectsJob dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster PrepareRelatedPublicationsJob eu.dnetlib.dhp.broker.oa.PrepareRelatedPublicationsJob dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster PrepareRelatedSoftwaresJob eu.dnetlib.dhp.broker.oa.PrepareRelatedSoftwaresJob dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster JoinStep0 eu.dnetlib.dhp.broker.oa.JoinStep0Job dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster JoinStep1 eu.dnetlib.dhp.broker.oa.JoinStep1Job dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster JoinStep2 eu.dnetlib.dhp.broker.oa.JoinStep2Job dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster JoinStep3 eu.dnetlib.dhp.broker.oa.JoinStep3Job dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster JoinStep4 eu.dnetlib.dhp.broker.oa.JoinStep4Job dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster PrepareGroupsJob eu.dnetlib.dhp.broker.oa.PrepareGroupsJob dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --graphPath${graphInputPath} --workingDir${workingDir} yarn cluster GenerateEventsJob eu.dnetlib.dhp.broker.oa.GenerateEventsJob dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --workingDir${workingDir} --outputDir${outputDir} --datasourceIdWhitelist${datasourceIdWhitelist} --datasourceTypeWhitelist${datasourceTypeWhitelist} --datasourceIdBlacklist${datasourceIdBlacklist} --topicWhitelist${topicWhitelist} yarn cluster IndexEventSubsetOnESJob eu.dnetlib.dhp.broker.oa.IndexEventSubsetJob dhp-broker-events-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.dynamicAllocation.maxExecutors="8" --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --outputDir${outputDir} --index${esEventIndexName} --esHost${esIndexHost} --esBatchWriteRetryCount${esBatchWriteRetryCount} --esBatchWriteRetryWait${esBatchWriteRetryWait} --esBatchSizeEntries${esBatchSizeEntries} --esNodesWanOnly${esNodesWanOnly} --maxEventsForTopic${maxIndexedEventsForDsAndTopic} --brokerApiBaseUrl${brokerApiBaseUrl} yarn cluster GenerateStatsJob eu.dnetlib.dhp.broker.oa.GenerateStatsJob dhp-broker-events-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --outputDir${outputDir} --dbUrl${brokerDbUrl} --dbUser${brokerDbUser} --dbPassword${brokerDbPassword} --brokerApiBaseUrl${brokerApiBaseUrl} yarn cluster IndexNotificationsOnESJob eu.dnetlib.dhp.broker.oa.IndexNotificationsJob dhp-broker-events-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.dynamicAllocation.maxExecutors="8" --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --outputDir${outputDir} --index${esNotificationsIndexName} --esHost${esIndexHost} --esBatchWriteRetryCount${esBatchWriteRetryCount} --esBatchWriteRetryWait${esBatchWriteRetryWait} --esBatchSizeEntries${esBatchSizeEntries} --esNodesWanOnly${esNodesWanOnly} --brokerApiBaseUrl${brokerApiBaseUrl}