<workflow-app name="create RAW Graph" xmlns="uri:oozie:workflow:0.5">

    <parameters>
        <property>
            <name>graphBasePath</name>
            <description>the target path to store the raw graph</description>
        </property>
        <property>
            <name>reuseContent</name>
            <value>false</value>
            <description>should import content from the aggregator or reuse a previous version</description>
        </property>
        <property>
            <name>postgresURL</name>
            <description>the postgres URL to access the database</description>
        </property>
        <property>
            <name>postgresUser</name>
            <description>the postgres user</description>
        </property>
        <property>
            <name>postgresPassword</name>
            <description>the postgres password</description>
        </property>
        <property>
            <name>mongoURL</name>
            <description>mongoDB URL, example: mongodb://[username:password@]host[:port]</description>
        </property>
        <property>
            <name>mongoDb</name>
            <description>mongo database</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>oozieActionShareLibForSpark2</name>
            <description>oozie action sharelib for spark 2.*</description>
        </property>
        <property>
            <name>spark2ExtraListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
            <description>spark 2.* extra listeners classname</description>
        </property>
        <property>
            <name>spark2SqlQueryExecutionListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
            <description>spark 2.* sql query execution listeners classname</description>
        </property>
        <property>
            <name>spark2YarnHistoryServerAddress</name>
            <description>spark 2.* yarn history server address</description>
        </property>
        <property>
            <name>spark2EventLogDir</name>
            <description>spark 2.* event log dir location</description>
        </property>
    </parameters>

    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapreduce.job.queuename</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.queue.name</name>
                <value>${oozieLauncherQueueName}</value>
            </property>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>

    <start to="reuse_content"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <decision name="reuse_content">
        <switch>
            <case to="ImportDB_claims">${wf:conf('reuseContent') eq false}</case>
            <case to="GenerateEntities_claim">${wf:conf('reuseContent') eq true}</case>
            <default to="ImportDB_claims"/>
        </switch>
    </decision>

    <action name="ImportDB_claims">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
            <arg>-p</arg><arg>${workingDir}/db_claims</arg>
            <arg>-pgurl</arg><arg>${postgresURL}</arg>
            <arg>-pguser</arg><arg>${postgresUser}</arg>
            <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
            <arg>-a</arg><arg>claims</arg>
        </java>
        <ok to="ImportODF_claims"/>
        <error to="Kill"/>
    </action>

    <action name="ImportODF_claims">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
            <arg>-p</arg><arg>${workingDir}/odf_claims</arg>
            <arg>-mongourl</arg><arg>${mongoURL}</arg>
            <arg>-mongodb</arg><arg>${mongoDb}</arg>
            <arg>-f</arg><arg>ODF</arg>
            <arg>-l</arg><arg>store</arg>
            <arg>-i</arg><arg>claim</arg>
        </java>
        <ok to="ImportOAF_claims"/>
        <error to="Kill"/>
    </action>

    <action name="ImportOAF_claims">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
            <arg>-p</arg><arg>${workingDir}/oaf_claims</arg>
            <arg>-mongourl</arg><arg>${mongoURL}</arg>
            <arg>-mongodb</arg><arg>${mongoDb}</arg>
            <arg>-f</arg><arg>OAF</arg>
            <arg>-l</arg><arg>store</arg>
            <arg>-i</arg><arg>claim</arg>
        </java>
        <ok to="ImportDB"/>
        <error to="Kill"/>
    </action>

    <action name="ImportDB">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
            <arg>-p</arg><arg>${workingDir}/db_records</arg>
            <arg>-pgurl</arg><arg>${postgresURL}</arg>
            <arg>-pguser</arg><arg>${postgresUser}</arg>
            <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
        </java>
        <ok to="ImportODF"/>
        <error to="Kill"/>
    </action>

    <action name="ImportODF">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
            <arg>-p</arg><arg>${workingDir}/odf_records</arg>
            <arg>-mongourl</arg><arg>${mongoURL}</arg>
            <arg>-mongodb</arg><arg>${mongoDb}</arg>
            <arg>-f</arg><arg>ODF</arg>
            <arg>-l</arg><arg>store</arg>
            <arg>-i</arg><arg>cleaned</arg>
        </java>
        <ok to="ImportOAF"/>
        <error to="Kill"/>
    </action>

    <action name="ImportOAF">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
            <arg>-p</arg><arg>${workingDir}/oaf_records</arg>
            <arg>-mongourl</arg><arg>${mongoURL}</arg>
            <arg>-mongodb</arg><arg>${mongoDb}</arg>
            <arg>-f</arg><arg>OAF</arg>
            <arg>-l</arg><arg>store</arg>
            <arg>-i</arg><arg>cleaned</arg>
        </java>
        <ok to="GenerateEntities_claim"/>
        <error to="Kill"/>
    </action>

    <action name="GenerateEntities_claim">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>GenerateEntities_claim</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>-s</arg><arg>${workingDir}/db_claims,${workingDir}/oaf_claims,${workingDir}/odf_claims</arg>
            <arg>-t</arg><arg>${workingDir}/entities_claim</arg>
            <arg>-pgurl</arg><arg>${postgresURL}</arg>
            <arg>-pguser</arg><arg>${postgresUser}</arg>
            <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
        </spark>
        <ok to="GenerateGraph_claims"/>
        <error to="Kill"/>
    </action>

    <action name="GenerateGraph_claims">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>GenerateGraph_claims</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>-s</arg><arg>${workingDir}/entities_claim</arg>
            <arg>-g</arg><arg>${workingDir}/graph_claims</arg>
        </spark>
        <ok to="GenerateEntities"/>
        <error to="Kill"/>
    </action>
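    <!-- Records path: mirrors the claims sequence above, but reads the cleaned
         metadata records (db_records, odf_records, oaf_records) and dispatches
         them into ${workingDir}/graph_raw; the MergeClaims actions further below
         then combine this graph with ${workingDir}/graph_claims into the final
         ${graphBasePath}/graph_raw. -->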
    <action name="GenerateEntities">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>GenerateEntities</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>-s</arg><arg>${workingDir}/db_records,${workingDir}/oaf_records,${workingDir}/odf_records</arg>
            <arg>-t</arg><arg>${workingDir}/entities</arg>
            <arg>-pgurl</arg><arg>${postgresURL}</arg>
            <arg>-pguser</arg><arg>${postgresUser}</arg>
            <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
        </spark>
        <ok to="GenerateGraph"/>
        <error to="Kill"/>
    </action>

    <action name="GenerateGraph">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>GenerateGraph</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=7680
            </spark-opts>
            <arg>-s</arg><arg>${workingDir}/entities</arg>
            <arg>-g</arg><arg>${workingDir}/graph_raw</arg>
        </spark>
        <ok to="fork_merge_claims"/>
        <error to="Kill"/>
    </action>

    <fork name="fork_merge_claims">
        <path start="MergeClaims_publication"/>
        <path start="MergeClaims_dataset"/>
        <path start="MergeClaims_relation"/>
        <path start="MergeClaims_software"/>
        <path start="MergeClaims_otherresearchproduct"/>
        <path start="MergeClaims_datasource"/>
        <path start="MergeClaims_organization"/>
        <path start="MergeClaims_project"/>
    </fork>

    <action name="MergeClaims_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>MergeClaims_publication</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=7680
            </spark-opts>
            <arg>--rawGraphPath</arg><arg>${workingDir}/graph_raw</arg>
            <arg>--claimsGraphPath</arg><arg>${workingDir}/graph_claims</arg>
            <arg>--outputRawGaphPath</arg><arg>${graphBasePath}/graph_raw</arg>
            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
        </spark>
        <ok to="wait_merge"/>
        <error to="Kill"/>
    </action>

    <action name="MergeClaims_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>MergeClaims_dataset</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=7680
            </spark-opts>
            <arg>--rawGraphPath</arg><arg>${workingDir}/graph_raw</arg>
            <arg>--claimsGraphPath</arg><arg>${workingDir}/graph_claims</arg>
            <arg>--outputRawGaphPath</arg><arg>${graphBasePath}/graph_raw</arg>
            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
        </spark>
        <ok to="wait_merge"/>
        <error to="Kill"/>
    </action>

    <action name="MergeClaims_relation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>MergeClaims_relation</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--rawGraphPath</arg><arg>${workingDir}/graph_raw</arg>
            <arg>--claimsGraphPath</arg><arg>${workingDir}/graph_claims</arg>
            <arg>--outputRawGaphPath</arg><arg>${graphBasePath}/graph_raw</arg>
            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
        </spark>
        <ok to="wait_merge"/>
        <error to="Kill"/>
    </action>
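    <!-- The remaining MergeClaims actions repeat the same pattern for the other
         graph tables, with spark.sql.shuffle.partitions scaled down per table
         (1920 for software and otherresearchproduct, 200 for datasource,
         organization and project), presumably to match the smaller table sizes. -->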
    <action name="MergeClaims_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>MergeClaims_software</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=1920
            </spark-opts>
            <arg>--rawGraphPath</arg><arg>${workingDir}/graph_raw</arg>
            <arg>--claimsGraphPath</arg><arg>${workingDir}/graph_claims</arg>
            <arg>--outputRawGaphPath</arg><arg>${graphBasePath}/graph_raw</arg>
            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
        </spark>
        <ok to="wait_merge"/>
        <error to="Kill"/>
    </action>

    <action name="MergeClaims_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>MergeClaims_otherresearchproduct</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=1920
            </spark-opts>
            <arg>--rawGraphPath</arg><arg>${workingDir}/graph_raw</arg>
            <arg>--claimsGraphPath</arg><arg>${workingDir}/graph_claims</arg>
            <arg>--outputRawGaphPath</arg><arg>${graphBasePath}/graph_raw</arg>
            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
        </spark>
        <ok to="wait_merge"/>
        <error to="Kill"/>
    </action>

    <action name="MergeClaims_datasource">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>MergeClaims_datasource</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=200
            </spark-opts>
            <arg>--rawGraphPath</arg><arg>${workingDir}/graph_raw</arg>
            <arg>--claimsGraphPath</arg><arg>${workingDir}/graph_claims</arg>
            <arg>--outputRawGaphPath</arg><arg>${graphBasePath}/graph_raw</arg>
            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
        </spark>
        <ok to="wait_merge"/>
        <error to="Kill"/>
    </action>

    <action name="MergeClaims_organization">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>MergeClaims_organization</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=200
            </spark-opts>
            <arg>--rawGraphPath</arg><arg>${workingDir}/graph_raw</arg>
            <arg>--claimsGraphPath</arg><arg>${workingDir}/graph_claims</arg>
            <arg>--outputRawGaphPath</arg><arg>${graphBasePath}/graph_raw</arg>
            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
        </spark>
        <ok to="wait_merge"/>
        <error to="Kill"/>
    </action>

    <action name="MergeClaims_project">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>MergeClaims_project</name>
            <class>eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=200
            </spark-opts>
            <arg>--rawGraphPath</arg><arg>${workingDir}/graph_raw</arg>
            <arg>--claimsGraphPath</arg><arg>${workingDir}/graph_claims</arg>
            <arg>--outputRawGaphPath</arg><arg>${graphBasePath}/graph_raw</arg>
            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
        </spark>
        <ok to="wait_merge"/>
        <error to="Kill"/>
    </action>

    <join name="wait_merge" to="End"/>

    <end name="End"/>

</workflow-app>