sourcePath the source path outputPath the output path country the country for which to produce the dump hiveDbName the target hive database name hiveJdbcUrl hive server jdbc url hiveMetastoreUris hive server metastore URIs sparkDriverMemory memory for driver process sparkExecutorMemory memory for individual executor sparkExecutorCores number of cores used by single executor oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location ${jobTracker} ${nameNode} mapreduce.job.queuename ${queueName} oozie.launcher.mapred.job.queue.name ${oozieLauncherQueueName} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap --outputPath${workingDir}/communityMap --nameNode${nameNode} --isLookUpUrl${isLookUpUrl} yarn cluster Dump table publication eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultsRelatedToCountry dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath} --outputPath${workingDir}/resultsInCountry --country${country} yarn cluster Dump table publication eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry dump-${projectVersion}.jar --executor-memory=7G --executor-cores=2 --driver-memory=7G --conf spark.sql.shuffle.partitions=3840 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath} --resultWithCountry${workingDir}/resultsInCountry --resultTypepublication yarn cluster Dump table dataset eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath} --resultTypedataset --resultWithCountry${workingDir}/resultsInCountry yarn cluster Dump table ORP eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath} --resultTypeotherresearchproduct --resultWithCountry${workingDir}/resultsInCountry yarn cluster Dump table software eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath} --resultTypesoftware --resultWithCountry${workingDir}/resultsInCountry yarn cluster Select valid table relation eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectSubset dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath} --outputPath${outputPath} --removeSet${removeSet} yarn cluster Dump table publication for community/funder related products eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${outputPath}/original/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${workingDir}/dump/publication --communityMapPath${workingDir}/communityMap --dumpTypecountry yarn cluster Dump table dataset for community/funder related products eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${outputPath}/original/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${workingDir}/dump/dataset --communityMapPath${workingDir}/communityMap --dumpTypecountry yarn cluster Dump table ORP for community related products eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${outputPath}/original/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${workingDir}/dump/otherresearchproduct --communityMapPath${workingDir}/communityMap --dumpTypecountry yarn cluster Dump table software for community related products eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${outputPath}/original/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${workingDir}/dump/software --communityMapPath${workingDir}/communityMap --dumpTypecountry yarn cluster Prepare association result subset of project info eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath} --outputPath${workingDir}/preparedInfo yarn cluster Extend dumped publications with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/dump/publication --outputPath${outputPath}/dump/publication --preparedInfoPath${workingDir}/preparedInfo yarn cluster Extend dumped dataset with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/dump/dataset --outputPath${outputPath}/dump/dataset --preparedInfoPath${workingDir}/preparedInfo yarn cluster Extend dumped ORP with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/dump/otherresearchproduct --outputPath${outputPath}/dump/otherresearchproduct --preparedInfoPath${workingDir}/preparedInfo yarn cluster Extend dumped software with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/dump/software --outputPath${outputPath}/dump/software --preparedInfoPath${workingDir}/preparedInfo eu.dnetlib.dhp.oa.graph.dump.MakeTar --hdfsPath${outputPath}/tar --nameNode${nameNode} --sourcePath${outputPath}/dump Sub-workflow dump complete failed with error message ${wf:errorMessage()}