sourcePath the source path isLookUpUrl the isLookUp service endpoint outputPath the output path resultAggregation true if all the result types have to be dumped under result, false otherwise accessToken the access token used for the deposition in Zenodo connectionUrl the connection url for Zenodo metadata the metadata associated with the deposition depositionType the type of deposition we want to perform. "new" for a brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) conceptRecordId for a new version, the id of the record for the old deposition depositionId the deposition id of an open deposition to which content has to be added organizationCommunityMap the organization community map hiveDbName the target hive database name hiveJdbcUrl hive server jdbc url hiveMetastoreUris hive server metastore URIs sparkDriverMemory memory for driver process sparkExecutorMemory memory for individual executor sparkExecutorCores number of cores used by a single executor oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location ${jobTracker} ${nameNode} mapreduce.job.queuename ${queueName} oozie.launcher.mapred.job.queue.name ${oozieLauncherQueueName} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap --outputPath${workingDir}/communityMap --nameNode${nameNode} --isLookUpUrl${isLookUpUrl} yarn 
cluster Dump table publication eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${workingDir}/result/publication --communityMapPath${workingDir}/communityMap yarn cluster Dump table dataset eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${workingDir}/result/dataset --communityMapPath${workingDir}/communityMap yarn cluster Dump table ORP eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf 
spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${workingDir}/result/otherresearchproduct --communityMapPath${workingDir}/communityMap yarn cluster Dump table software eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${workingDir}/result/software --communityMapPath${workingDir}/communityMap yarn cluster Dump table organization eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/organization --resultTableNameeu.dnetlib.dhp.schema.oaf.Organization --outputPath${workingDir}/collect/organization --communityMapPath${workingDir}/communityMap yarn cluster Dump table project eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf 
spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/project --resultTableNameeu.dnetlib.dhp.schema.oaf.Project --outputPath${workingDir}/collect/project --communityMapPath${workingDir}/communityMap yarn cluster Dump table datasource eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/datasource --resultTableNameeu.dnetlib.dhp.schema.oaf.Datasource --outputPath${workingDir}/collect/datasource --communityMapPath${workingDir}/communityMap yarn cluster Dump table relation eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpRelationJob dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/relation --outputPath${workingDir}/relation/relation eu.dnetlib.dhp.oa.graph.dump.graph.CreateContextEntities --hdfsPath${workingDir}/collect/communities_infrastructures 
--nameNode${nameNode} --isLookUpUrl${isLookUpUrl} eu.dnetlib.dhp.oa.graph.dump.graph.CreateContextRelation --hdfsPath${workingDir}/relation/context --nameNode${nameNode} --isLookUpUrl${isLookUpUrl} yarn cluster Dump table relation eu.dnetlib.dhp.oa.graph.dump.graph.SparkOrganizationRelation dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/relation --outputPath${workingDir}/relation/contextOrg --organizationCommunityMap${organizationCommunityMap} --communityMapPath${workingDir}/communityMap yarn cluster Extract Relations from publication eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${workingDir}/relation/publication --communityMapPath${workingDir}/communityMap yarn cluster Dump table dataset eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf 
spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${workingDir}/relation/dataset --communityMapPath${workingDir}/communityMap yarn cluster Dump table ORP eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${workingDir}/relation/orp --communityMapPath${workingDir}/communityMap yarn cluster Dump table software eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${workingDir}/relation/software --communityMapPath${workingDir}/communityMap yarn 
cluster Collect Results and Relations and put them in the right path eu.dnetlib.dhp.oa.graph.dump.graph.SparkCollectAndSave dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir} --outputPath${workingDir}/collect --resultAggregation${resultAggregation} eu.dnetlib.dhp.oa.graph.dump.MakeTar --hdfsPath${outputPath} --nameNode${nameNode} --sourcePath${workingDir}/collect eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS --hdfsPath${outputPath} --nameNode${nameNode} --accessToken${accessToken} --connectionUrl${connectionUrl} --metadata${metadata} --communityMapPath${workingDir}/communityMap --conceptRecordId${conceptRecordId} --depositionType${depositionType} --depositionId${depositionId}