sourcePath
the source path
outputPath
the output path
accessToken
the access token used for the deposition in Zenodo
connectionUrl
the connection url for Zenodo
metadata
the metadata associated to the deposition
depositionType
the type of deposition to perform: "new" for a brand new deposition; "version" for a new version of a published deposition (in this case the concept record id must be provided); "upload" to upload content to an open deposition whose deposition id is already known (in this case the deposition id must be provided)
conceptRecordId
for a new version, the id of the record of the old (already published) deposition
depositionId
the id of an open deposition to which content has to be added
sparkDriverMemory
memory for driver process
sparkExecutorMemory
memory for individual executor
sparkExecutorCores
number of cores used by a single executor
oozieActionShareLibForSpark2
oozie action sharelib for spark 2.*
spark2ExtraListeners
com.cloudera.spark.lineage.NavigatorAppListener
spark 2.* extra listeners classname
spark2SqlQueryExecutionListeners
com.cloudera.spark.lineage.NavigatorQueryListener
spark 2.* sql query execution listeners classname
spark2YarnHistoryServerAddress
spark 2.* yarn history server address
spark2EventLogDir
spark 2.* event log dir location
${jobTracker}
${nameNode}
mapreduce.job.queuename
${queueName}
oozie.launcher.mapred.job.queue.name
${oozieLauncherQueueName}
oozie.action.sharelib.for.spark
${oozieActionShareLibForSpark2}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
yarn
cluster
Dump Publication For EOSC
eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1
dump-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--sourcePath${sourcePath}/publication
--resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
--outputPath${workingDir}/tar/publication
yarn
cluster
Dump Dataset For EOSC
eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1
dump-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--sourcePath${sourcePath}/dataset
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
--outputPath${workingDir}/tar/dataset
yarn
cluster
Dump ORP For EOSC
eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1
dump-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--sourcePath${sourcePath}/otherresearchproduct
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
--outputPath${workingDir}/tar/otherresearchproduct
yarn
cluster
Dump Software For EOSC
eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1
dump-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--sourcePath${sourcePath}/software
--resultTableNameeu.dnetlib.dhp.schema.oaf.Software
--outputPath${workingDir}/tar/software
eu.dnetlib.dhp.oa.graph.dump.MakeTar
--hdfsPath${outputPath}
--nameNode${nameNode}
--sourcePath${workingDir}/tar
eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS
--hdfsPath${outputPath}
--nameNode${nameNode}
--accessToken${accessToken}
--connectionUrl${connectionUrl}
--metadata${metadata}
--conceptRecordId${conceptRecordId}
--depositionType${depositionType}
--depositionId${depositionId}