hiveDbName The name of the Hive DB to be used softwareCodeRepositoryURLs The path in the HDFS to save the software repository URLs lastVisitsPath The path in the HDFS to save the responses of the last visit requests archiveRequestsPath The path in the HDFS to save the responses of the archive requests actionsetsPath The path in the HDFS to save the action sets graphPath The path in the HDFS to the base folder of the graph maxNumberOfRetry Max number of retries for failed API calls retryDelay Retry delay for failed requests (in sec) requestDelay Delay between API requests (in ms) apiAccessToken The API Key of the SWH API softwareLimit Limit on the number of repo URLs to use (Optional); for debug purposes resumeFrom Variable that indicates the step to start from ${jobTracker} ${nameNode} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} actionsetsPath ${actionsetsPath} apiAccessToken ${apiAccessToken} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] ${wf:conf('resumeFrom') eq 'collect-software-repository-urls'} ${wf:conf('resumeFrom') eq 'create-swh-actionsets'} yarn cluster Collect software repository URLs eu.dnetlib.dhp.swh.CollectSoftwareRepositoryURLs dhp-swh-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --softwareCodeRepositoryURLs${softwareCodeRepositoryURLs} --hiveDbName${hiveDbName} --hiveMetastoreUris${hiveMetastoreUris} --softwareLimit${softwareLimit} eu.dnetlib.dhp.swh.CollectLastVisitRepositoryData --namenode${nameNode} --softwareCodeRepositoryURLs${softwareCodeRepositoryURLs} --lastVisitsPath${lastVisitsPath} --maxNumberOfRetry${maxNumberOfRetry} --requestDelay${requestDelay} --retryDelay${retryDelay} --requestMethodGET --apiAccessToken${apiAccessToken} eu.dnetlib.dhp.swh.ArchiveRepositoryURLs --namenode${nameNode} --lastVisitsPath${lastVisitsPath} --archiveRequestsPath${archiveRequestsPath} --archiveThresholdInDays365 --maxNumberOfRetry${maxNumberOfRetry} --requestDelay${requestDelay} --retryDelay${retryDelay} --requestMethodPOST --apiAccessToken${apiAccessToken} yarn cluster Create actionsets for SWH data eu.dnetlib.dhp.swh.PrepareSWHActionsets dhp-swh-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --lastVisitsPath${lastVisitsPath} --actionsetsPath${actionsetsPath} --softwareInputPath${graphPath}/software