From b53682f26707cea53691e6afff0d7b025db59abb Mon Sep 17 00:00:00 2001 From: David Read Date: Wed, 2 Dec 2015 07:59:08 +0000 Subject: [PATCH] You can abort a job by specifying the ID of the job, rather than the source. This is helpful since the "harvest run" command returns a list of still running job ids. --- ckanext/harvest/commands/harvester.py | 14 ++++------ ckanext/harvest/logic/action/update.py | 36 +++++++++++++++++--------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 1466c66..478354b 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -38,7 +38,7 @@ class Harvester(CkanCommand): harvester jobs - lists harvest jobs - harvester job_abort {source-id/name} + harvester job_abort {source-id/source-name/job-id} - marks a job as "Aborted" so that the source can be restarted afresh. It ensures that the job's harvest objects status are also marked finished. 
You should ensure that neither the job nor its objects are @@ -358,19 +358,15 @@ class Harvester(CkanCommand): def job_abort(self): if len(self.args) >= 2: - source_id_or_name = unicode(self.args[1]) + job_or_source_id_or_name = unicode(self.args[1]) else: - print 'Please provide a source id' + print 'Please provide a job id or source name/id' sys.exit(1) - context = {'model': model, 'session': model.Session, - 'user': self.admin_user['name']} - source = get_action('harvest_source_show')( - context, {'id': source_id_or_name}) context = {'model': model, 'user': self.admin_user['name'], 'session': model.Session} - job = get_action('harvest_job_abort')(context, - {'source_id': source['id']}) + job = get_action('harvest_job_abort')( + context, {'id': job_or_source_id_or_name}) print 'Job status: {0}'.format(job['status']) def run_harvester(self): diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index afeb131..f4b4fcc 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -495,6 +495,11 @@ def harvest_job_abort(context, data_dict): marks them "ERROR", so any left in limbo are cleaned up. Does not actually stop running any queued harvest fetchs/objects. + Specify either id or source_id. 
+ + :param id: the job id to abort, or the id or name of the harvest source + with a job to abort + :type id: string :param source_id: the name or id of the harvest source with a job to abort :type source_id: string ''' @@ -503,18 +508,25 @@ def harvest_job_abort(context, data_dict): model = context['model'] - source_id = data_dict.get('source_id') - source = harvest_source_show(context, {'id': source_id}) - - # HarvestJob set status to 'Finished' - # Don not use harvest_job_list since it can use a lot of memory - last_job = model.Session.query(HarvestJob) \ - .filter_by(source_id=source['id']) \ - .order_by(HarvestJob.created.desc()).first() - if not last_job: - raise NotFound('Error: source has no jobs') - job = get_action('harvest_job_show')(context, - {'id': last_job.id}) + source_or_job_id = data_dict.get('source_id') or data_dict.get('id') + if source_or_job_id: + try: + source = harvest_source_show(context, {'id': source_or_job_id}) + except NotFound: + job = get_action('harvest_job_show')( + context, {'id': source_or_job_id}) + else: + # HarvestJob set status to 'Aborted' + # Do not use harvest_job_list since it can use a lot of memory + # Get the most recent job for the source + job = model.Session.query(HarvestJob) \ + .filter_by(source_id=source['id']) \ + .order_by(HarvestJob.created.desc()).first() + if not job: + raise NotFound('Error: source has no jobs') + job_id = job.id + job = get_action('harvest_job_show')( + context, {'id': job_id}) if job['status'] != 'Finished': # i.e. New or Running