[#21] Improve gather stage error handling
See issue for full details. Basically, we don't want to catch any exceptions at the queue.py level, as doing so prevents debugging. Harvesters should deal with exceptions themselves and return a list of ids, or an empty list if no objects need to be fetched. Also improved the debug messages.
This commit is contained in:
parent
91f18bffab
commit
d77f16aba9
|
@ -97,15 +97,21 @@ def gather_callback(channel, method, header, body):
|
||||||
try:
|
try:
|
||||||
id = json.loads(body)['harvest_job_id']
|
id = json.loads(body)['harvest_job_id']
|
||||||
log.debug('Received harvest job id: %s' % id)
|
log.debug('Received harvest job id: %s' % id)
|
||||||
|
except KeyError:
|
||||||
|
log.error('No harvest job id received')
|
||||||
|
channel.basic_ack(method.delivery_tag)
|
||||||
|
return False
|
||||||
|
|
||||||
# Get a publisher for the fetch queue
|
# Get a publisher for the fetch queue
|
||||||
publisher = get_fetch_publisher()
|
publisher = get_fetch_publisher()
|
||||||
|
|
||||||
try:
|
|
||||||
job = HarvestJob.get(id)
|
job = HarvestJob.get(id)
|
||||||
except:
|
|
||||||
|
if not job:
|
||||||
log.error('Harvest job does not exist: %s' % id)
|
log.error('Harvest job does not exist: %s' % id)
|
||||||
else:
|
channel.basic_ack(method.delivery_tag)
|
||||||
|
return False
|
||||||
|
|
||||||
# Send the harvest job to the plugins that implement
|
# Send the harvest job to the plugins that implement
|
||||||
# the Harvester interface, only if the source type
|
# the Harvester interface, only if the source type
|
||||||
# matches
|
# matches
|
||||||
|
@ -115,21 +121,30 @@ def gather_callback(channel, method, header, body):
|
||||||
harvester_found = True
|
harvester_found = True
|
||||||
# Get a list of harvest object ids from the plugin
|
# Get a list of harvest object ids from the plugin
|
||||||
job.gather_started = datetime.datetime.now()
|
job.gather_started = datetime.datetime.now()
|
||||||
try:
|
|
||||||
harvest_object_ids = harvester.gather_stage(job)
|
harvest_object_ids = harvester.gather_stage(job)
|
||||||
except Exception, e:
|
|
||||||
log.error('Gather stage failed unexpectedly: %s' % e)
|
|
||||||
job.status = 'Errored'
|
|
||||||
job.save()
|
|
||||||
continue
|
|
||||||
job.gather_finished = datetime.datetime.now()
|
job.gather_finished = datetime.datetime.now()
|
||||||
job.save()
|
job.save()
|
||||||
log.debug('Received from plugin''s gather_stage: %r' % harvest_object_ids)
|
|
||||||
if harvest_object_ids and len(harvest_object_ids) > 0:
|
if not isinstance(harvest_object_ids, list):
|
||||||
|
log.error('Gather stage failed')
|
||||||
|
publisher.close()
|
||||||
|
channel.basic_ack(method.delivery_tag)
|
||||||
|
return False
|
||||||
|
|
||||||
|
if len(harvest_object_ids) == 0:
|
||||||
|
log.info('No harvest objects to fetch')
|
||||||
|
publisher.close()
|
||||||
|
channel.basic_ack(method.delivery_tag)
|
||||||
|
return False
|
||||||
|
|
||||||
|
log.debug('Received from plugin gather_stage: {0} objects (first: {1} last: {2})'.format(
|
||||||
|
len(harvest_object_ids), harvest_object_ids[:1], harvest_object_ids[-1:]))
|
||||||
for id in harvest_object_ids:
|
for id in harvest_object_ids:
|
||||||
# Send the id to the fetch queue
|
# Send the id to the fetch queue
|
||||||
publisher.send({'harvest_object_id':id})
|
publisher.send({'harvest_object_id':id})
|
||||||
log.debug('Sent object %s to the fetch queue' % id)
|
log.debug('Sent {0} objects to the fetch queue'.format(len(harvest_object_ids)))
|
||||||
|
|
||||||
if not harvester_found:
|
if not harvester_found:
|
||||||
msg = 'No harvester could be found for source type %s' % job.source.type
|
msg = 'No harvester could be found for source type %s' % job.source.type
|
||||||
|
@ -137,13 +152,8 @@ def gather_callback(channel, method, header, body):
|
||||||
err.save()
|
err.save()
|
||||||
log.error(msg)
|
log.error(msg)
|
||||||
|
|
||||||
finally:
|
|
||||||
publisher.close()
|
|
||||||
|
|
||||||
except KeyError:
|
|
||||||
log.error('No harvest job id received')
|
|
||||||
finally:
|
|
||||||
model.Session.remove()
|
model.Session.remove()
|
||||||
|
publisher.close()
|
||||||
channel.basic_ack(method.delivery_tag)
|
channel.basic_ack(method.delivery_tag)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue