[#5] Add error summary to harvest_job_dictize

It will return the counts for the 20 most common errors for that
particular job. These will be available when calling harvest_job_show.

Also refactor the harvest source status object to just call
harvest_job_dictize on the 'last_job' key, as it has all the
interesting fields anyway.
This commit is contained in:
amercader 2013-01-22 13:13:24 +00:00
parent 0950827329
commit 9ba6e8f3b3
2 changed files with 18 additions and 27 deletions

View File

@ -65,9 +65,8 @@ def harvest_source_show_status(context, data_dict):
out = { out = {
'job_count': 0, 'job_count': 0,
'next_harvest': p.toolkit._('Not yet scheduled'), 'next_job': p.toolkit._('Not yet scheduled'),
'last_harvest_request': '', 'last_job': None,
'last_harvest_statistics': {'new': 0, 'updated': 0, 'deleted': 0,'errored': 0},
'total_datasets': 0, 'total_datasets': 0,
} }
@ -82,31 +81,16 @@ def harvest_source_show_status(context, data_dict):
# Get next scheduled job # Get next scheduled job
next_job = harvest_model.HarvestJob.filter(source=source,status=u'New').first() next_job = harvest_model.HarvestJob.filter(source=source,status=u'New').first()
if next_job: if next_job:
out['next_harvest'] = p.toolkit._('Scheduled') out['next_job'] = p.toolkit._('Scheduled')
# Get the last finished job # Get the last finished job
last_job = harvest_model.HarvestJob.filter(source=source,status=u'Finished') \ last_job = harvest_model.HarvestJob.filter(source=source,status=u'Finished') \
.order_by(harvest_model.HarvestJob.created.desc()).first() .order_by(harvest_model.HarvestJob.created.desc()).first()
if not last_job: if not last_job:
out['last_harvest_request'] = p.toolkit._('Not yet harvested')
return out return out
out['last_job_id'] = last_job.id out['last_job'] = harvest_job_dictize(last_job, context)
out['last_harvest_request'] = str(last_job.gather_finished)
last_job_report = model.Session.query(
harvest_model.HarvestObject.report_status,
func.count(harvest_model.HarvestObject.report_status)) \
.filter(harvest_model.HarvestObject.harvest_job_id==last_job.id) \
.group_by(harvest_model.HarvestObject.report_status)
for row in last_job_report:
if row[0]:
out['last_harvest_statistics'][row[0]] = row[1]
# Add the gather stage errors
out['last_harvest_statistics']['errored'] += len(last_job.gather_errors)
# Overall statistics # Overall statistics
packages = model.Session.query(model.Package) \ packages = model.Session.query(model.Package) \
@ -166,6 +150,7 @@ def harvest_job_show(context,data_dict):
return harvest_job_dictize(job,context) return harvest_job_dictize(job,context)
def harvest_job_list(context,data_dict): def harvest_job_list(context,data_dict):
check_access('harvest_job_list',context,data_dict) check_access('harvest_job_list',context,data_dict)

View File

@ -24,16 +24,11 @@ def harvest_source_dictize(source, context):
def harvest_job_dictize(job, context): def harvest_job_dictize(job, context):
out = job.as_dict() out = job.as_dict()
out['source'] = job.source_id
out['objects'] = []
out['gather_errors'] = []
if context.get('return_objects', True): model = context['model']
for obj in job.objects:
out['objects'].append(obj.as_dict())
if context.get('return_stats', True): if context.get('return_stats', True):
stats = context['model'].Session.query( stats = model.Session.query(
HarvestObject.report_status, HarvestObject.report_status,
func.count(HarvestObject.id).label('total_objects'))\ func.count(HarvestObject.id).label('total_objects'))\
.filter_by(harvest_job_id=job.id)\ .filter_by(harvest_job_id=job.id)\
@ -42,9 +37,20 @@ def harvest_job_dictize(job, context):
for status, count in stats: for status, count in stats:
out['stats'][status] = count out['stats'][status] = count
out['gather_errors'] = []
for error in job.gather_errors: for error in job.gather_errors:
out['gather_errors'].append(error.as_dict()) out['gather_errors'].append(error.as_dict())
q = model.Session.query(HarvestObjectError.message, \
func.count(HarvestObjectError.message).label('error_count')) \
.join(HarvestObject) \
.filter(HarvestObject.harvest_job_id==job.id) \
.group_by(HarvestObjectError.message) \
.order_by('error_count desc') \
.limit(context.get('error_summmary_limit', 20))
out['error_summary'] = q.all()
return out return out
def harvest_object_dictize(obj, context): def harvest_object_dictize(obj, context):