[#5] Add error summary to harvest_job_dictize

It will return the counts for the 20 most common errors for that particular job. These will be available when calling harvest_job_show. Also refactor the harvest source status object to just call harvest_job_dictize on the 'last_job' key, as it has all the interesting fields anyway.
2013-01-22 13:13:24 +00:00 · 2013-01-22 13:13:24 +00:00 · 9ba6e8f3b3
parent 0950827329
commit 9ba6e8f3b3
2 changed files with 18 additions and 27 deletions
--- a/ckanext/harvest/logic/action/get.py
+++ b/ckanext/harvest/logic/action/get.py
@ -65,9 +65,8 @@ def harvest_source_show_status(context, data_dict):
    out = {
           'job_count': 0,
-           'next_harvest': p.toolkit._('Not yet scheduled'),
+           'next_job': p.toolkit._('Not yet scheduled'),
-           'last_harvest_request': '',
+           'last_job': None,
           'last_harvest_statistics': {'new': 0, 'updated': 0, 'deleted': 0,'errored': 0},
           'total_datasets': 0,
           }
@ -82,31 +81,16 @@ def harvest_source_show_status(context, data_dict):
    # Get next scheduled job
    next_job = harvest_model.HarvestJob.filter(source=source,status=u'New').first()
    if next_job:
-        out['next_harvest'] = p.toolkit._('Scheduled')
+        out['next_job'] = p.toolkit._('Scheduled')
    # Get the last finished job
    last_job = harvest_model.HarvestJob.filter(source=source,status=u'Finished') \
               .order_by(harvest_model.HarvestJob.created.desc()).first()
    if not last_job:
        out['last_harvest_request'] = p.toolkit._('Not yet harvested')
        return out
-    out['last_job_id'] = last_job.id
+    out['last_job'] = harvest_job_dictize(last_job, context)
    out['last_harvest_request'] = str(last_job.gather_finished)
    last_job_report = model.Session.query(
                harvest_model.HarvestObject.report_status,
                func.count(harvest_model.HarvestObject.report_status)) \
            .filter(harvest_model.HarvestObject.harvest_job_id==last_job.id) \
            .group_by(harvest_model.HarvestObject.report_status)
    for row in last_job_report:
        if row[0]:
            out['last_harvest_statistics'][row[0]] = row[1]
    # Add the gather stage errors
    out['last_harvest_statistics']['errored'] += len(last_job.gather_errors)
    # Overall statistics
    packages = model.Session.query(model.Package) \
@ -166,6 +150,7 @@ def harvest_job_show(context,data_dict):
    return harvest_job_dictize(job,context)
 def harvest_job_list(context,data_dict):
    check_access('harvest_job_list',context,data_dict)
--- a/ckanext/harvest/logic/dictization.py
+++ b/ckanext/harvest/logic/dictization.py
@ -24,16 +24,11 @@ def harvest_source_dictize(source, context):
 def harvest_job_dictize(job, context):
    out = job.as_dict()
    out['source'] = job.source_id
    out['objects'] = []
    out['gather_errors'] = []
-    if context.get('return_objects', True):
+    model = context['model']
        for obj in job.objects:
            out['objects'].append(obj.as_dict())
    if context.get('return_stats', True):
-        stats = context['model'].Session.query(
+        stats = model.Session.query(
            HarvestObject.report_status,
            func.count(HarvestObject.id).label('total_objects'))\
                .filter_by(harvest_job_id=job.id)\
@ -42,9 +37,20 @@ def harvest_job_dictize(job, context):
        for status, count in stats:
            out['stats'][status] = count
    out['gather_errors'] = []
    for error in job.gather_errors:
        out['gather_errors'].append(error.as_dict())
    q = model.Session.query(HarvestObjectError.message, \
                            func.count(HarvestObjectError.message).label('error_count')) \
                      .join(HarvestObject) \
                      .filter(HarvestObject.harvest_job_id==job.id) \
                      .group_by(HarvestObjectError.message) \
                      .order_by('error_count desc') \
                      .limit(context.get('error_summmary_limit', 20))
    out['error_summary'] = q.all()
    return out
 def harvest_object_dictize(obj, context):