From efe977512b730522f769b40ad87772007038bc8d Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 25 Feb 2013 17:17:08 +0000 Subject: [PATCH 1/2] Include gather errors on job summaries and reports --- ckanext/harvest/logic/action/get.py | 27 ++++++++++++++++++++++----- ckanext/harvest/logic/dictization.py | 15 ++++++++------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/ckanext/harvest/logic/action/get.py b/ckanext/harvest/logic/action/get.py index e02bb76..022d2ac 100644 --- a/ckanext/harvest/logic/action/get.py +++ b/ckanext/harvest/logic/action/get.py @@ -160,6 +160,24 @@ def harvest_job_report(context, data_dict): if not job: raise NotFound + report = { + 'gather_errors': [], + 'object_errors': {} + } + + # Gather errors + q = model.Session.query(harvest_model.HarvestGatherError) \ + .join(harvest_model.HarvestJob) \ + .filter(harvest_model.HarvestGatherError.harvest_job_id==job.id) \ + .order_by(harvest_model.HarvestGatherError.created.desc()) + + for error in q.all(): + report['gather_errors'].append({ + 'message': error.message + }) + + # Object errors + + # Check if the harvester for this job's source has a method for returning # the URL to the original document original_url_builder = None @@ -173,19 +191,18 @@ def harvest_job_report(context, data_dict): .filter(harvest_model.HarvestObject.harvest_job_id==job.id) \ .order_by(harvest_model.HarvestObjectError.harvest_object_id) - report = {} for error, guid in q.all(): - if not error.harvest_object_id in report: - report[error.harvest_object_id] = { + if not error.harvest_object_id in report['object_errors']: + report['object_errors'][error.harvest_object_id] = { 'guid': guid, 'errors': [] } if original_url_builder: url = original_url_builder(error.harvest_object_id) if url: - report[error.harvest_object_id]['original_url'] = url + report['object_errors'][error.harvest_object_id]['original_url'] = url - report[error.harvest_object_id]['errors'].append({ + 
report['object_errors'][error.harvest_object_id]['errors'].append({ 'message': error.message, 'line': error.line, 'type': error.stage diff --git a/ckanext/harvest/logic/dictization.py b/ckanext/harvest/logic/dictization.py index 8415370..6c6f182 100644 --- a/ckanext/harvest/logic/dictization.py +++ b/ckanext/harvest/logic/dictization.py @@ -37,10 +37,6 @@ def harvest_job_dictize(job, context): for status, count in stats: out['stats'][status] = count - out['gather_errors'] = [] - for error in job.gather_errors: - out['gather_errors'].append(error.as_dict()) - if context.get('return_error_summary', True): q = model.Session.query(HarvestObjectError.message, \ func.count(HarvestObjectError.message).label('error_count')) \ @@ -49,9 +45,14 @@ def harvest_job_dictize(job, context): .group_by(HarvestObjectError.message) \ .order_by('error_count desc') \ .limit(context.get('error_summmary_limit', 20)) - - out['error_summary'] = q.all() - + out['object_error_summary'] = q.all() + q = model.Session.query(HarvestGatherError.message, \ + func.count(HarvestGatherError.message).label('error_count')) \ + .filter(HarvestGatherError.harvest_job_id==job.id) \ + .group_by(HarvestGatherError.message) \ + .order_by('error_count desc') \ + .limit(context.get('error_summmary_limit', 20)) + out['gather_error_summary'] = q.all() return out def harvest_object_dictize(obj, context): From a86d91c3f0da263d8a6244520c9be42f896cee8e Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 28 Feb 2013 12:17:15 +0000 Subject: [PATCH 2/2] [#11] Make get actions side_effect_free --- ckanext/harvest/logic/action/get.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ckanext/harvest/logic/action/get.py b/ckanext/harvest/logic/action/get.py index 022d2ac..055bfeb 100644 --- a/ckanext/harvest/logic/action/get.py +++ b/ckanext/harvest/logic/action/get.py @@ -8,7 +8,7 @@ from ckan.plugins import PluginImplementations from ckanext.harvest.interfaces import IHarvester import 
ckan.plugins as p -from ckan.logic import NotFound, check_access +from ckan.logic import NotFound, check_access, side_effect_free from ckanext.harvest import model as harvest_model @@ -19,7 +19,7 @@ from ckanext.harvest.logic.dictization import (harvest_source_dictize, from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema log = logging.getLogger(__name__) - +@side_effect_free def harvest_source_show(context,data_dict): ''' Returns the metadata of a harvest source @@ -42,6 +42,7 @@ def harvest_source_show(context,data_dict): return source_dict +@side_effect_free def harvest_source_show_status(context, data_dict): ''' Returns a status report for a harvest source @@ -60,7 +61,7 @@ def harvest_source_show_status(context, data_dict): source = harvest_model.HarvestSource.get(data_dict['id']) if not source: - raise p.toolkit.NotFound('Harvest source {0} does not exist'.format(data_dict['id'])) + raise p.toolkit.ObjectNotFound('Harvest source {0} does not exist'.format(data_dict['id'])) out = { 'job_count': 0, @@ -102,7 +103,7 @@ def harvest_source_show_status(context, data_dict): return out - +@side_effect_free def harvest_source_list(context, data_dict): check_access('harvest_source_list',context,data_dict) @@ -116,6 +117,7 @@ def harvest_source_list(context, data_dict): context.update({'detailed':False}) return [harvest_source_dictize(source, context) for source in sources] +@side_effect_free def harvest_source_for_a_dataset(context, data_dict): '''For a given dataset, return the harvest source that created or last updated it, otherwise NotFound.''' @@ -136,6 +138,7 @@ def harvest_source_for_a_dataset(context, data_dict): return harvest_source_dictize(source,context) +@side_effect_free def harvest_job_show(context,data_dict): check_access('harvest_job_show',context,data_dict) @@ -149,6 +152,7 @@ def harvest_job_show(context,data_dict): return harvest_job_dictize(job,context) +@side_effect_free def harvest_job_report(context, data_dict): 
check_access('harvest_job_show', context, data_dict) @@ -210,6 +214,7 @@ def harvest_job_report(context, data_dict): return report +@side_effect_free def harvest_job_list(context,data_dict): check_access('harvest_job_list',context,data_dict) @@ -235,6 +240,7 @@ def harvest_job_list(context,data_dict): context['return_error_summary'] = False return [harvest_job_dictize(job, context) for job in jobs] +@side_effect_free def harvest_object_show(context,data_dict): check_access('harvest_object_show',context,data_dict) @@ -247,6 +253,7 @@ def harvest_object_show(context,data_dict): return harvest_object_dictize(obj,context) +@side_effect_free def harvest_object_list(context,data_dict): check_access('harvest_object_list',context,data_dict) @@ -269,6 +276,7 @@ def harvest_object_list(context,data_dict): return [getattr(obj,'id') for obj in objects] +@side_effect_free def harvesters_info_show(context,data_dict): check_access('harvesters_info_show',context,data_dict)