From ccf0cd3da223851c3544ec9b5e4d1a73e6cfed9e Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 8 Jun 2012 17:09:22 +0100 Subject: [PATCH] Add copious logging to record what happens in harvesting. --- ckanext/harvest/logic/action/create.py | 15 +++++++++++++-- ckanext/harvest/logic/action/delete.py | 8 +++++++- ckanext/harvest/logic/action/update.py | 10 +++++++++- ckanext/harvest/model/__init__.py | 1 - ckanext/harvest/plugin.py | 2 +- ckanext/harvest/queue.py | 2 +- ckanext/harvest/templates/index.html | 12 ++++-------- 7 files changed, 35 insertions(+), 15 deletions(-) diff --git a/ckanext/harvest/logic/action/create.py b/ckanext/harvest/logic/action/create.py index b3553f1..56fca7c 100644 --- a/ckanext/harvest/logic/action/create.py +++ b/ckanext/harvest/logic/action/create.py @@ -1,4 +1,5 @@ import re +import logging from ckan.logic import NotFound, ValidationError, check_access from ckan.lib.navl.dictization_functions import validate @@ -9,8 +10,11 @@ from ckanext.harvest.logic.dictization import (harvest_source_dictize, harvest_job_dictize) from ckanext.harvest.logic.action.get import harvest_source_list,harvest_job_list +log = logging.getLogger(__name__) + def harvest_source_create(context,data_dict): + log.info('Creating harvest source: %r', data_dict) check_access('harvest_source_create',context,data_dict) model = context['model'] @@ -21,6 +25,7 @@ def harvest_source_create(context,data_dict): if errors: session.rollback() + log.warn('Harvest source does not validate: %r', errors) raise ValidationError(errors,_error_summary(errors)) source = HarvestSource() @@ -36,11 +41,12 @@ def harvest_source_create(context,data_dict): source.active = data['active'] source.save() + log.info('Harvest source created: %s', source.id) return harvest_source_dictize(source,context) def harvest_job_create(context,data_dict): - + log.info('Harvest job create: %r', data_dict) check_access('harvest_job_create',context,data_dict) source_id = data_dict['source_id'] @@ -48,10 +54,12 @@ def harvest_job_create(context,data_dict): # Check if source exists source = HarvestSource.get(source_id) if not source: + log.warn('Harvest source %s does not exist', source_id) raise NotFound('Harvest source %s does not exist' % source_id) # Check if the source is active if not source.active: + log.warn('Harvest job cannot be created for inactive source %s', source_id) raise Exception('Can not create jobs on inactive sources') # Check if there already is an unrun job for this source @@ -61,16 +69,18 @@ def harvest_job_create(context,data_dict): } exists = harvest_job_list(context,data_dict) if len(exists): + log.warn('There is already an unrun job %r for this source %s', exists, source_id) raise Exception('There already is an unrun job for this source') job = HarvestJob() job.source = source job.save() + log.info('Harvest job saved %s', job.id) return harvest_job_dictize(job,context) def harvest_job_create_all(context,data_dict): - + log.info('Harvest job create all: %r', data_dict) check_access('harvest_job_create_all',context,data_dict) data_dict.update({'only_active':True}) @@ -92,6 +102,7 @@ def harvest_job_create_all(context,data_dict): job = harvest_job_create(context,{'source_id':source['id']}) jobs.append(job) + log.info('Created jobs for %i harvest sources', len(jobs)) return jobs def _error_summary(error_dict): diff --git a/ckanext/harvest/logic/action/delete.py b/ckanext/harvest/logic/action/delete.py index a69d3aa..84b29b1 100644 --- a/ckanext/harvest/logic/action/delete.py +++ b/ckanext/harvest/logic/action/delete.py @@ -1,15 +1,19 @@ +import logging + from ckan.logic import NotFound, check_access from ckanext.harvest.model import (HarvestSource, HarvestJob) +log = logging.getLogger(__name__) def harvest_source_delete(context,data_dict): - + log.info('Deleting harvest source: %r', data_dict) check_access('harvest_source_delete',context,data_dict) source_id = data_dict.get('id') source = HarvestSource.get(source_id) if not source: + log.warn('Harvest source %s does not exist', source_id) raise NotFound('Harvest source %s does not exist' % source_id) # Don't actually delete the record, just flag it as inactive @@ -19,8 +23,10 @@ def harvest_source_delete(context,data_dict): # Abort any pending jobs jobs = HarvestJob.filter(source=source,status=u'New') if jobs: + log.info('Aborting %i jobs due to deleted harvest source', jobs.count()) for job in jobs: job.status = u'Aborted' job.save() + log.info('Harvest source %s deleted', source_id) return True diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index 92315be..a2be3a8 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -30,8 +30,10 @@ def harvest_source_update(context,data_dict): source_id = data_dict.get('id') schema = context.get('schema') or default_harvest_source_schema() + log.info('Harvest source %s update: %r', source_id, data_dict) source = HarvestSource.get(source_id) if not source: + log.error('Harvest source %s does not exist', source_id) raise NotFound('Harvest source %s does not exist' % source_id) data, errors = validate(data_dict, schema) @@ -55,6 +57,7 @@ def harvest_source_update(context,data_dict): # Abort any pending jobs if not source.active: jobs = HarvestJob.filter(source=source,status=u'New') + log.info('Harvest source %s not active, so aborting %i outstanding jobs', source_id, jobs.count()) if jobs: for job in jobs: job.status = u'Aborted' @@ -71,6 +74,7 @@ def harvest_objects_import(context,data_dict): It will only affect the last fetched objects already present in the database. ''' + log.info('Harvest objects import: %r', data_dict) check_access('harvest_objects_import',context,data_dict) model = context['model'] @@ -80,9 +84,11 @@ def harvest_objects_import(context,data_dict): if source_id: source = HarvestSource.get(source_id) if not source: + log.error('Harvest source %s does not exist', source_id) raise NotFound('Harvest source %s does not exist' % source_id) if not source.active: + log.warn('Harvest source %s is not active.', source_id) raise Exception('This harvest source is not active') last_objects_ids = session.query(HarvestObject.id) \ @@ -108,10 +114,11 @@ def harvest_objects_import(context,data_dict): harvester.import_stage(obj) break last_objects.append(harvest_object_dictize(obj,context)) + log.info('Harvest objects imported: %r', last_objects) return last_objects def harvest_jobs_run(context,data_dict): - + log.info('Harvest job run: %r', data_dict) check_access('harvest_jobs_run',context,data_dict) source_id = data_dict.get('source_id',None) @@ -119,6 +126,7 @@ def harvest_jobs_run(context,data_dict): # Check if there are pending harvest jobs jobs = harvest_job_list(context,{'source_id':source_id,'status':u'New'}) if len(jobs) == 0: + log.info('No new harvest jobs.') raise Exception('There are no new harvesting jobs') # Send each job to the gather queue diff --git a/ckanext/harvest/model/__init__.py b/ckanext/harvest/model/__init__.py index 205873e..79b73e1 100644 --- a/ckanext/harvest/model/__init__.py +++ b/ckanext/harvest/model/__init__.py @@ -36,7 +36,6 @@ harvest_object_error_table = None def setup(): - if harvest_source_table is None: define_harvester_tables() log.debug('Harvest tables defined in memory') diff --git a/ckanext/harvest/plugin.py b/ckanext/harvest/plugin.py index a548c3d..b2e61e9 100644 --- a/ckanext/harvest/plugin.py +++ b/ckanext/harvest/plugin.py @@ -122,7 +122,7 @@ class Harvest(SingletonPlugin): module_root = '%s.%s' % (module_root, auth_profile) auth_functions = _get_auth_functions(module_root,auth_functions) - log.info('Using auth profile at %s' % module_root) + log.debug('Using auth profile at %s' % module_root) return auth_functions diff --git a/ckanext/harvest/queue.py b/ckanext/harvest/queue.py index 817a9ad..819e790 100644 --- a/ckanext/harvest/queue.py +++ b/ckanext/harvest/queue.py @@ -29,7 +29,7 @@ EXCHANGE_NAME = 'ckan.harvest' def get_carrot_connection(): backend = config.get('ckan.harvest.mq.library', 'pyamqplib') - log.info("Carrot connnection using %s backend" % backend) + log.debug("Carrot connection using %s backend" % backend) try: port = int(config.get('ckan.harvest.mq.port', PORT)) except ValueError: diff --git a/ckanext/harvest/templates/index.html b/ckanext/harvest/templates/index.html index 6e037e0..aba093c 100644 --- a/ckanext/harvest/templates/index.html +++ b/ckanext/harvest/templates/index.html @@ -57,14 +57,10 @@ View Edit Refresh - - - ${source.url[:50]}... - - - ${source.url} - - + + ${truncate(source.url, 50)} ${source.type} ${source.active}