Add copious logging to record what happens in harvesting.
This commit is contained in:
parent
a5fac2ac86
commit
ccf0cd3da2
|
@ -1,4 +1,5 @@
|
|||
import re
|
||||
import logging
|
||||
|
||||
from ckan.logic import NotFound, ValidationError, check_access
|
||||
from ckan.lib.navl.dictization_functions import validate
|
||||
|
@ -9,8 +10,11 @@ from ckanext.harvest.logic.dictization import (harvest_source_dictize,
|
|||
harvest_job_dictize)
|
||||
from ckanext.harvest.logic.action.get import harvest_source_list,harvest_job_list
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
def harvest_source_create(context,data_dict):
|
||||
|
||||
log.info('Creating harvest source: %r', data_dict)
|
||||
check_access('harvest_source_create',context,data_dict)
|
||||
|
||||
model = context['model']
|
||||
|
@ -21,6 +25,7 @@ def harvest_source_create(context,data_dict):
|
|||
|
||||
if errors:
|
||||
session.rollback()
|
||||
log.warn('Harvest source does not validate: %r', errors)
|
||||
raise ValidationError(errors,_error_summary(errors))
|
||||
|
||||
source = HarvestSource()
|
||||
|
@ -36,11 +41,12 @@ def harvest_source_create(context,data_dict):
|
|||
source.active = data['active']
|
||||
|
||||
source.save()
|
||||
log.info('Harvest source created: %s', source.id)
|
||||
|
||||
return harvest_source_dictize(source,context)
|
||||
|
||||
def harvest_job_create(context,data_dict):
|
||||
|
||||
log.info('Harvest job create: %r', data_dict)
|
||||
check_access('harvest_job_create',context,data_dict)
|
||||
|
||||
source_id = data_dict['source_id']
|
||||
|
@ -48,10 +54,12 @@ def harvest_job_create(context,data_dict):
|
|||
# Check if source exists
|
||||
source = HarvestSource.get(source_id)
|
||||
if not source:
|
||||
log.warn('Harvest source %s does not exist', source_id)
|
||||
raise NotFound('Harvest source %s does not exist' % source_id)
|
||||
|
||||
# Check if the source is active
|
||||
if not source.active:
|
||||
log.warn('Harvest job cannot be created for inactive source %s', source_id)
|
||||
raise Exception('Can not create jobs on inactive sources')
|
||||
|
||||
# Check if there already is an unrun job for this source
|
||||
|
@ -61,16 +69,18 @@ def harvest_job_create(context,data_dict):
|
|||
}
|
||||
exists = harvest_job_list(context,data_dict)
|
||||
if len(exists):
|
||||
log.warn('There is already an unrun job %r for this source %s', exists, source_id)
|
||||
raise Exception('There already is an unrun job for this source')
|
||||
|
||||
job = HarvestJob()
|
||||
job.source = source
|
||||
|
||||
job.save()
|
||||
log.info('Harvest job saved %s', job.id)
|
||||
return harvest_job_dictize(job,context)
|
||||
|
||||
def harvest_job_create_all(context,data_dict):
|
||||
|
||||
log.info('Harvest job create all: %r', data_dict)
|
||||
check_access('harvest_job_create_all',context,data_dict)
|
||||
|
||||
data_dict.update({'only_active':True})
|
||||
|
@ -92,6 +102,7 @@ def harvest_job_create_all(context,data_dict):
|
|||
job = harvest_job_create(context,{'source_id':source['id']})
|
||||
jobs.append(job)
|
||||
|
||||
log.info('Created jobs for %i harvest sources', len(jobs))
|
||||
return jobs
|
||||
|
||||
def _error_summary(error_dict):
|
||||
|
|
|
@ -1,15 +1,19 @@
|
|||
import logging
|
||||
|
||||
from ckan.logic import NotFound, check_access
|
||||
|
||||
from ckanext.harvest.model import (HarvestSource, HarvestJob)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
def harvest_source_delete(context,data_dict):
|
||||
|
||||
log.info('Deleting harvest source: %r', data_dict)
|
||||
check_access('harvest_source_delete',context,data_dict)
|
||||
|
||||
source_id = data_dict.get('id')
|
||||
source = HarvestSource.get(source_id)
|
||||
if not source:
|
||||
log.warn('Harvest source %s does not exist', source_id)
|
||||
raise NotFound('Harvest source %s does not exist' % source_id)
|
||||
|
||||
# Don't actually delete the record, just flag it as inactive
|
||||
|
@ -19,8 +23,10 @@ def harvest_source_delete(context,data_dict):
|
|||
# Abort any pending jobs
|
||||
jobs = HarvestJob.filter(source=source,status=u'New')
|
||||
if jobs:
|
||||
log.info('Aborting %i jobs due to deleted harvest source', jobs.count())
|
||||
for job in jobs:
|
||||
job.status = u'Aborted'
|
||||
job.save()
|
||||
|
||||
log.info('Harvest source %s deleted', source_id)
|
||||
return True
|
||||
|
|
|
@ -30,8 +30,10 @@ def harvest_source_update(context,data_dict):
|
|||
source_id = data_dict.get('id')
|
||||
schema = context.get('schema') or default_harvest_source_schema()
|
||||
|
||||
log.info('Harvest source %s update: %r', source_id, data_dict)
|
||||
source = HarvestSource.get(source_id)
|
||||
if not source:
|
||||
log.error('Harvest source %s does not exist', source_id)
|
||||
raise NotFound('Harvest source %s does not exist' % source_id)
|
||||
|
||||
data, errors = validate(data_dict, schema)
|
||||
|
@ -55,6 +57,7 @@ def harvest_source_update(context,data_dict):
|
|||
# Abort any pending jobs
|
||||
if not source.active:
|
||||
jobs = HarvestJob.filter(source=source,status=u'New')
|
||||
log.info('Harvest source %s not active, so aborting %i outstanding jobs', source_id, jobs.count())
|
||||
if jobs:
|
||||
for job in jobs:
|
||||
job.status = u'Aborted'
|
||||
|
@ -71,6 +74,7 @@ def harvest_objects_import(context,data_dict):
|
|||
It will only affect the last fetched objects already present in the
|
||||
database.
|
||||
'''
|
||||
log.info('Harvest objects import: %r', data_dict)
|
||||
check_access('harvest_objects_import',context,data_dict)
|
||||
|
||||
model = context['model']
|
||||
|
@ -80,9 +84,11 @@ def harvest_objects_import(context,data_dict):
|
|||
if source_id:
|
||||
source = HarvestSource.get(source_id)
|
||||
if not source:
|
||||
log.error('Harvest source %s does not exist', source_id)
|
||||
raise NotFound('Harvest source %s does not exist' % source_id)
|
||||
|
||||
if not source.active:
|
||||
log.warn('Harvest source %s is not active.', source_id)
|
||||
raise Exception('This harvest source is not active')
|
||||
|
||||
last_objects_ids = session.query(HarvestObject.id) \
|
||||
|
@ -108,10 +114,11 @@ def harvest_objects_import(context,data_dict):
|
|||
harvester.import_stage(obj)
|
||||
break
|
||||
last_objects.append(harvest_object_dictize(obj,context))
|
||||
log.info('Harvest objects imported: %r', last_objects)
|
||||
return last_objects
|
||||
|
||||
def harvest_jobs_run(context,data_dict):
|
||||
|
||||
log.info('Harvest job run: %r', data_dict)
|
||||
check_access('harvest_jobs_run',context,data_dict)
|
||||
|
||||
source_id = data_dict.get('source_id',None)
|
||||
|
@ -119,6 +126,7 @@ def harvest_jobs_run(context,data_dict):
|
|||
# Check if there are pending harvest jobs
|
||||
jobs = harvest_job_list(context,{'source_id':source_id,'status':u'New'})
|
||||
if len(jobs) == 0:
|
||||
log.info('No new harvest jobs.')
|
||||
raise Exception('There are no new harvesting jobs')
|
||||
|
||||
# Send each job to the gather queue
|
||||
|
|
|
@ -36,7 +36,6 @@ harvest_object_error_table = None
|
|||
|
||||
def setup():
|
||||
|
||||
|
||||
if harvest_source_table is None:
|
||||
define_harvester_tables()
|
||||
log.debug('Harvest tables defined in memory')
|
||||
|
|
|
@ -122,7 +122,7 @@ class Harvest(SingletonPlugin):
|
|||
module_root = '%s.%s' % (module_root, auth_profile)
|
||||
auth_functions = _get_auth_functions(module_root,auth_functions)
|
||||
|
||||
log.info('Using auth profile at %s' % module_root)
|
||||
log.debug('Using auth profile at %s' % module_root)
|
||||
|
||||
return auth_functions
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ EXCHANGE_NAME = 'ckan.harvest'
|
|||
|
||||
def get_carrot_connection():
|
||||
backend = config.get('ckan.harvest.mq.library', 'pyamqplib')
|
||||
log.info("Carrot connnection using %s backend" % backend)
|
||||
log.debug("Carrot connection using %s backend" % backend)
|
||||
try:
|
||||
port = int(config.get('ckan.harvest.mq.port', PORT))
|
||||
except ValueError:
|
||||
|
|
|
@ -57,14 +57,10 @@
|
|||
<td><a href="harvest/${source.id}"><img src="ckanext/harvest/images/icons/source_view.png" alt="View" title="View" /></a></td>
|
||||
<td><a href="harvest/edit/${source.id}"><img src="ckanext/harvest/images/icons/source_edit.png" alt="Edit" title="Edit" /></a></td>
|
||||
<td><a href="harvest/refresh/${source.id}"><img src="ckanext/harvest/images/icons/source_refresh.png" alt="Refresh" title="Refresh" /></a></td>
|
||||
<py:choose>
|
||||
<py:when test="source.url and len(source.url)>50">
|
||||
<td>${source.url[:50]}<span title="${source.url}">...</span></td>
|
||||
</py:when>
|
||||
<py:otherwise>
|
||||
<td>${source.url}</td>
|
||||
</py:otherwise>
|
||||
</py:choose>
|
||||
<?python
|
||||
from webhelpers.text import truncate
|
||||
?>
|
||||
<td title="${source.url}">${truncate(source.url, 50)}</td>
|
||||
<td>${source.type}</td>
|
||||
<td class="state">${source.active}</td>
|
||||
<py:choose>
|
||||
|
|
Loading…
Reference in New Issue