Merge branch '2.0-dataset-sources' of github.com:okfn/ckanext-harvest into 2.0-dataset-sources

This commit is contained in:
joetsoi 2013-02-28 18:11:12 +00:00
commit ffce2c7915
2 changed files with 42 additions and 16 deletions

View File

@ -8,7 +8,7 @@ from ckan.plugins import PluginImplementations
from ckanext.harvest.interfaces import IHarvester
import ckan.plugins as p
from ckan.logic import NotFound, check_access
from ckan.logic import NotFound, check_access, side_effect_free
from ckanext.harvest import model as harvest_model
@ -19,7 +19,7 @@ from ckanext.harvest.logic.dictization import (harvest_source_dictize,
from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema
log = logging.getLogger(__name__)
@side_effect_free
def harvest_source_show(context,data_dict):
'''
Returns the metadata of a harvest source
@ -42,6 +42,7 @@ def harvest_source_show(context,data_dict):
return source_dict
@side_effect_free
def harvest_source_show_status(context, data_dict):
'''
Returns a status report for a harvest source
@ -60,7 +61,7 @@ def harvest_source_show_status(context, data_dict):
source = harvest_model.HarvestSource.get(data_dict['id'])
if not source:
raise p.toolkit.NotFound('Harvest source {0} does not exist'.format(data_dict['id']))
raise p.toolkit.ObjectNotFound('Harvest source {0} does not exist'.format(data_dict['id']))
out = {
'job_count': 0,
@ -102,7 +103,7 @@ def harvest_source_show_status(context, data_dict):
return out
@side_effect_free
def harvest_source_list(context, data_dict):
check_access('harvest_source_list',context,data_dict)
@ -116,6 +117,7 @@ def harvest_source_list(context, data_dict):
context.update({'detailed':False})
return [harvest_source_dictize(source, context) for source in sources]
@side_effect_free
def harvest_source_for_a_dataset(context, data_dict):
'''For a given dataset, return the harvest source that
created or last updated it, otherwise NotFound.'''
@ -136,6 +138,7 @@ def harvest_source_for_a_dataset(context, data_dict):
return harvest_source_dictize(source,context)
@side_effect_free
def harvest_job_show(context,data_dict):
check_access('harvest_job_show',context,data_dict)
@ -149,6 +152,7 @@ def harvest_job_show(context,data_dict):
return harvest_job_dictize(job,context)
@side_effect_free
def harvest_job_report(context, data_dict):
check_access('harvest_job_show', context, data_dict)
@ -160,6 +164,24 @@ def harvest_job_report(context, data_dict):
if not job:
raise NotFound
# Skeleton of the job report returned to the caller.
# - 'gather_errors' is a list: one {'message': ...} entry is appended per
#   gather error.
# - 'object_errors' must be a dict, not a list: the object-error loop stores
#   and looks up entries by harvest object id
#   (report['object_errors'][error.harvest_object_id] = {...}), which would
#   raise TypeError on a list.
report = {
    'gather_errors': [],
    'object_errors': {}
}
# Gather errors
q = model.Session.query(harvest_model.HarvestGatherError) \
.join(harvest_model.HarvestJob) \
.filter(harvest_model.HarvestGatherError.harvest_job_id==job.id) \
.order_by(harvest_model.HarvestGatherError.created.desc())
for error in q.all():
report['gather_errors'].append({
'message': error.message
})
# Object errors
# Check if the harvester for this job's source has a method for returning
# the URL to the original document
original_url_builder = None
@ -173,19 +195,18 @@ def harvest_job_report(context, data_dict):
.filter(harvest_model.HarvestObject.harvest_job_id==job.id) \
.order_by(harvest_model.HarvestObjectError.harvest_object_id)
report = {}
for error, guid in q.all():
if not error.harvest_object_id in report:
report[error.harvest_object_id] = {
if not error.harvest_object_id in report['object_errors']:
report['object_errors'][error.harvest_object_id] = {
'guid': guid,
'errors': []
}
if original_url_builder:
url = original_url_builder(error.harvest_object_id)
if url:
report[error.harvest_object_id]['original_url'] = url
report['object_errors'][error.harvest_object_id]['original_url'] = url
report[error.harvest_object_id]['errors'].append({
report['object_errors'][error.harvest_object_id]['errors'].append({
'message': error.message,
'line': error.line,
'type': error.stage
@ -193,6 +214,7 @@ def harvest_job_report(context, data_dict):
return report
@side_effect_free
def harvest_job_list(context,data_dict):
check_access('harvest_job_list',context,data_dict)
@ -218,6 +240,7 @@ def harvest_job_list(context,data_dict):
context['return_error_summary'] = False
return [harvest_job_dictize(job, context) for job in jobs]
@side_effect_free
def harvest_object_show(context,data_dict):
check_access('harvest_object_show',context,data_dict)
@ -230,6 +253,7 @@ def harvest_object_show(context,data_dict):
return harvest_object_dictize(obj,context)
@side_effect_free
def harvest_object_list(context,data_dict):
check_access('harvest_object_list',context,data_dict)
@ -252,6 +276,7 @@ def harvest_object_list(context,data_dict):
return [getattr(obj,'id') for obj in objects]
@side_effect_free
def harvesters_info_show(context,data_dict):
check_access('harvesters_info_show',context,data_dict)

View File

@ -37,10 +37,6 @@ def harvest_job_dictize(job, context):
for status, count in stats:
out['stats'][status] = count
out['gather_errors'] = []
for error in job.gather_errors:
out['gather_errors'].append(error.as_dict())
if context.get('return_error_summary', True):
q = model.Session.query(HarvestObjectError.message, \
func.count(HarvestObjectError.message).label('error_count')) \
@ -49,9 +45,14 @@ def harvest_job_dictize(job, context):
.group_by(HarvestObjectError.message) \
.order_by('error_count desc') \
.limit(context.get('error_summmary_limit', 20))
out['error_summary'] = q.all()
out['object_error_summary'] = q.all()
q = model.Session.query(HarvestGatherError.message, \
func.count(HarvestGatherError.message).label('error_count')) \
.filter(HarvestGatherError.harvest_job_id==job.id) \
.group_by(HarvestGatherError.message) \
.order_by('error_count desc') \
.limit(context.get('error_summmary_limit', 20))
out['gather_error_summary'] = q.all()
return out
def harvest_object_dictize(obj, context):