Merge branch '2.0-dataset-sources' of github.com:okfn/ckanext-harvest into 2.0-dataset-sources
This commit is contained in:
commit
ffce2c7915
|
@ -8,7 +8,7 @@ from ckan.plugins import PluginImplementations
|
|||
from ckanext.harvest.interfaces import IHarvester
|
||||
|
||||
import ckan.plugins as p
|
||||
from ckan.logic import NotFound, check_access
|
||||
from ckan.logic import NotFound, check_access, side_effect_free
|
||||
|
||||
from ckanext.harvest import model as harvest_model
|
||||
|
||||
|
@ -19,7 +19,7 @@ from ckanext.harvest.logic.dictization import (harvest_source_dictize,
|
|||
from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@side_effect_free
|
||||
def harvest_source_show(context,data_dict):
|
||||
'''
|
||||
Returns the metadata of a harvest source
|
||||
|
@ -42,6 +42,7 @@ def harvest_source_show(context,data_dict):
|
|||
|
||||
return source_dict
|
||||
|
||||
@side_effect_free
|
||||
def harvest_source_show_status(context, data_dict):
|
||||
'''
|
||||
Returns a status report for a harvest source
|
||||
|
@ -60,7 +61,7 @@ def harvest_source_show_status(context, data_dict):
|
|||
|
||||
source = harvest_model.HarvestSource.get(data_dict['id'])
|
||||
if not source:
|
||||
raise p.toolkit.NotFound('Harvest source {0} does not exist'.format(data_dict['id']))
|
||||
raise p.toolkit.ObjectNotFound('Harvest source {0} does not exist'.format(data_dict['id']))
|
||||
|
||||
out = {
|
||||
'job_count': 0,
|
||||
|
@ -102,7 +103,7 @@ def harvest_source_show_status(context, data_dict):
|
|||
|
||||
return out
|
||||
|
||||
|
||||
@side_effect_free
|
||||
def harvest_source_list(context, data_dict):
|
||||
|
||||
check_access('harvest_source_list',context,data_dict)
|
||||
|
@ -116,6 +117,7 @@ def harvest_source_list(context, data_dict):
|
|||
context.update({'detailed':False})
|
||||
return [harvest_source_dictize(source, context) for source in sources]
|
||||
|
||||
@side_effect_free
|
||||
def harvest_source_for_a_dataset(context, data_dict):
|
||||
'''For a given dataset, return the harvest source that
|
||||
created or last updated it, otherwise NotFound.'''
|
||||
|
@ -136,6 +138,7 @@ def harvest_source_for_a_dataset(context, data_dict):
|
|||
|
||||
return harvest_source_dictize(source,context)
|
||||
|
||||
@side_effect_free
|
||||
def harvest_job_show(context,data_dict):
|
||||
|
||||
check_access('harvest_job_show',context,data_dict)
|
||||
|
@ -149,6 +152,7 @@ def harvest_job_show(context,data_dict):
|
|||
|
||||
return harvest_job_dictize(job,context)
|
||||
|
||||
@side_effect_free
|
||||
def harvest_job_report(context, data_dict):
|
||||
|
||||
check_access('harvest_job_show', context, data_dict)
|
||||
|
@ -160,6 +164,24 @@ def harvest_job_report(context, data_dict):
|
|||
if not job:
|
||||
raise NotFound
|
||||
|
||||
report = {
|
||||
'gather_errors': [],
|
||||
'object_errors': []
|
||||
}
|
||||
|
||||
# Gather errors
|
||||
q = model.Session.query(harvest_model.HarvestGatherError) \
|
||||
.join(harvest_model.HarvestJob) \
|
||||
.filter(harvest_model.HarvestGatherError.harvest_job_id==job.id) \
|
||||
.order_by(harvest_model.HarvestGatherError.created.desc())
|
||||
|
||||
for error in q.all():
|
||||
report['gather_errors'].append({
|
||||
'message': error.message
|
||||
})
|
||||
|
||||
# Object errors
|
||||
|
||||
# Check if the harvester for this job's source has a method for returning
|
||||
# the URL to the original document
|
||||
original_url_builder = None
|
||||
|
@ -173,19 +195,18 @@ def harvest_job_report(context, data_dict):
|
|||
.filter(harvest_model.HarvestObject.harvest_job_id==job.id) \
|
||||
.order_by(harvest_model.HarvestObjectError.harvest_object_id)
|
||||
|
||||
report = {}
|
||||
for error, guid in q.all():
|
||||
if not error.harvest_object_id in report:
|
||||
report[error.harvest_object_id] = {
|
||||
if not error.harvest_object_id in report['object_errors']:
|
||||
report['object_errors'][error.harvest_object_id] = {
|
||||
'guid': guid,
|
||||
'errors': []
|
||||
}
|
||||
if original_url_builder:
|
||||
url = original_url_builder(error.harvest_object_id)
|
||||
if url:
|
||||
report[error.harvest_object_id]['original_url'] = url
|
||||
report['object_errors'][error.harvest_object_id]['original_url'] = url
|
||||
|
||||
report[error.harvest_object_id]['errors'].append({
|
||||
report['object_errors'][error.harvest_object_id]['errors'].append({
|
||||
'message': error.message,
|
||||
'line': error.line,
|
||||
'type': error.stage
|
||||
|
@ -193,6 +214,7 @@ def harvest_job_report(context, data_dict):
|
|||
|
||||
return report
|
||||
|
||||
@side_effect_free
|
||||
def harvest_job_list(context,data_dict):
|
||||
|
||||
check_access('harvest_job_list',context,data_dict)
|
||||
|
@ -218,6 +240,7 @@ def harvest_job_list(context,data_dict):
|
|||
context['return_error_summary'] = False
|
||||
return [harvest_job_dictize(job, context) for job in jobs]
|
||||
|
||||
@side_effect_free
|
||||
def harvest_object_show(context,data_dict):
|
||||
|
||||
check_access('harvest_object_show',context,data_dict)
|
||||
|
@ -230,6 +253,7 @@ def harvest_object_show(context,data_dict):
|
|||
|
||||
return harvest_object_dictize(obj,context)
|
||||
|
||||
@side_effect_free
|
||||
def harvest_object_list(context,data_dict):
|
||||
|
||||
check_access('harvest_object_list',context,data_dict)
|
||||
|
@ -252,6 +276,7 @@ def harvest_object_list(context,data_dict):
|
|||
|
||||
return [getattr(obj,'id') for obj in objects]
|
||||
|
||||
@side_effect_free
|
||||
def harvesters_info_show(context,data_dict):
|
||||
|
||||
check_access('harvesters_info_show',context,data_dict)
|
||||
|
|
|
@ -37,10 +37,6 @@ def harvest_job_dictize(job, context):
|
|||
for status, count in stats:
|
||||
out['stats'][status] = count
|
||||
|
||||
out['gather_errors'] = []
|
||||
for error in job.gather_errors:
|
||||
out['gather_errors'].append(error.as_dict())
|
||||
|
||||
if context.get('return_error_summary', True):
|
||||
q = model.Session.query(HarvestObjectError.message, \
|
||||
func.count(HarvestObjectError.message).label('error_count')) \
|
||||
|
@ -49,9 +45,14 @@ def harvest_job_dictize(job, context):
|
|||
.group_by(HarvestObjectError.message) \
|
||||
.order_by('error_count desc') \
|
||||
.limit(context.get('error_summmary_limit', 20))
|
||||
|
||||
out['error_summary'] = q.all()
|
||||
|
||||
out['object_error_summary'] = q.all()
|
||||
q = model.Session.query(HarvestGatherError.message, \
|
||||
func.count(HarvestGatherError.message).label('error_count')) \
|
||||
.filter(HarvestGatherError.harvest_job_id==job.id) \
|
||||
.group_by(HarvestGatherError.message) \
|
||||
.order_by('error_count desc') \
|
||||
.limit(context.get('error_summmary_limit', 20))
|
||||
out['gather_error_summary'] = q.all()
|
||||
return out
|
||||
|
||||
def harvest_object_dictize(obj, context):
|
||||
|
|
Loading…
Reference in New Issue