Merge branch '2.0-dataset-sources' of github.com:okfn/ckanext-harvest into 2.0-dataset-sources
This commit is contained in:
commit
ffce2c7915
|
@ -8,7 +8,7 @@ from ckan.plugins import PluginImplementations
|
||||||
from ckanext.harvest.interfaces import IHarvester
|
from ckanext.harvest.interfaces import IHarvester
|
||||||
|
|
||||||
import ckan.plugins as p
|
import ckan.plugins as p
|
||||||
from ckan.logic import NotFound, check_access
|
from ckan.logic import NotFound, check_access, side_effect_free
|
||||||
|
|
||||||
from ckanext.harvest import model as harvest_model
|
from ckanext.harvest import model as harvest_model
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ from ckanext.harvest.logic.dictization import (harvest_source_dictize,
|
||||||
from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema
|
from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvest_source_show(context,data_dict):
|
def harvest_source_show(context,data_dict):
|
||||||
'''
|
'''
|
||||||
Returns the metadata of a harvest source
|
Returns the metadata of a harvest source
|
||||||
|
@ -42,6 +42,7 @@ def harvest_source_show(context,data_dict):
|
||||||
|
|
||||||
return source_dict
|
return source_dict
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvest_source_show_status(context, data_dict):
|
def harvest_source_show_status(context, data_dict):
|
||||||
'''
|
'''
|
||||||
Returns a status report for a harvest source
|
Returns a status report for a harvest source
|
||||||
|
@ -60,7 +61,7 @@ def harvest_source_show_status(context, data_dict):
|
||||||
|
|
||||||
source = harvest_model.HarvestSource.get(data_dict['id'])
|
source = harvest_model.HarvestSource.get(data_dict['id'])
|
||||||
if not source:
|
if not source:
|
||||||
raise p.toolkit.NotFound('Harvest source {0} does not exist'.format(data_dict['id']))
|
raise p.toolkit.ObjectNotFound('Harvest source {0} does not exist'.format(data_dict['id']))
|
||||||
|
|
||||||
out = {
|
out = {
|
||||||
'job_count': 0,
|
'job_count': 0,
|
||||||
|
@ -102,7 +103,7 @@ def harvest_source_show_status(context, data_dict):
|
||||||
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvest_source_list(context, data_dict):
|
def harvest_source_list(context, data_dict):
|
||||||
|
|
||||||
check_access('harvest_source_list',context,data_dict)
|
check_access('harvest_source_list',context,data_dict)
|
||||||
|
@ -116,6 +117,7 @@ def harvest_source_list(context, data_dict):
|
||||||
context.update({'detailed':False})
|
context.update({'detailed':False})
|
||||||
return [harvest_source_dictize(source, context) for source in sources]
|
return [harvest_source_dictize(source, context) for source in sources]
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvest_source_for_a_dataset(context, data_dict):
|
def harvest_source_for_a_dataset(context, data_dict):
|
||||||
'''For a given dataset, return the harvest source that
|
'''For a given dataset, return the harvest source that
|
||||||
created or last updated it, otherwise NotFound.'''
|
created or last updated it, otherwise NotFound.'''
|
||||||
|
@ -136,6 +138,7 @@ def harvest_source_for_a_dataset(context, data_dict):
|
||||||
|
|
||||||
return harvest_source_dictize(source,context)
|
return harvest_source_dictize(source,context)
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvest_job_show(context,data_dict):
|
def harvest_job_show(context,data_dict):
|
||||||
|
|
||||||
check_access('harvest_job_show',context,data_dict)
|
check_access('harvest_job_show',context,data_dict)
|
||||||
|
@ -149,6 +152,7 @@ def harvest_job_show(context,data_dict):
|
||||||
|
|
||||||
return harvest_job_dictize(job,context)
|
return harvest_job_dictize(job,context)
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvest_job_report(context, data_dict):
|
def harvest_job_report(context, data_dict):
|
||||||
|
|
||||||
check_access('harvest_job_show', context, data_dict)
|
check_access('harvest_job_show', context, data_dict)
|
||||||
|
@ -160,6 +164,24 @@ def harvest_job_report(context, data_dict):
|
||||||
if not job:
|
if not job:
|
||||||
raise NotFound
|
raise NotFound
|
||||||
|
|
||||||
|
report = {
|
||||||
|
'gather_errors': [],
|
||||||
|
'object_errors': []
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gather errors
|
||||||
|
q = model.Session.query(harvest_model.HarvestGatherError) \
|
||||||
|
.join(harvest_model.HarvestJob) \
|
||||||
|
.filter(harvest_model.HarvestGatherError.harvest_job_id==job.id) \
|
||||||
|
.order_by(harvest_model.HarvestGatherError.created.desc())
|
||||||
|
|
||||||
|
for error in q.all():
|
||||||
|
report['gather_errors'].append({
|
||||||
|
'message': error.message
|
||||||
|
})
|
||||||
|
|
||||||
|
# Object errors
|
||||||
|
|
||||||
# Check if the harvester for this job's source has a method for returning
|
# Check if the harvester for this job's source has a method for returning
|
||||||
# the URL to the original document
|
# the URL to the original document
|
||||||
original_url_builder = None
|
original_url_builder = None
|
||||||
|
@ -173,19 +195,18 @@ def harvest_job_report(context, data_dict):
|
||||||
.filter(harvest_model.HarvestObject.harvest_job_id==job.id) \
|
.filter(harvest_model.HarvestObject.harvest_job_id==job.id) \
|
||||||
.order_by(harvest_model.HarvestObjectError.harvest_object_id)
|
.order_by(harvest_model.HarvestObjectError.harvest_object_id)
|
||||||
|
|
||||||
report = {}
|
|
||||||
for error, guid in q.all():
|
for error, guid in q.all():
|
||||||
if not error.harvest_object_id in report:
|
if not error.harvest_object_id in report['object_errors']:
|
||||||
report[error.harvest_object_id] = {
|
report['object_errors'][error.harvest_object_id] = {
|
||||||
'guid': guid,
|
'guid': guid,
|
||||||
'errors': []
|
'errors': []
|
||||||
}
|
}
|
||||||
if original_url_builder:
|
if original_url_builder:
|
||||||
url = original_url_builder(error.harvest_object_id)
|
url = original_url_builder(error.harvest_object_id)
|
||||||
if url:
|
if url:
|
||||||
report[error.harvest_object_id]['original_url'] = url
|
report['object_errors'][error.harvest_object_id]['original_url'] = url
|
||||||
|
|
||||||
report[error.harvest_object_id]['errors'].append({
|
report['object_errors'][error.harvest_object_id]['errors'].append({
|
||||||
'message': error.message,
|
'message': error.message,
|
||||||
'line': error.line,
|
'line': error.line,
|
||||||
'type': error.stage
|
'type': error.stage
|
||||||
|
@ -193,6 +214,7 @@ def harvest_job_report(context, data_dict):
|
||||||
|
|
||||||
return report
|
return report
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvest_job_list(context,data_dict):
|
def harvest_job_list(context,data_dict):
|
||||||
|
|
||||||
check_access('harvest_job_list',context,data_dict)
|
check_access('harvest_job_list',context,data_dict)
|
||||||
|
@ -218,6 +240,7 @@ def harvest_job_list(context,data_dict):
|
||||||
context['return_error_summary'] = False
|
context['return_error_summary'] = False
|
||||||
return [harvest_job_dictize(job, context) for job in jobs]
|
return [harvest_job_dictize(job, context) for job in jobs]
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvest_object_show(context,data_dict):
|
def harvest_object_show(context,data_dict):
|
||||||
|
|
||||||
check_access('harvest_object_show',context,data_dict)
|
check_access('harvest_object_show',context,data_dict)
|
||||||
|
@ -230,6 +253,7 @@ def harvest_object_show(context,data_dict):
|
||||||
|
|
||||||
return harvest_object_dictize(obj,context)
|
return harvest_object_dictize(obj,context)
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvest_object_list(context,data_dict):
|
def harvest_object_list(context,data_dict):
|
||||||
|
|
||||||
check_access('harvest_object_list',context,data_dict)
|
check_access('harvest_object_list',context,data_dict)
|
||||||
|
@ -252,6 +276,7 @@ def harvest_object_list(context,data_dict):
|
||||||
|
|
||||||
return [getattr(obj,'id') for obj in objects]
|
return [getattr(obj,'id') for obj in objects]
|
||||||
|
|
||||||
|
@side_effect_free
|
||||||
def harvesters_info_show(context,data_dict):
|
def harvesters_info_show(context,data_dict):
|
||||||
|
|
||||||
check_access('harvesters_info_show',context,data_dict)
|
check_access('harvesters_info_show',context,data_dict)
|
||||||
|
|
|
@ -37,10 +37,6 @@ def harvest_job_dictize(job, context):
|
||||||
for status, count in stats:
|
for status, count in stats:
|
||||||
out['stats'][status] = count
|
out['stats'][status] = count
|
||||||
|
|
||||||
out['gather_errors'] = []
|
|
||||||
for error in job.gather_errors:
|
|
||||||
out['gather_errors'].append(error.as_dict())
|
|
||||||
|
|
||||||
if context.get('return_error_summary', True):
|
if context.get('return_error_summary', True):
|
||||||
q = model.Session.query(HarvestObjectError.message, \
|
q = model.Session.query(HarvestObjectError.message, \
|
||||||
func.count(HarvestObjectError.message).label('error_count')) \
|
func.count(HarvestObjectError.message).label('error_count')) \
|
||||||
|
@ -49,9 +45,14 @@ def harvest_job_dictize(job, context):
|
||||||
.group_by(HarvestObjectError.message) \
|
.group_by(HarvestObjectError.message) \
|
||||||
.order_by('error_count desc') \
|
.order_by('error_count desc') \
|
||||||
.limit(context.get('error_summmary_limit', 20))
|
.limit(context.get('error_summmary_limit', 20))
|
||||||
|
out['object_error_summary'] = q.all()
|
||||||
out['error_summary'] = q.all()
|
q = model.Session.query(HarvestGatherError.message, \
|
||||||
|
func.count(HarvestGatherError.message).label('error_count')) \
|
||||||
|
.filter(HarvestGatherError.harvest_job_id==job.id) \
|
||||||
|
.group_by(HarvestGatherError.message) \
|
||||||
|
.order_by('error_count desc') \
|
||||||
|
.limit(context.get('error_summmary_limit', 20))
|
||||||
|
out['gather_error_summary'] = q.all()
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def harvest_object_dictize(obj, context):
|
def harvest_object_dictize(obj, context):
|
||||||
|
|
Loading…
Reference in New Issue