Merge pull request #200 from ckan/fix-error-report-status

Fix job stats key
This commit is contained in:
David Read 2015-11-30 17:49:14 +00:00
commit 673a64a820
2 changed files with 74 additions and 65 deletions

View File

@ -1,9 +1,8 @@
from sqlalchemy import distinct, func
import ckan.logic as logic
from ckan.model import Package,Group
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject, \
HarvestGatherError, HarvestObjectError
from ckan.model import Package, Group
from ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject,
HarvestGatherError, HarvestObjectError)
def harvest_source_dictize(source, context):
@ -17,15 +16,15 @@ def harvest_source_dictize(source, context):
publisher_id = out.get('publisher_id')
if publisher_id:
group = Group.get(publisher_id)
group = Group.get(publisher_id)
if group:
out['publisher_title'] = group.title
out['status'] = _get_source_status(source, context)
return out
def harvest_job_dictize(job, context):
out = job.as_dict()
@ -35,47 +34,51 @@ def harvest_job_dictize(job, context):
stats = model.Session.query(
HarvestObject.report_status,
func.count(HarvestObject.id).label('total_objects'))\
.filter_by(harvest_job_id=job.id)\
.group_by(HarvestObject.report_status).all()
.filter_by(harvest_job_id=job.id)\
.group_by(HarvestObject.report_status).all()
out['stats'] = {'added': 0, 'updated': 0, 'not modified': 0,
'errors': 0, 'deleted': 0}
'errored': 0, 'deleted': 0}
for status, count in stats:
out['stats'][status] = count
# We actually want to check which objects had errors, because they
# could have been added/updated anyway (eg bbox errors)
count = model.Session.query(func.distinct(HarvestObjectError.harvest_object_id)) \
.join(HarvestObject) \
.filter(HarvestObject.harvest_job_id==job.id) \
.count()
count = model.Session.query(
func.distinct(HarvestObjectError.harvest_object_id)) \
.join(HarvestObject) \
.filter(HarvestObject.harvest_job_id == job.id) \
.count()
if count > 0:
out['stats']['errored'] = count
out['stats']['errored'] = count
# Add gather errors to the error count
count = model.Session.query(HarvestGatherError) \
.filter(HarvestGatherError.harvest_job_id==job.id) \
.count()
.filter(HarvestGatherError.harvest_job_id == job.id) \
.count()
if count > 0:
out['stats']['errored'] = out['stats'].get('errored', 0) + count
out['stats']['errored'] = out['stats'].get('errored', 0) + count
if context.get('return_error_summary', True):
q = model.Session.query(HarvestObjectError.message, \
func.count(HarvestObjectError.message).label('error_count')) \
.join(HarvestObject) \
.filter(HarvestObject.harvest_job_id==job.id) \
.group_by(HarvestObjectError.message) \
.order_by('error_count desc') \
.limit(context.get('error_summmary_limit', 20))
q = model.Session.query(
HarvestObjectError.message,
func.count(HarvestObjectError.message).label('error_count')) \
.join(HarvestObject) \
.filter(HarvestObject.harvest_job_id == job.id) \
.group_by(HarvestObjectError.message) \
.order_by('error_count desc') \
.limit(context.get('error_summmary_limit', 20))
out['object_error_summary'] = q.all()
q = model.Session.query(HarvestGatherError.message, \
func.count(HarvestGatherError.message).label('error_count')) \
.filter(HarvestGatherError.harvest_job_id==job.id) \
.group_by(HarvestGatherError.message) \
.order_by('error_count desc') \
.limit(context.get('error_summmary_limit', 20))
q = model.Session.query(
HarvestGatherError.message,
func.count(HarvestGatherError.message).label('error_count')) \
.filter(HarvestGatherError.harvest_job_id == job.id) \
.group_by(HarvestGatherError.message) \
.order_by('error_count desc') \
.limit(context.get('error_summmary_limit', 20))
out['gather_error_summary'] = q.all()
return out
def harvest_object_dictize(obj, context):
out = obj.as_dict()
out['source'] = obj.harvest_source_id
@ -94,26 +97,28 @@ def harvest_object_dictize(obj, context):
return out
def _get_source_status(source, context):
'''
TODO: Deprecated, use harvest_source_show_status instead
'''
model = context.get('model')
detailed = context.get('detailed',True)
detailed = context.get('detailed', True)
out = dict()
job_count = HarvestJob.filter(source=source).count()
out = {
'job_count': 0,
'next_harvest':'',
'last_harvest_request':'',
'last_harvest_statistics':{'added':0,'updated':0,'errors':0,'deleted':0},
'last_harvest_errors':{'gather':[],'object':[]},
'overall_statistics':{'added':0, 'errors':0},
'packages':[]}
'job_count': 0,
'next_harvest': '',
'last_harvest_request': '',
'last_harvest_statistics':
{'added': 0, 'updated': 0, 'errors': 0, 'deleted': 0},
'last_harvest_errors': {'gather': [], 'object': []},
'overall_statistics': {'added': 0, 'errors': 0},
'packages': []}
if not job_count:
out['msg'] = 'No jobs yet'
@ -122,15 +127,15 @@ def _get_source_status(source, context):
out['job_count'] = job_count
# Get next scheduled job
next_job = HarvestJob.filter(source=source,status=u'New').first()
next_job = HarvestJob.filter(source=source, status=u'New').first()
if next_job:
out['next_harvest'] = 'Scheduled'
else:
out['next_harvest'] = 'Not yet scheduled'
# Get the last finished job
last_job = HarvestJob.filter(source=source,status=u'Finished') \
.order_by(HarvestJob.created.desc()).first()
last_job = HarvestJob.filter(source=source, status=u'Finished') \
.order_by(HarvestJob.created.desc()).first()
if last_job:
#TODO: Should we encode the dates as strings?
@ -140,30 +145,35 @@ def _get_source_status(source, context):
harvest_job_dict = harvest_job_dictize(last_job, context)
# No packages added or updated
statistics = out['last_harvest_statistics']
statistics['added'] = harvest_job_dict['stats'].get('new',0)
statistics['updated'] = harvest_job_dict['stats'].get('updated',0)
statistics['deleted'] = harvest_job_dict['stats'].get('deleted',0)
statistics['errors'] = (harvest_job_dict['stats'].get('errored',0) +
len(last_job.gather_errors))
statistics['added'] = harvest_job_dict['stats'].get('new', 0)
statistics['updated'] = harvest_job_dict['stats'].get('updated', 0)
statistics['deleted'] = harvest_job_dict['stats'].get('deleted', 0)
statistics['errors'] = (
harvest_job_dict['stats'].get('errored', 0) +
len(last_job.gather_errors))
if detailed:
# We have the gathering errors in last_job.gather_errors, so let's also
# get also the object errors.
object_errors = model.Session.query(HarvestObjectError).join(HarvestObject) \
.filter(HarvestObject.job==last_job)
# We have the gathering errors in last_job.gather_errors, so let's
# also get also the object errors.
object_errors = model.Session.query(HarvestObjectError)\
.join(HarvestObject) \
.filter(HarvestObject.job == last_job)
for gather_error in last_job.gather_errors:
out['last_harvest_errors']['gather'].append(gather_error.message)
for object_error in object_errors:
err = {'object_id':object_error.object.id,'object_guid':object_error.object.guid,'message': object_error.message}
err = {'object_id': object_error.object.id,
'object_guid': object_error.object.guid,
'message': object_error.message}
out['last_harvest_errors']['object'].append(err)
# Overall statistics
packages = model.Session.query(distinct(HarvestObject.package_id),Package.name) \
.join(Package).join(HarvestSource) \
.filter(HarvestObject.source==source) \
.filter(HarvestObject.current==True) \
.filter(Package.state==u'active')
packages = model.Session.query(distinct(HarvestObject.package_id),
Package.name) \
.join(Package).join(HarvestSource) \
.filter(HarvestObject.source == source) \
.filter(HarvestObject.current == True) \
.filter(Package.state == u'active')
out['overall_statistics']['added'] = packages.count()
if detailed:
@ -171,15 +181,14 @@ def _get_source_status(source, context):
out['packages'].append(package.name)
gather_errors = model.Session.query(HarvestGatherError) \
.join(HarvestJob).join(HarvestSource) \
.filter(HarvestJob.source==source).count()
.join(HarvestJob).join(HarvestSource) \
.filter(HarvestJob.source == source).count()
object_errors = model.Session.query(HarvestObjectError) \
.join(HarvestObject).join(HarvestJob).join(HarvestSource) \
.filter(HarvestJob.source==source).count()
.join(HarvestObject).join(HarvestJob).join(HarvestSource) \
.filter(HarvestJob.source == source).count()
out['overall_statistics']['errors'] = gather_errors + object_errors
else:
out['last_harvest_request'] = 'Not yet harvested'
return out

View File

@ -189,14 +189,14 @@ class TestHarvestQueue(object):
)
assert_equal(harvest_job['status'], u'Finished')
assert_equal(harvest_job['stats'], {'added': 3, 'updated': 0, 'not modified': 0, 'errors': 0, 'deleted': 0})
assert_equal(harvest_job['stats'], {'added': 3, 'updated': 0, 'not modified': 0, 'errored': 0, 'deleted': 0})
harvest_source_dict = logic.get_action('harvest_source_show')(
context,
{'id': harvest_source['id']}
)
assert_equal(harvest_source_dict['status']['last_job']['stats'], {'added': 3, 'updated': 0, 'not modified': 0, 'errors': 0, 'deleted': 0})
assert_equal(harvest_source_dict['status']['last_job']['stats'], {'added': 3, 'updated': 0, 'not modified': 0, 'errored': 0, 'deleted': 0})
assert_equal(harvest_source_dict['status']['total_datasets'], 3)
assert_equal(harvest_source_dict['status']['job_count'], 1)
@ -252,7 +252,7 @@ class TestHarvestQueue(object):
context,
{'id': job_id}
)
assert_equal(harvest_job['stats'], {'added': 0, 'updated': 2, 'not modified': 0, 'errors': 0, 'deleted': 1})
assert_equal(harvest_job['stats'], {'added': 0, 'updated': 2, 'not modified': 0, 'errored': 0, 'deleted': 1})
context['detailed'] = True
harvest_source_dict = logic.get_action('harvest_source_show')(
@ -260,6 +260,6 @@ class TestHarvestQueue(object):
{'id': harvest_source['id']}
)
assert_equal(harvest_source_dict['status']['last_job']['stats'], {'added': 0, 'updated': 2, 'not modified': 0, 'errors': 0, 'deleted': 1})
assert_equal(harvest_source_dict['status']['last_job']['stats'], {'added': 0, 'updated': 2, 'not modified': 0, 'errored': 0, 'deleted': 1})
assert_equal(harvest_source_dict['status']['total_datasets'], 2)
assert_equal(harvest_source_dict['status']['job_count'], 2)