2012-12-18 00:50:26 +01:00
|
|
|
from sqlalchemy import distinct, func
|
2012-02-29 11:59:02 +01:00
|
|
|
|
2015-11-25 21:55:32 +01:00
|
|
|
from ckan.model import Package, Group
|
|
|
|
from ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject,
|
|
|
|
HarvestGatherError, HarvestObjectError)
|
2012-02-29 11:59:02 +01:00
|
|
|
|
|
|
|
|
|
|
|
def harvest_source_dictize(source, context):
    '''
    TODO: Deprecated

    Return the dictionary form of a harvest source.

    The result starts from ``source.as_dict()`` and gains two keys:

    * ``publisher_title``: the title of the group found via
      ``Group.get`` for the dict's ``publisher_id``, or an empty string
      when there is no ``publisher_id`` or no matching group.
    * ``status``: whatever ``_get_source_status(source, context)`` returns.
    '''
    result = source.as_dict()

    # Start with an empty title; it is only replaced when the publisher
    # id resolves to an actual group.
    result['publisher_title'] = u''

    publisher_id = result.get('publisher_id')
    publisher = Group.get(publisher_id) if publisher_id else None
    if publisher:
        result['publisher_title'] = publisher.title

    result['status'] = _get_source_status(source, context)

    return result
|
|
|
|
|
2015-11-25 21:55:32 +01:00
|
|
|
|
2012-02-29 11:59:02 +01:00
|
|
|
def harvest_job_dictize(job, context):
    '''
    Return the dictionary form of a harvest job.

    The result starts from ``job.as_dict()`` and is extended according to
    flags read from ``context``:

    * ``return_stats`` (default ``True``): adds a ``stats`` dict holding
      the number of harvest objects of this job per ``report_status``
      (pre-seeded with ``added``, ``updated``, ``not modified``,
      ``errored`` and ``deleted`` set to 0). ``errored`` is then replaced
      by the number of distinct harvest objects that have at least one
      object error, plus the number of gather errors of the job.
    * ``return_error_summary`` (default ``True``): adds
      ``object_error_summary`` and ``gather_error_summary``, the query
      rows ``(message, error_count)`` of the most frequent error
      messages, most frequent first. The number of rows is capped by
      ``error_summary_limit`` (the historical, misspelt key
      ``error_summmary_limit`` is still honoured as a fallback),
      defaulting to 20.

    ``context['model']`` is required and must expose ``Session``; a
    missing key raises ``KeyError``.
    '''
    out = job.as_dict()

    model = context['model']

    if context.get('return_stats', True):
        stats = model.Session.query(
            HarvestObject.report_status,
            func.count(HarvestObject.id).label('total_objects')) \
            .filter_by(harvest_job_id=job.id) \
            .group_by(HarvestObject.report_status).all()

        out['stats'] = {'added': 0, 'updated': 0, 'not modified': 0,
                        'errored': 0, 'deleted': 0}
        for status, count in stats:
            out['stats'][status] = count

        # We actually want to check which objects had errors, because they
        # could have been added/updated anyway (eg bbox errors)
        count = model.Session.query(
            func.distinct(HarvestObjectError.harvest_object_id)) \
            .join(HarvestObject) \
            .filter(HarvestObject.harvest_job_id == job.id) \
            .count()
        if count > 0:
            out['stats']['errored'] = count

        # Add gather errors to the error count
        count = model.Session.query(HarvestGatherError) \
            .filter(HarvestGatherError.harvest_job_id == job.id) \
            .count()
        if count > 0:
            out['stats']['errored'] = out['stats'].get('errored', 0) + count

    if context.get('return_error_summary', True):
        # Prefer the correctly spelt key, but keep accepting the misspelt
        # one that existing callers may already be passing.
        limit = context.get('error_summary_limit',
                            context.get('error_summmary_limit', 20))

        # Order by the labelled expression itself rather than the raw
        # string 'error_count desc': plain-string SQL fragments in
        # order_by() are not accepted by newer SQLAlchemy releases.
        object_error_count = func.count(
            HarvestObjectError.message).label('error_count')
        q = model.Session.query(
            HarvestObjectError.message,
            object_error_count) \
            .join(HarvestObject) \
            .filter(HarvestObject.harvest_job_id == job.id) \
            .group_by(HarvestObjectError.message) \
            .order_by(object_error_count.desc()) \
            .limit(limit)
        out['object_error_summary'] = q.all()

        gather_error_count = func.count(
            HarvestGatherError.message).label('error_count')
        q = model.Session.query(
            HarvestGatherError.message,
            gather_error_count) \
            .filter(HarvestGatherError.harvest_job_id == job.id) \
            .group_by(HarvestGatherError.message) \
            .order_by(gather_error_count.desc()) \
            .limit(limit)
        out['gather_error_summary'] = q.all()

    return out
|
|
|
|
|
2015-11-25 21:55:32 +01:00
|
|
|
|
2012-02-29 11:59:02 +01:00
|
|
|
def harvest_object_dictize(obj, context):
    '''
    Return the dictionary form of a harvest object.

    The result starts from ``obj.as_dict()`` and gains these keys:

    * ``source`` / ``job``: the object's ``harvest_source_id`` and
      ``harvest_job_id``.
    * ``package``: the id of ``obj.package``, set only when the object
      has a (truthy) package.
    * ``errors``: a list with ``as_dict()`` of every entry in
      ``obj.errors``.
    * ``extras``: a dict mapping each extra's ``key`` to its ``value``.

    ``context`` is accepted for signature parity with the other dictize
    functions and is not read here.
    '''
    result = obj.as_dict()
    result['source'] = obj.harvest_source_id
    result['job'] = obj.harvest_job_id

    package = obj.package
    if package:
        result['package'] = package.id

    result['errors'] = [error.as_dict() for error in obj.errors]
    result['extras'] = dict(
        (extra.key, extra.value) for extra in obj.extras)

    return result
|
|
|
|
|
2015-11-25 21:55:32 +01:00
|
|
|
|
2012-02-29 11:59:02 +01:00
|
|
|
def _get_source_status(source, context):
    '''
    TODO: Deprecated, use harvest_source_show_status instead

    Build a status summary dict for a harvest source.

    The returned dict always carries ``job_count``, ``next_harvest``,
    ``last_harvest_request`` and ``overall_statistics`` (a dict with
    ``added`` and ``errors``). When the source has no jobs at all, a
    ``msg`` key with ``'No jobs yet'`` is added and the remaining values
    keep their empty defaults.

    ``context`` is expected to provide ``model`` (used for
    ``model.Session`` queries).
    '''
    model = context.get('model')

    total_jobs = HarvestJob.filter(source=source).count()

    status = {
        'job_count': 0,
        'next_harvest': '',
        'last_harvest_request': '',
        'overall_statistics': {'added': 0, 'errors': 0},
    }

    # Nothing else to report for a source that was never harvested.
    if not total_jobs:
        status['msg'] = 'No jobs yet'
        return status

    status['job_count'] = total_jobs

    # Get next scheduled job
    pending_job = HarvestJob.filter(source=source, status=u'New').first()
    status['next_harvest'] = (
        'Scheduled' if pending_job else 'Not yet scheduled')

    # Get the last finished job
    finished_jobs = HarvestJob.filter(source=source, status=u'Finished')
    latest_finished = finished_jobs.order_by(
        HarvestJob.created.desc()).first()

    if not latest_finished:
        status['last_harvest_request'] = 'Not yet harvested'
        return status

    # TODO: Should we encode the dates as strings?
    status['last_harvest_request'] = str(latest_finished.gather_finished)

    # Overall statistics: active packages that are the current harvest
    # object of this source.
    current_packages = model.Session.query(
        distinct(HarvestObject.package_id), Package.name)
    current_packages = current_packages.join(Package).join(HarvestSource)
    current_packages = current_packages \
        .filter(HarvestObject.source == source) \
        .filter(HarvestObject.current == True) \
        .filter(Package.state == u'active')
    status['overall_statistics']['added'] = current_packages.count()

    # Errors are the sum of gather-stage and object-stage errors over
    # all jobs of this source.
    gather_error_query = model.Session.query(HarvestGatherError) \
        .join(HarvestJob).join(HarvestSource) \
        .filter(HarvestJob.source == source)
    gather_error_total = gather_error_query.count()

    object_error_query = model.Session.query(HarvestObjectError) \
        .join(HarvestObject).join(HarvestJob).join(HarvestSource) \
        .filter(HarvestJob.source == source)
    object_error_total = object_error_query.count()

    status['overall_statistics']['errors'] = (
        gather_error_total + object_error_total)

    return status
|