147 lines
5.1 KiB
147 lines
5.1 KiB
from sqlalchemy import distinct
from ckan.model import Package
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject, \
HarvestGatherError, HarvestObjectError
def harvest_source_dictize(source, context):
    """Serialize a harvest source into a plain dict.

    Starts from ``source.as_dict()`` and adds two keys:

    * ``jobs``: one entry per item in ``source.jobs``.
    * ``status``: the result of ``_get_source_status(source, context)``.

    :param source: object exposing ``as_dict()`` and an iterable ``jobs``
    :param context: dict passed through unchanged to ``_get_source_status``
    :returns: the augmented dict
    """
    out = source.as_dict()

    # NOTE(review): the loop body was missing in the reviewed copy of this
    # file; each job is assumed to be serialized with as_dict(), the same way
    # `source` itself is -- confirm against version control.
    out['jobs'] = [job.as_dict() for job in source.jobs]

    # Computed once per source (not per job), so it is present even when the
    # source has no jobs.
    out['status'] = _get_source_status(source, context)

    return out
def harvest_job_dictize(job, context):
    """Serialize a harvest job into a plain dict.

    Starts from ``job.as_dict()`` and adds:

    * ``source``: ``job.source_id``
    * ``objects``: one entry per item in ``job.objects``
    * ``gather_errors``: one entry per item in ``job.gather_errors``

    :param job: object exposing ``as_dict()``, ``source_id``, ``objects`` and
        ``gather_errors``
    :param context: unused here; kept for a uniform dictize signature
    :returns: the augmented dict
    """
    out = job.as_dict()
    out['source'] = job.source_id

    # NOTE(review): both loop bodies were missing in the reviewed copy of this
    # file; children are assumed to be serialized with as_dict(), the same way
    # `job` itself is -- confirm against version control.
    out['objects'] = [obj.as_dict() for obj in job.objects]
    out['gather_errors'] = [error.as_dict() for error in job.gather_errors]

    return out
def harvest_object_dictize(obj, context):
    """Serialize a harvest object into a plain dict.

    Starts from ``obj.as_dict()`` and adds:

    * ``source``: ``obj.harvest_source_id``
    * ``job``: ``obj.harvest_job_id``
    * ``package``: ``obj.package.id``, only when ``obj.package`` is truthy
    * ``errors``: one entry per item in ``obj.errors``

    :param obj: object exposing ``as_dict()``, ``harvest_source_id``,
        ``harvest_job_id``, ``package`` and ``errors``
    :param context: unused here; kept for a uniform dictize signature
    :returns: the augmented dict
    """
    out = obj.as_dict()
    out['source'] = obj.harvest_source_id
    out['job'] = obj.harvest_job_id

    # The package link is optional; only override the key when it exists.
    if obj.package:
        out['package'] = obj.package.id

    # NOTE(review): the loop body was missing in the reviewed copy of this
    # file; errors are assumed to be serialized with as_dict(), the same way
    # `obj` itself is -- confirm against version control.
    out['errors'] = [error.as_dict() for error in obj.errors]

    return out
def _get_source_status(source, context):
# Build a status summary dict for one harvest source.
#
# Reads 'model' and 'detailed' (default True) from `context`. When the source
# has no jobs the result is just {'msg': 'No jobs yet'}. Otherwise the result
# describes the next scheduled harvest, statistics for the last finished job
# and overall statistics for the source.
#
# NOTE(review): this copy of the function has lost its indentation and several
# lines (loop and else bodies, the tails of backslash-continued queries, the
# end of the `out` dict literal). The gaps are flagged inline below; restore
# the missing lines from version control before changing any logic.
model = context.get('model')
detailed = context.get('detailed',True)
out = dict()
# Total number of jobs ever created for this source.
job_count = HarvestJob.filter(source=source).count()
if not job_count:
out['msg'] = 'No jobs yet'
return out
# NOTE(review): the literal below is never closed in the visible lines, and
# keys used further down ('last_harvest_request', 'last_harvest_statistics')
# are not initialised here -- the remaining entries and the closing brace
# appear to be missing.
out = {'next_harvest':'',
'overall_statistics':{'added':0, 'errors':0},
# Get next scheduled job (the first job whose status is 'New')
next_job = HarvestJob.filter(source=source,status=u'New').first()
if next_job:
out['next_harvest'] = 'Scheduled'
# NOTE(review): an 'else:' appears to be missing here; as written the next
# assignment would overwrite 'Scheduled' unconditionally.
out['next_harvest'] = 'Not yet scheduled'
# Get the last finished job
# NOTE(review): the statement below ends in a line continuation but its tail
# is missing; presumably it orders the jobs and takes the first -- confirm.
last_job = HarvestJob.filter(source=source,status=u'Finished') \
if last_job:
#TODO: Should we encode the dates as strings?
out['last_harvest_request'] = str(last_job.gather_finished)
# Get HarvestObjects from the last job with links to packages
if detailed:
last_objects = [obj for obj in last_job.objects if obj.package is not None]
if len(last_objects) == 0:
# No packages added or updated
out['last_harvest_statistics']['added'] = 0
out['last_harvest_statistics']['updated'] = 0
# NOTE(review): an 'else:' appears to be missing before the loop below.
# Check whether packages were added or updated
for last_object in last_objects:
# Check if the same package had been linked before
# NOTE(review): the query tail is missing; the comparison with 1 just after it
# suggests it ended in a count of matching objects -- confirm.
previous_objects = model.Session.query(HarvestObject) \
.filter(HarvestObject.package==last_object.package) \
if previous_objects == 1:
# It didn't previously exist, it has been added
out['last_harvest_statistics']['added'] += 1
# NOTE(review): an 'else:' appears to be missing before the 'updated' branch.
# Package already existed, but it has been updated
out['last_harvest_statistics']['updated'] += 1
# Last harvest errors
# We have the gathering errors in last_job.gather_errors, so let's
# also get the object errors.
# NOTE(review): the query tail is missing; presumably it restricts the errors
# to objects of last_job -- confirm.
object_errors = model.Session.query(HarvestObjectError).join(HarvestObject) \
out['last_harvest_statistics']['errors'] = len(last_job.gather_errors) \
+ object_errors.count()
if detailed:
# NOTE(review): both loop bodies are incomplete in this copy: nothing is done
# with gather_error, and `err` is built but never stored in the visible lines.
for gather_error in last_job.gather_errors:
for object_error in object_errors:
err = {'object_id':object_error.object.id,'object_guid':object_error.object.guid,'message': object_error.message}
# Overall statistics
# Distinct packages currently linked to this source (HarvestObject.current).
# NOTE(review): the query ends in a line continuation whose tail is missing.
packages = model.Session.query(distinct(HarvestObject.package_id),Package.name) \
.join(Package).join(HarvestSource) \
.filter(HarvestObject.source==source) \
.filter(HarvestObject.current==True) \
out['overall_statistics']['added'] = packages.count()
if detailed:
# NOTE(review): the body of the loop below is missing in this copy.
for package in packages:
# NOTE(review): the two error queries below have lost their tails; since their
# results are added together afterwards they presumably end in a count
# filtered by source -- confirm.
gather_errors = model.Session.query(HarvestGatherError) \
.join(HarvestJob).join(HarvestSource) \
object_errors = model.Session.query(HarvestObjectError) \
.join(HarvestObject).join(HarvestJob).join(HarvestSource) \
out['overall_statistics']['errors'] = gather_errors + object_errors
# NOTE(review): presumably the 'else' branch of `if last_job:` above (its
# 'else:' line is missing); otherwise it would overwrite the timestamp that
# was stored for the last finished job.
out['last_harvest_request'] = 'Not yet harvested'
return out