[refactoring] Add status info to harvest sources. Updated dict functions and UI.

This commit is contained in:
Adrià Mercader 2011-04-11 16:30:56 +01:00
parent e2faa14b8d
commit 280dce1626
5 changed files with 129 additions and 26 deletions

View File

@ -85,10 +85,13 @@ class ViewController(BaseController):
redirect(h.url_for(controller='harvest', action='index'))
def show(self,id):
c.source = get_harvest_source(id)
try:
c.source = get_harvest_source(id)
return render('ckanext/harvest/show.html')
except:
abort(404,'Harvest source not found')
#TODO: show source reports
return render('ckanext/harvest/show.html')
def delete(self,id):
try:

View File

@ -54,6 +54,8 @@ class IHarvester(Interface):
responsible for:
- performing any necessary action with the fetched object (e.g.
create a CKAN package).
Note: if this stage creates or updates a package, a reference
to the package should be added to the HarvestObject.
- creating the HarvestObject - Package relation (if necessary)
- creating and storing any suitable HarvestObjectErrors that may
occur.

View File

@ -1,22 +1,117 @@
from ckan.model import Session
from ckan.model import repo
from sqlalchemy import distinct,func
from ckan.model import Session, repo
from ckan.model import Package
from ckan.lib.base import config
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject, \
HarvestGatherError, HarvestObjectError
from ckanext.harvest.queue import get_gather_publisher
log = __import__("logging").getLogger(__name__)
def _get_source_status(source):
    """Build a status summary for a harvest source.

    The returned dict always contains the following keys, so callers and
    templates can read them without checking which branch was taken:

    - ``next_harvest``: 'Within 15 minutes' if a job with status 'New'
      exists for the source, otherwise 'Not yet scheduled'.
    - ``last_harvest_request``: the ``gather_finished`` value of the most
      recently created 'Finished' job, or 'Not yet harvested'.
    - ``last_harvest_statistics``: 'added', 'updated' and 'errors' counts
      for that last finished job.
    - ``last_harvest_errors``: messages of the gather and object errors
      of that last finished job.
    - ``overall_statistics``: 'added' (distinct packages linked to the
      source) and 'errors' (gather + object errors) across all jobs.
    - ``packages``: names of the packages counted in the overall
      'added' figure.

    When the source has no jobs at all, an additional ``msg`` key with
    the value 'No jobs yet' is included and every other key keeps its
    default value.

    :param source: the harvest source object to summarise
    :returns: the status dict described above
    """
    # Start from a complete set of defaults so that every return path
    # hands back the same shape.
    out = {'next_harvest': 'Not yet scheduled',
           'last_harvest_request': 'Not yet harvested',
           'last_harvest_statistics': {'added': 0, 'updated': 0, 'errors': 0},
           'last_harvest_errors': [],
           'overall_statistics': {'added': 0, 'errors': 0},
           'packages': []}

    jobs = get_harvest_jobs(source=source)
    if not jobs:
        out['msg'] = 'No jobs yet'
        return out

    # Get next scheduled job
    next_job = HarvestJob.filter(source=source, status=u'New').first()
    if next_job:
        out['next_harvest'] = 'Within 15 minutes'

    # Get the last finished job (first() already restricts to one row)
    last_job = HarvestJob.filter(source=source, status=u'Finished') \
                         .order_by(HarvestJob.created.desc()).first()
    if not last_job:
        return out

    out['last_harvest_request'] = last_job.gather_finished

    # Check whether the packages linked from the last job's HarvestObjects
    # were added or updated by it.
    last_stats = out['last_harvest_statistics']
    for last_object in last_job.objects:
        if last_object.package is None:
            # Object did not result in a package
            continue
        # Count how many HarvestObjects reference the same package; only
        # the number is needed, so the rows themselves are not loaded.
        linked_objects = Session.query(HarvestObject) \
                                .filter(HarvestObject.package == last_object.package) \
                                .count()
        if linked_objects == 1:
            # It didn't previously exist, it has been added
            last_stats['added'] += 1
        else:
            # Package already existed, but it has been updated
            last_stats['updated'] += 1

    # Last harvest errors
    # The gathering errors are available in last_job.gather_errors, so
    # only the object errors have to be queried.
    object_errors = Session.query(HarvestObjectError).join(HarvestObject) \
                           .filter(HarvestObject.job == last_job).all()
    last_stats['errors'] = len(last_job.gather_errors) + len(object_errors)
    out['last_harvest_errors'].extend(
        gather_error.message for gather_error in last_job.gather_errors)
    out['last_harvest_errors'].extend(
        object_error.message for object_error in object_errors)

    # Overall statistics
    packages = Session.query(distinct(HarvestObject.package_id), Package.name) \
                      .join(Package).join(HarvestJob).join(HarvestSource) \
                      .filter(HarvestJob.source == source).all()
    out['overall_statistics']['added'] = len(packages)
    out['packages'].extend(package.name for package in packages)

    gather_errors = Session.query(HarvestGatherError) \
                           .join(HarvestJob).join(HarvestSource) \
                           .filter(HarvestJob.source == source).all()
    object_errors = Session.query(HarvestObjectError) \
                           .join(HarvestObject).join(HarvestJob).join(HarvestSource) \
                           .filter(HarvestJob.source == source).all()
    out['overall_statistics']['errors'] = len(gather_errors) + len(object_errors)

    return out
def _source_as_dict(source):
    """Serialise a harvest source to a dict.

    The result is ``source.as_dict()`` extended with a ``jobs`` list
    (each job converted with its own ``as_dict()``) and a ``status``
    entry computed by ``_get_source_status``.
    """
    source_dict = source.as_dict()
    source_dict['jobs'] = [harvest_job.as_dict() for harvest_job in source.jobs]
    source_dict['status'] = _get_source_status(source)

    #TODO: Get some report data

    return source_dict
def _job_as_dict(job):

View File

@ -11,10 +11,13 @@
<div py:match="content">
<div class="harvest-content">
<py:if test="c.sources">
<h1>Harvesting Sources</h1>
<a id="new-harvest-source" href="harvest/create">Add a harvesting source</a>
<table id="harvest-sources">
<py:choose>
<py:when test="c.sources">
<table id="harvest-sources">
<tr>
<th></th>
<th></th>
@ -22,9 +25,8 @@
<th>URL</th>
<th>Type</th>
<th>Active</th>
<!-- <th>Status</th>
<th>Statistics</th>
<th>Next Harvest</th>-->
<th>Next Harvest</th>
<th>Created</th>
</tr>
@ -35,13 +37,17 @@
<td>${source.url}</td>
<td>${source.type}</td>
<td>${source.active}</td>
<!-- <td>${source.status.last_harvest_status}</td>
<td>${source.status.overall_statistics.added} pkgs ${source.status.overall_statistics.errors} errors</td>
<td>${source.status.next_harvest}</td>-->
<td>${source.status.next_harvest}</td>
<td>${source.created}</td>
</tr>
</table>
</py:if>
</py:when>
<py:otherwise>
<div id="no-harvest-sources">No harvest sources defined yet.</div>
</py:otherwise>
</py:choose>
</div>
</div>
<xi:include href="../../layout.html" />

View File

@ -50,22 +50,20 @@
<th>Total jobs</th>
<td>${len(c.source.jobs)}</td>
</tr>
<!--
<tr>
<th>Status</th>
<td>
Last Harvest Status: ${c.source.status.last_harvest_status}<br/>
Last Harvest Errors: <span py:for="errors in c.source.status.last_harvest_errors"></span><br/>
Last Harvest Added or Updated:
Last Harvest Errors: ${c.source.status.last_harvest_statistics.errors}<br/>
<py:choose>
<py:when test="'added' in c.source.status.last_harvest_statistics">
${c.source.status.last_harvest_statistics.added}<br/>
<py:when test="len(c.source.status.last_harvest_errors)>0">
<ul>
<li py:for="error in c.source.status.last_harvest_errors">${error}</li>
</ul>
</py:when>
<py:otherwise>
None<br/>
</py:otherwise>
</py:choose>
Last Harvest Request: ${c.source.status.last_harvest_request} <br/>
Last Harvest Added: ${c.source.status.last_harvest_statistics.added}<br/>
Last Harvest Updated: ${c.source.status.last_harvest_statistics.updated}<br/>
Last Harvest: ${c.source.status.last_harvest_request} <br/>
Next Harvest: ${c.source.status.next_harvest}
</td>
</tr>
@ -87,7 +85,6 @@
</div>
</td>
</tr>
-->
</table>
</py:if>
</div>