[refactoring] Add status info to harvest sources. Updated dict functions and UI.
This commit is contained in:
parent
e2faa14b8d
commit
280dce1626
|
@ -85,10 +85,13 @@ class ViewController(BaseController):
|
|||
redirect(h.url_for(controller='harvest', action='index'))
|
||||
|
||||
def show(self,id):
|
||||
c.source = get_harvest_source(id)
|
||||
try:
|
||||
c.source = get_harvest_source(id)
|
||||
|
||||
return render('ckanext/harvest/show.html')
|
||||
except:
|
||||
abort(404,'Harvest source not found')
|
||||
|
||||
#TODO: show source reports
|
||||
return render('ckanext/harvest/show.html')
|
||||
|
||||
def delete(self,id):
|
||||
try:
|
||||
|
|
|
@ -54,6 +54,8 @@ class IHarvester(Interface):
|
|||
responsible for:
|
||||
- performing any necessary action with the fetched object (e.g.
|
||||
create a CKAN package).
|
||||
Note: if this stage creates or updates a package, a reference
|
||||
to the package should be added to the HarvestObject.
|
||||
- creating the HarvestObject - Package relation (if necessary)
|
||||
- creating and storing any suitable HarvestObjectErrors that may
|
||||
occur.
|
||||
|
|
|
@ -1,22 +1,117 @@
|
|||
from ckan.model import Session
|
||||
from ckan.model import repo
|
||||
from sqlalchemy import distinct,func
|
||||
from ckan.model import Session, repo
|
||||
from ckan.model import Package
|
||||
from ckan.lib.base import config
|
||||
|
||||
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject
|
||||
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject, \
|
||||
HarvestGatherError, HarvestObjectError
|
||||
from ckanext.harvest.queue import get_gather_publisher
|
||||
|
||||
log = __import__("logging").getLogger(__name__)
|
||||
|
||||
|
||||
def _get_source_status(source):
    """Build a summary of the harvesting activity of a harvest source.

    :param source: the harvest source object to report on.
    :returns: a dict. If ``get_harvest_jobs`` returns no jobs for the
        source, the dict only contains ``{'msg': 'No jobs yet'}``.
        Otherwise it contains the keys ``next_harvest``,
        ``last_harvest_request``, ``last_harvest_statistics``
        (``added`` / ``updated`` / ``errors``), ``last_harvest_errors``
        (list of error messages), ``overall_statistics``
        (``added`` / ``errors``) and ``packages`` (list of package names).
    """
    jobs = get_harvest_jobs(source=source)
    if not jobs:
        return {'msg': 'No jobs yet'}

    out = {'next_harvest': '',
           'last_harvest_request': '',
           'last_harvest_statistics': {'added': 0, 'updated': 0, 'errors': 0},
           'last_harvest_errors': [],
           'overall_statistics': {'added': 0, 'errors': 0},
           'packages': []}

    # Get next scheduled job
    next_job = HarvestJob.filter(source=source, status=u'New').first()
    if next_job:
        out['next_harvest'] = 'Within 15 minutes'
    else:
        out['next_harvest'] = 'Not yet scheduled'

    # Get the last finished job (first() already limits the query to one row)
    last_job = HarvestJob.filter(source=source, status=u'Finished') \
                         .order_by(HarvestJob.created.desc()).first()

    if not last_job:
        out['last_harvest_request'] = 'Not yet harvested'
        return out

    out['last_harvest_request'] = last_job.gather_finished

    # Check whether the packages linked from the last job's HarvestObjects
    # were added or updated. Objects without a package are ignored.
    last_statistics = out['last_harvest_statistics']
    for last_object in last_job.objects:
        if last_object.package is None:
            continue

        # Count the objects linked to the same package (this one included).
        # Only the number is needed, so let the database count the rows
        # instead of loading them all.
        link_count = Session.query(HarvestObject) \
                            .filter(HarvestObject.package == last_object.package) \
                            .count()

        if link_count == 1:
            # It didn't previously exist, it has been added
            last_statistics['added'] += 1
        else:
            # Package already existed, but it has been updated
            last_statistics['updated'] += 1

    # Last harvest errors
    # We have the gathering errors in last_job.gather_errors, so let's
    # also get the object errors.
    gather_errors = last_job.gather_errors
    object_errors = Session.query(HarvestObjectError).join(HarvestObject) \
                           .filter(HarvestObject.job == last_job).all()

    last_statistics['errors'] = len(gather_errors) + len(object_errors)
    # Gather errors are listed first, then object errors.
    out['last_harvest_errors'] = \
        [gather_error.message for gather_error in gather_errors] + \
        [object_error.message for object_error in object_errors]

    # Overall statistics
    packages = Session.query(distinct(HarvestObject.package_id), Package.name) \
                      .join(Package).join(HarvestJob).join(HarvestSource) \
                      .filter(HarvestJob.source == source).all()

    out['overall_statistics']['added'] = len(packages)
    out['packages'] = [package.name for package in packages]

    # Only the totals are reported, so count in the database.
    gather_error_count = Session.query(HarvestGatherError) \
                                .join(HarvestJob).join(HarvestSource) \
                                .filter(HarvestJob.source == source).count()

    object_error_count = Session.query(HarvestObjectError) \
                                .join(HarvestObject).join(HarvestJob) \
                                .join(HarvestSource) \
                                .filter(HarvestJob.source == source).count()

    out['overall_statistics']['errors'] = gather_error_count + object_error_count

    return out
|
||||
|
||||
|
||||
|
||||
|
||||
def _source_as_dict(source):
    """Return the dict form of a harvest source, with jobs and status.

    Starts from ``source.as_dict()`` and adds a ``jobs`` list holding
    ``as_dict()`` of every job in ``source.jobs``, plus a ``status`` entry
    produced by ``_get_source_status``.
    """
    source_dict = source.as_dict()
    source_dict['jobs'] = [job.as_dict() for job in source.jobs]
    source_dict['status'] = _get_source_status(source)

    #TODO: Get some report data

    return source_dict
|
||||
|
||||
def _job_as_dict(job):
|
||||
|
|
|
@ -11,10 +11,13 @@
|
|||
|
||||
<div py:match="content">
|
||||
<div class="harvest-content">
|
||||
<py:if test="c.sources">
|
||||
<h1>Harvesting Sources</h1>
|
||||
<a id="new-harvest-source" href="harvest/create">Add a harvesting source</a>
|
||||
<table id="harvest-sources">
|
||||
<py:choose>
|
||||
<py:when test="c.sources">
|
||||
|
||||
|
||||
<table id="harvest-sources">
|
||||
<tr>
|
||||
<th></th>
|
||||
<th></th>
|
||||
|
@ -22,9 +25,8 @@
|
|||
<th>URL</th>
|
||||
<th>Type</th>
|
||||
<th>Active</th>
|
||||
<!-- <th>Status</th>
|
||||
<th>Statistics</th>
|
||||
<th>Next Harvest</th>-->
|
||||
<th>Next Harvest</th>
|
||||
<th>Created</th>
|
||||
</tr>
|
||||
|
||||
|
@ -35,13 +37,17 @@
|
|||
<td>${source.url}</td>
|
||||
<td>${source.type}</td>
|
||||
<td>${source.active}</td>
|
||||
<!-- <td>${source.status.last_harvest_status}</td>
|
||||
<td>${source.status.overall_statistics.added} pkgs ${source.status.overall_statistics.errors} errors</td>
|
||||
<td>${source.status.next_harvest}</td>-->
|
||||
<td>${source.status.next_harvest}</td>
|
||||
<td>${source.created}</td>
|
||||
</tr>
|
||||
</table>
|
||||
</py:if>
|
||||
</py:when>
|
||||
<py:otherwise>
|
||||
<div id="no-harvest-sources">No harvest sources defined yet.</div>
|
||||
</py:otherwise>
|
||||
</py:choose>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<xi:include href="../../layout.html" />
|
||||
|
|
|
@ -50,22 +50,20 @@
|
|||
<th>Total jobs</th>
|
||||
<td>${len(c.source.jobs)}</td>
|
||||
</tr>
|
||||
<!--
|
||||
<tr>
|
||||
<th>Status</th>
|
||||
<td>
|
||||
Last Harvest Status: ${c.source.status.last_harvest_status}<br/>
|
||||
Last Harvest Errors: <span py:for="errors in c.source.status.last_harvest_errors"></span><br/>
|
||||
Last Harvest Added or Updated:
|
||||
Last Harvest Errors: ${c.source.status.last_harvest_statistics.errors}<br/>
|
||||
<py:choose>
|
||||
<py:when test="'added' in c.source.status.last_harvest_statistics">
|
||||
${c.source.status.last_harvest_statistics.added}<br/>
|
||||
<py:when test="len(c.source.status.last_harvest_errors)>0">
|
||||
<ul>
|
||||
<li py:for="error in c.source.status.last_harvest_errors">${error}</li>
|
||||
</ul>
|
||||
</py:when>
|
||||
<py:otherwise>
|
||||
None<br/>
|
||||
</py:otherwise>
|
||||
</py:choose>
|
||||
Last Harvest Request: ${c.source.status.last_harvest_request} <br/>
|
||||
Last Harvest Added: ${c.source.status.last_harvest_statistics.added}<br/>
|
||||
Last Harvest Updated: ${c.source.status.last_harvest_statistics.updated}<br/>
|
||||
Last Harvest: ${c.source.status.last_harvest_request} <br/>
|
||||
Next Harvest: ${c.source.status.next_harvest}
|
||||
</td>
|
||||
</tr>
|
||||
|
@ -87,7 +85,6 @@
|
|||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
-->
|
||||
</table>
|
||||
</py:if>
|
||||
</div>
|
||||
|
|
Loading…
Reference in New Issue