[refactoring] Use the common functions in the web interface.

Not yet implemented for creating and editing harvest sources, as those actions use the DGU forms API.
Also TODO: decide what report info is needed in the listing and details pages.
This commit is contained in:
Adrià Mercader 2011-04-05 13:39:23 +01:00
parent 3d32a18802
commit 4023bb7222
5 changed files with 86 additions and 102 deletions

View File

@ -8,6 +8,7 @@ from ckan.lib.base import BaseController, c, g, request, \
from ckan.model import Package from ckan.model import Package
from ckanext.harvest.lib import *
class ViewController(BaseController): class ViewController(BaseController):
@ -32,39 +33,19 @@ class ViewController(BaseController):
if data: if data:
http_request.add_data(data) http_request.add_data(data)
try: try:
return urllib2.urlopen(http_request) return urllib2.urlopen(http_request)
except urllib2.HTTPError as e: except urllib2.HTTPError as e:
raise raise
def index(self): def index(self):
# Request all harvesting sources # Request all harvest sources
sources_url = self.api_url + '/harvestsource' c.sources = get_harvest_sources()
try:
doc = self._do_request(sources_url).read()
sources_ids = json.loads(doc) #TODO: show source reports
source_url = sources_url + '/%s'
sources = []
# For each source, request its details
for source_id in sources_ids:
doc = self._do_request(source_url % source_id).read()
sources.append(json.loads(doc))
c.sources = sources
except urllib2.HTTPError as e:
msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg)
h.flash_error(msg)
except urllib2.URLError as e:
msg = 'Could not find server %r: %r' % (sources_url, e)
h.flash_error(msg)
return render('ckanext/harvest/index.html') return render('ckanext/harvest/index.html')
def create(self): def create(self):
# This is the DGU form API, so we don't use self.api_url # This is the DGU form API, so we don't use self.api_url
@ -91,37 +72,30 @@ class ViewController(BaseController):
data = json.dumps(data) data = json.dumps(data)
try: try:
r = self._do_request(form_url,data) r = self._do_request(form_url,data)
h.flash_success('Harvesting source added successfully') h.flash_success('Harvesting source added successfully')
except urllib2.HTTPError as e: except urllib2.HTTPError as e:
msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg)
# The form API returns just a 500, so we are not exactly sure of what # The form API returns just a 500, so we are not exactly sure of what
# happened, but most probably it was a duplicate entry # happened, but most probably it was a duplicate entry
if e.getcode() == 500: if e.getcode() == 500:
msg = msg + ' Does the source already exist?' msg = msg + ' Does the source already exist?'
h.flash_error(msg) h.flash_error(msg)
finally: finally:
redirect(h.url_for(controller='harvest', action='index')) redirect(h.url_for(controller='harvest', action='index'))
def show(self,id):
sources_url = self.api_url + '/harvestsource/%s' % id
try:
doc = self._do_request(sources_url).read()
c.source = json.loads(doc)
except urllib2.HTTPError as e:
msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg)
h.flash_error(msg)
def show(self,id):
c.source = get_harvest_source(id)
#TODO: show source reports
return render('ckanext/harvest/show.html') return render('ckanext/harvest/show.html')
def delete(self,id): def delete(self,id):
form_url = self.form_api_url + '/harvestsource/delete/%s' % id
try: try:
r = self._do_request(form_url) delete_harvest_source(id)
h.flash_success('Harvesting source deleted successfully') h.flash_success('Harvesting source deleted successfully')
except urllib2.HTTPError as e: except Exception as e:
msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) msg = 'An error occurred: [%s]' % e.message
h.flash_error(msg) h.flash_error(msg)
redirect(h.url_for(controller='harvest', action='index', id=None)) redirect(h.url_for(controller='harvest', action='index', id=None))
@ -147,34 +121,23 @@ class ViewController(BaseController):
data = json.dumps(data) data = json.dumps(data)
try: try:
r = self._do_request(form_url,data) r = self._do_request(form_url,data)
h.flash_success('Harvesting source edited successfully') h.flash_success('Harvesting source edited successfully')
except urllib2.HTTPError as e: except urllib2.HTTPError as e:
msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg)
h.flash_error(msg) h.flash_error(msg)
finally: finally:
redirect(h.url_for(controller='harvest', action='index', id=None)) redirect(h.url_for(controller='harvest', action='index', id=None))
def create_harvesting_job(self,id): def create_harvesting_job(self,id):
form_url = self.api_url + '/harvestingjob'
data = {
'source_id': id,
'user_ref': ''
}
data = json.dumps(data)
try: try:
r = self._do_request(form_url,data) create_harvest_job(id)
h.flash_success('Refresh requested, harvesting will take place within 15 minutes.') h.flash_success('Refresh requested, harvesting will take place within 15 minutes.')
except urllib2.HTTPError as e: except Exception as e:
msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) msg = 'An error occurred: [%s]' % e.message
if e.getcode() == 400: h.flash_error(msg)
msg = msg + ' ' + e.read()
h.flash_error(msg)
finally:
redirect(h.url_for(controller='harvest', action='index', id=None))
redirect(h.url_for(controller='harvest', action='index', id=None))
def map_view(self,id): def map_view(self,id):
#check if package exists #check if package exists

View File

@ -18,6 +18,8 @@ def _source_as_dict(source):
for obj in source.objects: for obj in source.objects:
out['objects'].append(obj.as_dict()) out['objects'].append(obj.as_dict())
#TODO: Get some report data
return out return out
def _job_as_dict(job): def _job_as_dict(job):
@ -35,9 +37,9 @@ def _job_as_dict(job):
for error in job.gather_errors: for error in job.gather_errors:
out['object_errors'].append(error.as_dict()) out['object_errors'].append(error.as_dict())
return out return out
def get_harvest_source(id,default=Exception,attr=None): def get_harvest_source(id,default=Exception,attr=None):
source = HarvestSource.get(id,default=default,attr=attr) source = HarvestSource.get(id,default=default,attr=attr)
@ -56,7 +58,7 @@ def create_harvest_source(source_dict):
exists = get_harvest_sources(url=source_dict['url']) exists = get_harvest_sources(url=source_dict['url'])
if len(exists): if len(exists):
raise Exception('There is already a Harvest Source for this URL: %s' % source_dict['url']) raise Exception('There is already a Harvest Source for this URL: %s' % source_dict['url'])
source = HarvestSource() source = HarvestSource()
source.url = source_dict['url'] source.url = source_dict['url']
source.type = source_dict['type'] source.type = source_dict['type']
@ -66,10 +68,10 @@ def create_harvest_source(source_dict):
source.__setattr__(o,source_dict[o]) source.__setattr__(o,source_dict[o])
source.save() source.save()
return _source_as_dict(source) return _source_as_dict(source)
def delete_harvest_source(source_id): def delete_harvest_source(source_id):
try: try:
@ -79,7 +81,7 @@ def delete_harvest_source(source_id):
source.delete() source.delete()
repo.commit_and_remove() repo.commit_and_remove()
#TODO: Jobs? #TODO: Jobs?
return True return True
@ -95,7 +97,8 @@ def get_harvest_jobs(**kwds):
def create_harvest_job(source_id): def create_harvest_job(source_id):
# Check if source exists # Check if source exists
try: try:
source = get_harvest_source(source_id) #We'll need the actual HarvestSource
source = HarvestSource.get(source_id)
except: except:
raise Exception('Source %s does not exist' % source_id) raise Exception('Source %s does not exist' % source_id)
@ -106,7 +109,7 @@ def create_harvest_job(source_id):
job = HarvestJob() job = HarvestJob()
job.source = source job.source = source
job.save() job.save()
return _job_as_dict(job) return _job_as_dict(job)
@ -119,7 +122,7 @@ def delete_harvest_job(job_id):
job.delete() job.delete()
repo.commit_and_remove() repo.commit_and_remove()
#TODO: objects? #TODO: objects?
return True return True
@ -141,7 +144,7 @@ def get_srid(crs):
return int(srid) return int(srid)
#TODO: move to ckanext-?? for geo stuff #TODO: move to ckanext-?? for geo stuff
def save_extent(package,extent=False): def save_extent(package,extent=False):
'''Updates the package extent in the package_extent geometry column '''Updates the package extent in the package_extent geometry column
If no extent provided (as a dict with minx,miny,maxx,maxy and srid keys), If no extent provided (as a dict with minx,miny,maxx,maxy and srid keys),
@ -152,12 +155,12 @@ def save_extent(package,extent=False):
srid = None srid = None
if extent: if extent:
minx = extent['minx'] minx = extent['minx']
miny = extent['miny'] miny = extent['miny']
maxx = extent['maxx'] maxx = extent['maxx']
maxy = extent['maxy'] maxy = extent['maxy']
if 'srid' in extent: if 'srid' in extent:
srid = extent['srid'] srid = extent['srid']
else: else:
minx = float(package.extras.get('bbox-east-long')) minx = float(package.extras.get('bbox-east-long'))
miny = float(package.extras.get('bbox-south-lat')) miny = float(package.extras.get('bbox-south-lat'))
@ -165,22 +168,22 @@ def save_extent(package,extent=False):
maxy = float(package.extras.get('bbox-north-lat')) maxy = float(package.extras.get('bbox-north-lat'))
crs = package.extras.get('spatial-reference-system') crs = package.extras.get('spatial-reference-system')
if crs: if crs:
srid = get_srid(crs) srid = get_srid(crs)
try: try:
# Check if extent already exists # Check if extent already exists
rows = conn.execute('SELECT package_id FROM package_extent WHERE package_id = %s',package.id).fetchall() rows = conn.execute('SELECT package_id FROM package_extent WHERE package_id = %s',package.id).fetchall()
update =(len(rows) > 0) update =(len(rows) > 0)
params = {'id':package.id, 'minx':minx,'miny':miny,'maxx':maxx,'maxy':maxy, 'db_srid': db_srid} params = {'id':package.id, 'minx':minx,'miny':miny,'maxx':maxx,'maxy':maxy, 'db_srid': db_srid}
if update: if update:
# Update # Update
if srid and srid != db_srid: if srid and srid != db_srid:
# We need to reproject the input geometry # We need to reproject the input geometry
statement = """UPDATE package_extent SET statement = """UPDATE package_extent SET
the_geom = ST_Transform( the_geom = ST_Transform(
ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, ST_GeomFromText('POLYGON ((%(minx)s %(miny)s,
%(maxx)s %(miny)s, %(maxx)s %(miny)s,
%(maxx)s %(maxy)s, %(maxx)s %(maxy)s,
%(minx)s %(maxy)s, %(minx)s %(maxy)s,
@ -190,15 +193,15 @@ def save_extent(package,extent=False):
""" """
params.update({'srid': srid}) params.update({'srid': srid})
else: else:
statement = """UPDATE package_extent SET statement = """UPDATE package_extent SET
the_geom = ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, the_geom = ST_GeomFromText('POLYGON ((%(minx)s %(miny)s,
%(maxx)s %(miny)s, %(maxx)s %(miny)s,
%(maxx)s %(maxy)s, %(maxx)s %(maxy)s,
%(minx)s %(maxy)s, %(minx)s %(maxy)s,
%(minx)s %(miny)s))',%(db_srid)s) %(minx)s %(miny)s))',%(db_srid)s)
WHERE package_id = %(id)s WHERE package_id = %(id)s
""" """
msg = 'Updated extent for package %s' msg = 'Updated extent for package %s'
else: else:
# Insert # Insert
if srid and srid != db_srid: if srid and srid != db_srid:
@ -206,23 +209,23 @@ def save_extent(package,extent=False):
statement = """INSERT INTO package_extent (package_id,the_geom) VALUES ( statement = """INSERT INTO package_extent (package_id,the_geom) VALUES (
%(id)s, %(id)s,
ST_Transform( ST_Transform(
ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, ST_GeomFromText('POLYGON ((%(minx)s %(miny)s,
%(maxx)s %(miny)s, %(maxx)s %(miny)s,
%(maxx)s %(maxy)s, %(maxx)s %(maxy)s,
%(minx)s %(maxy)s, %(minx)s %(maxy)s,
%(minx)s %(miny)s))',%(srid)s), %(minx)s %(miny)s))',%(srid)s),
%(db_srid)) %(db_srid))
)""" )"""
params.update({'srid': srid}) params.update({'srid': srid})
else: else:
statement = """INSERT INTO package_extent (package_id,the_geom) VALUES ( statement = """INSERT INTO package_extent (package_id,the_geom) VALUES (
%(id)s, %(id)s,
ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, ST_GeomFromText('POLYGON ((%(minx)s %(miny)s,
%(maxx)s %(miny)s, %(maxx)s %(miny)s,
%(maxx)s %(maxy)s, %(maxx)s %(maxy)s,
%(minx)s %(maxy)s, %(minx)s %(maxy)s,
%(minx)s %(miny)s))',%(db_srid)s))""" %(minx)s %(miny)s))',%(db_srid)s))"""
msg = 'Created new extent for package %s' msg = 'Created new extent for package %s'
conn.execute(statement,params) conn.execute(statement,params)

View File

@ -15,7 +15,7 @@ __all__ = [
'HarvestJob', 'harvest_job_table', 'HarvestJob', 'harvest_job_table',
'HarvestObject', 'harvest_object_table', 'HarvestObject', 'harvest_object_table',
'HarvestGatherError', 'harvest_gather_error_table', 'HarvestGatherError', 'harvest_gather_error_table',
'HarvestObjectError', 'harvest_object_error_table', 'HarvestObjectError', 'harvest_object_error_table',
] ]
class HarvestError(Exception): class HarvestError(Exception):
@ -57,7 +57,7 @@ class HarvestSource(HarvestDomainObject):
class HarvestJob(HarvestDomainObject): class HarvestJob(HarvestDomainObject):
'''A Harvesting Job is performed in two phases. In first place, the '''A Harvesting Job is performed in two phases. In first place, the
**gather** stage collects all the Ids and URLs that need to be fetched **gather** stage collects all the Ids and URLs that need to be fetched
from the harvest source. Errors occurring in this phase from the harvest source. Errors occurring in this phase
(``HarvestGatherError``) are stored in the ``harvest_gather_error`` (``HarvestGatherError``) are stored in the ``harvest_gather_error``
table. During the next phase, the **fetch** stage retrieves the table. During the next phase, the **fetch** stage retrieves the
``HarvestedObjects`` and, if necessary, the **import** stage stores ``HarvestedObjects`` and, if necessary, the **import** stage stores
@ -82,7 +82,7 @@ class HarvestGatherError(HarvestDomainObject):
pass pass
class HarvestObjectError(HarvestDomainObject): class HarvestObjectError(HarvestDomainObject):
'''Object errors are raised during the **fetch** or **import** stage of a '''Object errors are raised during the **fetch** or **import** stage of a
harvesting job, and are referenced to a specific harvest object. harvesting job, and are referenced to a specific harvest object.
''' '''
pass pass
@ -135,16 +135,16 @@ harvest_object_error_table = Table('harvest_object_error',metadata,
) )
mapper( mapper(
HarvestSource, HarvestSource,
harvest_source_table, harvest_source_table,
properties={ properties={
'objects': relation( 'objects': relation(
HarvestObject, HarvestObject,
backref=u'source', backref=u'source',
), ),
'jobs': relation( 'jobs': relation(
HarvestJob, HarvestJob,
backref=u'source', backref=u'source',
order_by=harvest_job_table.c.created, order_by=harvest_job_table.c.created,
), ),
}, },
@ -156,7 +156,7 @@ mapper(
) )
mapper( mapper(
HarvestObject, HarvestObject,
harvest_object_table, harvest_object_table,
properties={ properties={
'package':relation( 'package':relation(

View File

@ -8,7 +8,7 @@
<py:def function="optional_head"> <py:def function="optional_head">
<link type="text/css" rel="stylesheet" media="all" href="/ckanext/harvest/style.css" /> <link type="text/css" rel="stylesheet" media="all" href="/ckanext/harvest/style.css" />
</py:def> </py:def>
<div py:match="content"> <div py:match="content">
<div class="harvest-content"> <div class="harvest-content">
<py:if test="c.sources"> <py:if test="c.sources">
@ -20,20 +20,24 @@
<th></th> <th></th>
<th></th> <th></th>
<th>URL</th> <th>URL</th>
<th>Status</th> <th>Type</th>
<th>Active</th>
<!-- <th>Status</th>
<th>Statistics</th> <th>Statistics</th>
<th>Next Harvest</th> <th>Next Harvest</th>-->
<th>Created Date</th> <th>Created</th>
</tr> </tr>
<tr py:for="source in c.sources"> <tr py:for="source in c.sources">
<td>${h.link_to('view', 'harvest/' + source.id)}</td> <td>${h.link_to('view', 'harvest/' + source.id)}</td>
<td>${h.link_to('edit', 'harvest/' + source.id + '/edit')}</td> <td>${h.link_to('edit', 'harvest/' + source.id + '/edit')}</td>
<td>${h.link_to('refresh', 'harvest/' + source.id + '/refresh')}</td> <td>${h.link_to('refresh', 'harvest/' + source.id + '/refresh')}</td>
<td>${source.url}</td> <td>${source.url}</td>
<td>${source.status.last_harvest_status}</td> <td>${source.type}</td>
<td>${source.active}</td>
<!-- <td>${source.status.last_harvest_status}</td>
<td>${source.status.overall_statistics.added} pkgs ${source.status.overall_statistics.errors} errors</td> <td>${source.status.overall_statistics.added} pkgs ${source.status.overall_statistics.errors} errors</td>
<td>${source.status.next_harvest}</td> <td>${source.status.next_harvest}</td>-->
<td>${source.created}</td> <td>${source.created}</td>
</tr> </tr>
</table> </table>

View File

@ -22,22 +22,35 @@
<th>URL</th> <th>URL</th>
<td>${c.source.url}</td> <td>${c.source.url}</td>
</tr> </tr>
<tr>
<th>Type</th>
<td>${c.source.type}</td>
</tr>
<tr>
<th>Active</th>
<td>${c.source.active}</td>
</tr>
<tr> <tr>
<th>Description</th> <th>Description</th>
<td>${c.source.description}</td> <td>${c.source.description}</td>
</tr> </tr>
<tr> <tr>
<th>User</th> <th>User</th>
<td>${c.source.user_ref}</td> <td>${c.source.user_id}</td>
</tr> </tr>
<tr> <tr>
<th>Publisher</th> <th>Publisher</th>
<td>${c.source.publisher_ref}</td> <td>${c.source.publisher_id}</td>
</tr> </tr>
<tr> <tr>
<th>Created</th> <th>Created</th>
<td>${c.source.created}</td> <td>${c.source.created}</td>
</tr> </tr>
<tr>
<th>Total jobs</th>
<td>${len(c.source.jobs)}</td>
</tr>
<!--
<tr> <tr>
<th>Status</th> <th>Status</th>
<td> <td>
@ -74,6 +87,7 @@
</div> </div>
</td> </td>
</tr> </tr>
-->
</table> </table>
</py:if> </py:if>
</div> </div>