diff --git a/ckanext/harvest/controllers/view.py b/ckanext/harvest/controllers/view.py index f8cef35..023ec0e 100644 --- a/ckanext/harvest/controllers/view.py +++ b/ckanext/harvest/controllers/view.py @@ -8,6 +8,7 @@ from ckan.lib.base import BaseController, c, g, request, \ from ckan.model import Package +from ckanext.harvest.lib import * class ViewController(BaseController): @@ -32,39 +33,19 @@ class ViewController(BaseController): if data: http_request.add_data(data) - + try: return urllib2.urlopen(http_request) except urllib2.HTTPError as e: raise - def index(self): - # Request all harvesting sources - sources_url = self.api_url + '/harvestsource' - try: - doc = self._do_request(sources_url).read() + # Request all harvest sources + c.sources = get_harvest_sources() - sources_ids = json.loads(doc) - - source_url = sources_url + '/%s' - sources = [] - - # For each source, request its details - for source_id in sources_ids: - doc = self._do_request(source_url % source_id).read() - sources.append(json.loads(doc)) - - c.sources = sources - except urllib2.HTTPError as e: - msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) - h.flash_error(msg) - except urllib2.URLError as e: - msg = 'Could not find server %r: %r' % (sources_url, e) - h.flash_error(msg) - + #TODO: show source reports return render('ckanext/harvest/index.html') - + def create(self): # This is the DGU form API, so we don't use self.api_url @@ -91,37 +72,30 @@ class ViewController(BaseController): data = json.dumps(data) try: r = self._do_request(form_url,data) - + h.flash_success('Harvesting source added successfully') except urllib2.HTTPError as e: msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) - # The form API returns just a 500, so we are not exactly sure of what + # The form API returns just a 500, so we are not exactly sure of what # happened, but most probably it was a duplicate entry if e.getcode() == 500: msg = msg + ' Does the source already exist?' h.flash_error(msg) finally: redirect(h.url_for(controller='harvest', action='index')) - - def show(self,id): - sources_url = self.api_url + '/harvestsource/%s' % id - try: - doc = self._do_request(sources_url).read() - c.source = json.loads(doc) - except urllib2.HTTPError as e: - msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) - h.flash_error(msg) + def show(self,id): + c.source = get_harvest_source(id) + + #TODO: show source reports return render('ckanext/harvest/show.html') def delete(self,id): - form_url = self.form_api_url + '/harvestsource/delete/%s' % id try: - r = self._do_request(form_url) - + delete_harvest_source(id) h.flash_success('Harvesting source deleted successfully') - except urllib2.HTTPError as e: - msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) + except Exception as e: + msg = 'An error occurred: [%s]' % e.message h.flash_error(msg) redirect(h.url_for(controller='harvest', action='index', id=None)) @@ -147,34 +121,23 @@ class ViewController(BaseController): data = json.dumps(data) try: r = self._do_request(form_url,data) - + h.flash_success('Harvesting source edited successfully') except urllib2.HTTPError as e: msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) h.flash_error(msg) finally: redirect(h.url_for(controller='harvest', action='index', id=None)) - + def create_harvesting_job(self,id): - form_url = self.api_url + '/harvestingjob' - data = { - 'source_id': id, - 'user_ref': '' - } - data = json.dumps(data) try: - r = self._do_request(form_url,data) - + create_harvest_job(id) h.flash_success('Refresh requested, harvesting will take place within 15 minutes.') - except urllib2.HTTPError as e: - msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg) - if e.getcode() == 400: - msg = msg + ' ' + e.read() - - h.flash_error(msg) - finally: - redirect(h.url_for(controller='harvest', action='index', id=None)) + except Exception as e: + msg = 'An error occurred: [%s]' % e.message + h.flash_error(msg) + redirect(h.url_for(controller='harvest', action='index', id=None)) def map_view(self,id): #check if package exists diff --git a/ckanext/harvest/lib/__init__.py b/ckanext/harvest/lib/__init__.py index aa4d53e..7ca1289 100644 --- a/ckanext/harvest/lib/__init__.py +++ b/ckanext/harvest/lib/__init__.py @@ -18,6 +18,8 @@ def _source_as_dict(source): for obj in source.objects: out['objects'].append(obj.as_dict()) + #TODO: Get some report data + return out def _job_as_dict(job): @@ -35,9 +37,9 @@ def _job_as_dict(job): for error in job.gather_errors: out['object_errors'].append(error.as_dict()) - + return out - + def get_harvest_source(id,default=Exception,attr=None): source = HarvestSource.get(id,default=default,attr=attr) @@ -56,7 +58,7 @@ def create_harvest_source(source_dict): exists = get_harvest_sources(url=source_dict['url']) if len(exists): raise Exception('There is already a Harvest Source for this URL: %s' % source_dict['url']) - + source = HarvestSource() source.url = source_dict['url'] source.type = source_dict['type'] @@ -66,10 +68,10 @@ def create_harvest_source(source_dict): source.__setattr__(o,source_dict[o]) source.save() - + return _source_as_dict(source) - + def delete_harvest_source(source_id): try: @@ -79,7 +81,7 @@ def delete_harvest_source(source_id): source.delete() repo.commit_and_remove() - + #TODO: Jobs? return True @@ -95,7 +97,8 @@ def get_harvest_jobs(**kwds): def create_harvest_job(source_id): # Check if source exists try: - source = get_harvest_source(source_id) + #We'll need the actual HarvestSource + source = HarvestSource.get(source_id) except: raise Exception('Source %s does not exist' % source_id) @@ -106,7 +109,7 @@ def create_harvest_job(source_id): job = HarvestJob() job.source = source - + job.save() return _job_as_dict(job) @@ -119,7 +122,7 @@ def delete_harvest_job(job_id): job.delete() repo.commit_and_remove() - + #TODO: objects? return True @@ -141,7 +144,7 @@ def get_srid(crs): return int(srid) -#TODO: move to ckanext-?? for geo stuff +#TODO: move to ckanext-?? for geo stuff def save_extent(package,extent=False): '''Updates the package extent in the package_extent geometry column If no extent provided (as a dict with minx,miny,maxx,maxy and srid keys), @@ -152,12 +155,12 @@ def save_extent(package,extent=False): srid = None if extent: - minx = extent['minx'] + minx = extent['minx'] miny = extent['miny'] maxx = extent['maxx'] maxy = extent['maxy'] if 'srid' in extent: - srid = extent['srid'] + srid = extent['srid'] else: minx = float(package.extras.get('bbox-east-long')) miny = float(package.extras.get('bbox-south-lat')) @@ -165,22 +168,22 @@ def save_extent(package,extent=False): maxy = float(package.extras.get('bbox-north-lat')) crs = package.extras.get('spatial-reference-system') if crs: - srid = get_srid(crs) + srid = get_srid(crs) try: - + # Check if extent already exists rows = conn.execute('SELECT package_id FROM package_extent WHERE package_id = %s',package.id).fetchall() update =(len(rows) > 0) - + params = {'id':package.id, 'minx':minx,'miny':miny,'maxx':maxx,'maxy':maxy, 'db_srid': db_srid} - + if update: # Update if srid and srid != db_srid: # We need to reproject the input geometry - statement = """UPDATE package_extent SET + statement = """UPDATE package_extent SET the_geom = ST_Transform( - ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, + ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, %(maxx)s %(miny)s, %(maxx)s %(maxy)s, %(minx)s %(maxy)s, @@ -190,15 +193,15 @@ def save_extent(package,extent=False): """ params.update({'srid': srid}) else: - statement = """UPDATE package_extent SET - the_geom = ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, + statement = """UPDATE package_extent SET + the_geom = ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, %(maxx)s %(miny)s, %(maxx)s %(maxy)s, %(minx)s %(maxy)s, %(minx)s %(miny)s))',%(db_srid)s) WHERE package_id = %(id)s """ - msg = 'Updated extent for package %s' + msg = 'Updated extent for package %s' else: # Insert if srid and srid != db_srid: @@ -206,23 +209,23 @@ def save_extent(package,extent=False): statement = """INSERT INTO package_extent (package_id,the_geom) VALUES ( %(id)s, ST_Transform( - ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, + ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, %(maxx)s %(miny)s, %(maxx)s %(maxy)s, %(minx)s %(maxy)s, %(minx)s %(miny)s))',%(srid)s), %(db_srid)) )""" - params.update({'srid': srid}) + params.update({'srid': srid}) else: statement = """INSERT INTO package_extent (package_id,the_geom) VALUES ( %(id)s, - ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, + ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, %(maxx)s %(miny)s, %(maxx)s %(maxy)s, %(minx)s %(maxy)s, %(minx)s %(miny)s))',%(db_srid)s))""" - msg = 'Created new extent for package %s' + msg = 'Created new extent for package %s' conn.execute(statement,params) diff --git a/ckanext/harvest/model/__init__.py b/ckanext/harvest/model/__init__.py index 0f81573..8ebe477 100644 --- a/ckanext/harvest/model/__init__.py +++ b/ckanext/harvest/model/__init__.py @@ -15,7 +15,7 @@ __all__ = [ 'HarvestJob', 'harvest_job_table', 'HarvestObject', 'harvest_object_table', 'HarvestGatherError', 'harvest_gather_error_table', - 'HarvestObjectError', 'harvest_object_error_table', + 'HarvestObjectError', 'harvest_object_error_table', ] class HarvestError(Exception): @@ -57,7 +57,7 @@ class HarvestSource(HarvestDomainObject): class HarvestJob(HarvestDomainObject): '''A Harvesting Job is performed in two phases. In first place, the **gather** stage collects all the Ids and URLs that need to be fetched - from the harvest source. Errors occurring in this phase + from the harvest source. Errors occurring in this phase (``HarvestGatherError``) are stored in the ``harvest_gather_error`` table. During the next phase, the **fetch** stage retrieves the ``HarvestedObjects`` and, if necessary, the **import** stage stores @@ -82,7 +82,7 @@ class HarvestGatherError(HarvestDomainObject): pass class HarvestObjectError(HarvestDomainObject): - '''Object errors are raised during the **fetch** or **import** stage of a + '''Object errors are raised during the **fetch** or **import** stage of a harvesting job, and are referenced to a specific harvest object. ''' pass @@ -135,16 +135,16 @@ harvest_object_error_table = Table('harvest_object_error',metadata, ) mapper( - HarvestSource, + HarvestSource, harvest_source_table, - properties={ + properties={ 'objects': relation( HarvestObject, backref=u'source', ), 'jobs': relation( HarvestJob, - backref=u'source', + backref=u'source', order_by=harvest_job_table.c.created, ), }, @@ -156,7 +156,7 @@ mapper( ) mapper( - HarvestObject, + HarvestObject, harvest_object_table, properties={ 'package':relation( diff --git a/templates/ckanext/harvest/index.html b/templates/ckanext/harvest/index.html index aad9e2b..4b1a559 100644 --- a/templates/ckanext/harvest/index.html +++ b/templates/ckanext/harvest/index.html @@ -8,7 +8,7 @@ - +
@@ -20,20 +20,24 @@ URL - Status + Type + Active + + Created - + ${h.link_to('view', 'harvest/' + source.id)} - ${h.link_to('edit', 'harvest/' + source.id + '/edit')} - ${h.link_to('refresh', 'harvest/' + source.id + '/refresh')} + ${h.link_to('edit', 'harvest/' + source.id + '/edit')} + ${h.link_to('refresh', 'harvest/' + source.id + '/refresh')} ${source.url} - ${source.status.last_harvest_status} + ${source.type} + ${source.active} + ${source.created} diff --git a/templates/ckanext/harvest/show.html b/templates/ckanext/harvest/show.html index eec7fb2..4ac883a 100644 --- a/templates/ckanext/harvest/show.html +++ b/templates/ckanext/harvest/show.html @@ -22,22 +22,35 @@ URL ${c.source.url} + + Type + ${c.source.type} + + + Active + ${c.source.active} + Description ${c.source.description} User - ${c.source.user_ref} + ${c.source.user_id} Publisher - ${c.source.publisher_ref} + ${c.source.publisher_id} Created ${c.source.created} + + Total jobs + ${len(c.source.jobs)} + +