From 4036858ac931189084a4b3b0bd8a881c57f95d2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Mercader?= Date: Fri, 18 Mar 2011 15:44:40 +0000 Subject: [PATCH] Add a CLI command to create or update the geometries for package extents --- README.rst | 6 ++- ckanext/harvest/commands/harvester.py | 16 +++++++ ckanext/harvest/controllers/harvesting.py | 53 ++------------------- ckanext/harvest/lib/__init__.py | 58 +++++++++++++++++++++++ 4 files changed, 84 insertions(+), 49 deletions(-) create mode 100644 ckanext/harvest/lib/__init__.py diff --git a/README.rst b/README.rst index bc37f03..f6ed59c 100644 --- a/README.rst +++ b/README.rst @@ -61,7 +61,11 @@ The following operations can be run from the command line using the harvester run - runs harvesting jobs - + + harvester extents + - creates or updates the extent geometry column for packages with + a bounding box defined in extras + The commands should be run from the ckanext-harvest directory and expect a development.ini file to be present. 
Most of the time you will specify the config explicitly though::
diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py
index 3d2199e..05d6466 100644
--- a/ckanext/harvest/commands/harvester.py
+++ b/ckanext/harvest/commands/harvester.py
@@ -6,6 +6,8 @@ from ckan.lib.cli import CkanCommand
 from ckan.model import repo
 from ckanext.harvest.model import HarvestSource, HarvestingJob, HarvestedDocument
 
+from ckanext.harvest.lib import save_extent
+
 class Harvester(CkanCommand):
     '''Harvests remotely mastered metadata
 
@@ -93,6 +95,9 @@ class Harvester(CkanCommand):
             self.list_harvesting_jobs()
         elif cmd == 'run':
             self.run_harvester()
+        elif cmd == 'extents':
+            self.update_extents()
+
         else:
             print 'Command %s not recognized' % cmd
 
@@ -103,6 +108,31 @@ class Harvester(CkanCommand):
         logger_vdm = logging.getLogger('vdm')
         logger_vdm.setLevel(logging.ERROR)
 
+    def update_extents(self):
+        '''Creates or updates the extent geometry of every package that has
+        a bbox-east-long extra and prints how many were processed'''
+        from ckan.model import PackageExtra, Session
+
+        # Packages are selected by the presence of the bbox-east-long extra;
+        # save_extent() reads the remaining bbox extras itself
+        extras = Session.query(PackageExtra).filter(
+            PackageExtra.key == 'bbox-east-long').all()
+        packages = [extra.package for extra in extras]
+
+        processed = 0
+        for package in packages:
+            try:
+                save_extent(package)
+            except (TypeError, ValueError):
+                # save_extent() may raise these when a bbox extra is missing
+                # or not numeric: skip this package instead of aborting the
+                # whole run
+                print "Skipping package %s: incomplete or invalid bounding box" % package.id
+                continue
+            processed += 1
+
+        print "Done. Extents generated for %i packages" % processed
+
     def run_harvester(self, *args, **kwds):
         from pylons.i18n.translation import _get_translator
         import pylons
diff --git a/ckanext/harvest/controllers/harvesting.py b/ckanext/harvest/controllers/harvesting.py
index 056bba9..1708298 100644
--- a/ckanext/harvest/controllers/harvesting.py
+++ b/ckanext/harvest/controllers/harvesting.py
@@ -23,6 +23,9 @@ import ckan.rating
 import ckan.misc
 from ckan.lib.munge import munge_title_to_name
 
+from ckanext.harvest.lib import save_extent
+
+
 log = __import__("logging").getLogger(__name__)
 
 def gen_new_name(title):
@@ -218,7 +221,7 @@ class HarvestingJobController(object):
             # Create new package from data.
package = self._create_package_from_data(package_data) if package.extras.get('bbox-east-long'): - self._save_extent(package) + save_extent(package) log.info("Created new package ID %s with GEMINI guid %s", package.id, gemini_guid) harvested_doc = HarvestedDocument( @@ -235,7 +238,7 @@ class HarvestingJobController(object): else: package = self._create_package_from_data(package_data, package = package) if package.extras.get('bbox-east-long'): - self._save_extent(package) + save_extent(package) log.info("Updated existing package ID %s with existing GEMINI guid %s", package.id, gemini_guid) harvested_doc.content = content @@ -246,52 +249,6 @@ class HarvestingJobController(object): assert gemini_guid == package.documents[0].guid return package - def _save_extent(self,package): - #TODO: configure SRID - conn = model.Session.connection() - - minx = float(package.extras.get('bbox-east-long')) - miny = float(package.extras.get('bbox-south-lat')) - maxx = float(package.extras.get('bbox-west-long')) - maxy = float(package.extras.get('bbox-north-lat')) - - try: - - # Check if extent already exists - rows = conn.execute('SELECT package_id FROM package_extent WHERE package_id = %s',package.id).fetchall() - update =(len(rows) > 0) - - if update: - # Update - statement = """UPDATE package_extent SET - the_geom = ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, - %(maxx)s %(miny)s, - %(maxx)s %(maxy)s, - %(minx)s %(maxy)s, - %(minx)s %(miny)s))',4258) - WHERE package_id = %(id)s - """ - msg = 'Updated extent for package %s' - else: - # Insert - statement = """INSERT INTO package_extent (package_id,the_geom) VALUES ( - %(id)s, - ST_GeomFromText('POLYGON ((%(minx)s %(miny)s, - %(maxx)s %(miny)s, - %(maxx)s %(maxy)s, - %(minx)s %(maxy)s, - %(minx)s %(miny)s))',4258))""" - msg = 'Created new extent for package %s' - - conn.execute(statement,{'id':package.id, 'minx':minx,'miny':miny,'maxx':maxx,'maxy':maxy}) - - model.Session.commit() - log.info(msg, package.id) - except: - log.error('An 
error occurred when saving the extent for package %s',package.id)
-        finally:
-            return package
-
     def get_content(self, url):
         try:
             http_response = urllib2.urlopen(url)
diff --git a/ckanext/harvest/lib/__init__.py b/ckanext/harvest/lib/__init__.py
new file mode 100644
index 0000000..596579a
--- /dev/null
+++ b/ckanext/harvest/lib/__init__.py
@@ -0,0 +1,73 @@
+from ckan.model import Session
+
+log = __import__("logging").getLogger(__name__)
+
+#TODO: configure SRID
+# SRID used when the caller does not provide one
+DEFAULT_SRID = 4258
+
+def _polygon_wkt(minx,miny,maxx,maxy):
+    '''Returns the WKT polygon of the rectangle delimited by the bounds'''
+    corners = [(minx,miny),(maxx,miny),(maxx,maxy),(minx,maxy),(minx,miny)]
+    return 'POLYGON ((%s))' % ', '.join(['%r %r' % corner for corner in corners])
+
+def save_extent(package,extent=False):
+    '''Updates the package extent in the package_extent geometry column
+    If no extent provided (as a dict with minx,miny,maxx,maxy and an
+    optional srid key), the values stored in the package extras are used.
+
+    Saving is best effort: failures are logged, the session is rolled back
+    and the package is always returned'''
+    try:
+        if extent:
+            minx = float(extent['minx'])
+            miny = float(extent['miny'])
+            maxx = float(extent['maxx'])
+            maxy = float(extent['maxy'])
+            srid = int(extent.get('srid',DEFAULT_SRID))
+        else:
+            # West is the lower longitude bound, east the upper one
+            minx = float(package.extras.get('bbox-west-long'))
+            miny = float(package.extras.get('bbox-south-lat'))
+            maxx = float(package.extras.get('bbox-east-long'))
+            maxy = float(package.extras.get('bbox-north-lat'))
+            srid = DEFAULT_SRID
+    except (KeyError,TypeError,ValueError):
+        log.error('Missing or invalid bounding box for package %s',package.id)
+        return package
+
+    # The WKT is built from floats only and sent as a bound parameter rather
+    # than interpolating placeholders inside a quoted SQL literal
+    params = {'id':package.id, 'wkt':_polygon_wkt(minx,miny,maxx,maxy), 'srid':srid}
+
+    try:
+        conn = Session.connection()
+
+        # Check if extent already exists
+        rows = conn.execute('SELECT package_id FROM package_extent WHERE package_id = %s',package.id).fetchall()
+
+        if rows:
+            # Update
+            statement = """UPDATE package_extent SET
+                the_geom = ST_GeomFromText(%(wkt)s,%(srid)s)
+                WHERE package_id = %(id)s
+                """
+            msg = 'Updated extent for package %s'
+        else:
+            # Insert
+            statement = """INSERT INTO package_extent (package_id,the_geom) VALUES (
+                %(id)s,
+                ST_GeomFromText(%(wkt)s,%(srid)s))"""
+            msg = 'Created new extent for package %s'
+
+        conn.execute(statement,params)
+
+        Session.commit()
+        log.info(msg, package.id)
+    except Exception:
+        # Log with traceback, then leave the session usable for the next call
+        log.exception('An error occurred when saving the extent for package %s',package.id)
+        Session.rollback()
+
+    return package
+