Add a CLI command to create or update the geometries for package extents

This commit is contained in:
Adrià Mercader 2011-03-18 15:44:40 +00:00
parent 38150cca47
commit 4036858ac9
4 changed files with 84 additions and 49 deletions

View File

@ -61,7 +61,11 @@ The following operations can be run from the command line using the
harvester run
- runs harvesting jobs
harvester extents
- creates or updates the extent geometry column for packages with
a bounding box defined in extras
These commands should be run from the ckanext-harvest directory and expect
a development.ini file to be present there. In most cases, however, you will
want to specify the config file explicitly::

View File

@ -6,6 +6,8 @@ from ckan.lib.cli import CkanCommand
from ckan.model import repo
from ckanext.harvest.model import HarvestSource, HarvestingJob, HarvestedDocument
from ckanext.harvest.lib import save_extent
class Harvester(CkanCommand):
'''Harvests remotely mastered metadata
@ -93,6 +95,9 @@ class Harvester(CkanCommand):
self.list_harvesting_jobs()
elif cmd == 'run':
self.run_harvester()
elif cmd == 'extents':
self.update_extents()
else:
print 'Command %s not recognized' % cmd
@ -103,6 +108,17 @@ class Harvester(CkanCommand):
logger_vdm = logging.getLogger('vdm')
logger_vdm.setLevel(logging.ERROR)
def update_extents(self):
    '''Create or update the stored extent of every package with a bounding box.

    Packages are selected by the presence of a ``bbox-east-long`` extra and
    each one is handed to ``save_extent``. The number printed at the end is
    the number of packages processed.
    '''
    from ckan.model import PackageExtra, Package, Session

    # One ``bbox-east-long`` extra marks a package as having a bounding box.
    bbox_extras = Session.query(PackageExtra) \
        .filter(PackageExtra.key == 'bbox-east-long') \
        .all()
    packages = [extra.package for extra in bbox_extras]

    for package in packages:
        save_extent(package)

    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("Done. Extents generated for %i packages" % len(packages))
def run_harvester(self, *args, **kwds):
from pylons.i18n.translation import _get_translator
import pylons

View File

@ -23,6 +23,9 @@ import ckan.rating
import ckan.misc
from ckan.lib.munge import munge_title_to_name
from ckanext.harvest.lib import save_extent
log = __import__("logging").getLogger(__name__)
def gen_new_name(title):
@ -218,7 +221,7 @@ class HarvestingJobController(object):
# Create new package from data.
package = self._create_package_from_data(package_data)
if package.extras.get('bbox-east-long'):
self._save_extent(package)
save_extent(package)
log.info("Created new package ID %s with GEMINI guid %s", package.id, gemini_guid)
harvested_doc = HarvestedDocument(
@ -235,7 +238,7 @@ class HarvestingJobController(object):
else:
package = self._create_package_from_data(package_data, package = package)
if package.extras.get('bbox-east-long'):
self._save_extent(package)
save_extent(package)
log.info("Updated existing package ID %s with existing GEMINI guid %s", package.id, gemini_guid)
harvested_doc.content = content
@ -246,52 +249,6 @@ class HarvestingJobController(object):
assert gemini_guid == package.documents[0].guid
return package
def _save_extent(self,package):
    '''Save the bounding box of ``package`` to the ``package_extent`` table.

    Kept as a thin wrapper so existing ``self._save_extent(...)`` callers keep
    working; the work is done by the shared ``save_extent`` function imported
    from ``ckanext.harvest.lib``, so the SQL lives in one place only.

    Returns the ``package`` that was passed in.
    '''
    return save_extent(package)
def get_content(self, url):
try:
http_response = urllib2.urlopen(url)

View File

@ -0,0 +1,58 @@
from ckan.model import Session
log = __import__("logging").getLogger(__name__)
def save_extent(package,extent=False):
    '''Create or update the ``package_extent`` row holding the package geometry.

    The extent is stored as a rectangular polygon with SRID 4258.

    :param package: package whose extent is saved. ``package.id`` identifies
        the row; when no ``extent`` is given, ``package.extras`` supplies the
        ``bbox-west-long``, ``bbox-south-lat``, ``bbox-east-long`` and
        ``bbox-north-lat`` values.
    :param extent: optional dict with ``minx``, ``miny``, ``maxx`` and
        ``maxy`` keys. An ``srid`` key, if present, is currently ignored
        (see the TODO below).
    :returns: the ``package`` that was passed in, whether or not the extent
        could be saved. Failures are logged, not raised.
    '''
    #TODO: configure SRID
    try:
        if extent:
            minx = float(extent['minx'])
            miny = float(extent['miny'])
            maxx = float(extent['maxx'])
            maxy = float(extent['maxy'])
        else:
            extras = package.extras
            # West/south are the lower bounds, east/north the upper bounds.
            minx = float(extras.get('bbox-west-long'))
            miny = float(extras.get('bbox-south-lat'))
            maxx = float(extras.get('bbox-east-long'))
            maxy = float(extras.get('bbox-north-lat'))
    except (KeyError, TypeError, ValueError):
        # Missing or non-numeric coordinate. Nothing has been sent to the
        # database yet, so there is nothing to roll back.
        log.exception('An error occurred when saving the extent for package %s',package.id)
        return package

    conn = Session.connection()
    params = {'id':package.id, 'minx':minx,'miny':miny,'maxx':maxx,'maxy':maxy}

    # NOTE(review): the bind parameters below sit inside a quoted SQL literal,
    # which presumably relies on the DB driver interpolating values
    # client-side -- confirm before switching drivers.
    try:
        # Check if extent already exists
        rows = conn.execute('SELECT package_id FROM package_extent WHERE package_id = %s',package.id).fetchall()
        if rows:
            # Update
            statement = """UPDATE package_extent SET
                the_geom = ST_GeomFromText('POLYGON ((%(minx)s %(miny)s,
                                                      %(maxx)s %(miny)s,
                                                      %(maxx)s %(maxy)s,
                                                      %(minx)s %(maxy)s,
                                                      %(minx)s %(miny)s))',4258)
                WHERE package_id = %(id)s
                """
            msg = 'Updated extent for package %s'
        else:
            # Insert
            statement = """INSERT INTO package_extent (package_id,the_geom) VALUES (
                %(id)s,
                ST_GeomFromText('POLYGON ((%(minx)s %(miny)s,
                                           %(maxx)s %(miny)s,
                                           %(maxx)s %(maxy)s,
                                           %(minx)s %(maxy)s,
                                           %(minx)s %(miny)s))',4258))"""
            msg = 'Created new extent for package %s'

        conn.execute(statement, params)
        Session.commit()
        log.info(msg, package.id)
    except Exception:
        # Log with traceback, then reset the session so that a failed
        # statement does not leave it unusable for the next package.
        log.exception('An error occurred when saving the extent for package %s',package.id)
        Session.rollback()

    return package