[#195] Don't use PostGIS by default

Consolidate all DB related modules and functions into a single module
for easier encapsulation. The PostGIS database table and the functions
to store PostGIS geometries are no longer called by default.

There is a new config option (`ckan.spatial.use_postgis=true`) to
re-enable this behaviour. This option (and the actual PostGIS modules)
will be dropped in future versions.
This commit is contained in:
amercader 2022-08-24 11:24:30 +02:00
parent ffba7092db
commit 93f6f87868
13 changed files with 323 additions and 301 deletions

View File

@ -1,42 +0,0 @@
from ckan.model import meta, Session
from sqlalchemy import types, Column, Table
from geoalchemy2.elements import WKTElement
from geoalchemy2 import Geometry
from sqlalchemy import func
ST_Transform = func.ST_Transform
ST_Equals = func.ST_Equals
legacy_geoalchemy = False
def postgis_version():
    """Return the PostGIS library version reported by the database.

    Runs ``SELECT postgis_lib_version()`` through the CKAN session and
    returns the scalar value of the result.
    """
    return Session.execute('SELECT postgis_lib_version()').scalar()
def setup_spatial_table(package_extent_class, db_srid=None):
    """Define the ``package_extent`` table and map it to the given class.

    package_extent_class: class to be mapped to the table.
    db_srid: SRID passed to the geometry column definition.

    Returns the ``Table`` object that was defined.
    """
    # PostGIS 1.5 requires management=True when defining the Geometry
    # field, so check the major version reported by the database
    needs_management = postgis_version()[:1] == '1'

    id_column = Column('package_id', types.UnicodeText, primary_key=True)
    geometry_column = Column(
        'the_geom',
        Geometry('GEOMETRY', srid=db_srid, management=needs_management),
    )

    table = Table(
        'package_extent',
        meta.metadata,
        id_column,
        geometry_column,
        extend_existing=True,
    )
    meta.mapper(package_extent_class, table)
    return table
def compare_geometry_fields(geom_field1, geom_field2):
    """Return the result of ``ST_Equals`` evaluated on both geometries.

    The comparison is executed through the CKAN session and its scalar
    result is returned.
    """
    equality_check = ST_Equals(geom_field1, geom_field2)
    return Session.scalar(equality_check)

View File

@ -31,7 +31,7 @@ from ckanext.harvest.harvesters.base import HarvesterBase
from ckanext.harvest.model import HarvestObject
from ckanext.spatial.validation import Validators, all_validators
from ckanext.spatial.model import ISODocument
from ckanext.spatial.harvested_metadata import ISODocument
from ckanext.spatial.interfaces import ISpatialHarvester
from ckantoolkit import config
@ -133,7 +133,7 @@ def guess_resource_format(resource_locator, use_mimetypes=True):
resource_type = protocols.get(protocol)
if resource_type:
return resource_type
url = resource_locator.get('url').lower().strip()
resource_types = {

View File

@ -33,7 +33,7 @@ from ckan.lib.navl.validators import not_empty
from ckanext.harvest.interfaces import IHarvester
from ckanext.harvest.model import HarvestObject
from ckanext.spatial.model import GeminiDocument
from ckanext.spatial.harvested_metadata import GeminiDocument
from ckanext.spatial.lib.csw_client import CswService
from ckanext.spatial.harvesters.base import SpatialHarvester, text_traceback

View File

@ -1,17 +1,7 @@
import six
import logging
from string import Template
from ckan.model import Session, Package
import six
import ckantoolkit as tk
from ckanext.spatial.model import PackageExtent
from shapely.geometry import shape
from ckanext.spatial.geoalchemy_common import (WKTElement, ST_Transform,
compare_geometry_fields,
)
config = tk.config
log = logging.getLogger(__name__)
@ -19,71 +9,23 @@ log = logging.getLogger(__name__)
def get_srid(crs):
"""Returns the SRID for the provided CRS definition
The CRS can be defined in the following formats
- urn:ogc:def:crs:EPSG::4326
- EPSG:4326
- 4326
"""
The CRS can be defined in the following formats
- urn:ogc:def:crs:EPSG::4326
- EPSG:4326
- 4326
"""
if ':' in crs:
crs = crs.split(':')
srid = crs[len(crs)-1]
if ":" in crs:
crs = crs.split(":")
srid = crs[len(crs) - 1]
else:
srid = crs
srid = crs
return int(srid)
def save_package_extent(package_id, geometry = None, srid = None):
'''Adds, updates or deletes the package extent geometry.
package_id: Package unique identifier
geometry: a Python object implementing the Python Geo Interface
(i.e a loaded GeoJSON object)
srid: The spatial reference in which the geometry is provided.
If None, it defaults to the DB srid.
Will throw ValueError if the geometry object does not provide a geo interface.
The responsibility for calling model.Session.commit() is left to the
caller.
'''
db_srid = int(config.get('ckan.spatial.srid', '4326'))
existing_package_extent = Session.query(PackageExtent).filter(PackageExtent.package_id==package_id).first()
if geometry:
geom_obj = shape(geometry)
if not srid:
srid = db_srid
package_extent = PackageExtent(package_id=package_id,
the_geom=WKTElement(geom_obj.wkt, srid))
# Check if extent exists
if existing_package_extent:
# If extent exists but we received no geometry, we'll delete the existing one
if not geometry:
existing_package_extent.delete()
log.debug('Deleted extent for package %s' % package_id)
else:
# Check if extent changed
if not compare_geometry_fields(package_extent.the_geom, existing_package_extent.the_geom):
# Update extent
existing_package_extent.the_geom = package_extent.the_geom
existing_package_extent.save()
log.debug('Updated extent for package %s' % package_id)
else:
log.debug('Extent for package %s unchanged' % package_id)
elif geometry:
# Insert extent
Session.add(package_extent)
log.debug('Created new extent for package %s' % package_id)
def validate_bbox(bbox_values):
'''
"""
Ensures a bbox is expressed in a standard dict.
bbox_values may be:
@ -97,96 +39,21 @@ def validate_bbox(bbox_values):
'maxy': 56.43}
Any problems and it returns None.
'''
"""
if isinstance(bbox_values,six.string_types):
bbox_values = bbox_values.split(',')
if isinstance(bbox_values, six.string_types):
bbox_values = bbox_values.split(",")
if len(bbox_values) != 4:
return None
try:
bbox = {}
bbox['minx'] = float(bbox_values[0])
bbox['miny'] = float(bbox_values[1])
bbox['maxx'] = float(bbox_values[2])
bbox['maxy'] = float(bbox_values[3])
except ValueError as e:
bbox["minx"] = float(bbox_values[0])
bbox["miny"] = float(bbox_values[1])
bbox["maxx"] = float(bbox_values[2])
bbox["maxy"] = float(bbox_values[3])
except ValueError:
return None
return bbox
def _bbox_2_wkt(bbox, srid):
'''
Given a bbox dictionary, return a WKTSpatialElement, transformed
into the database\'s CRS if necessary.
returns e.g. WKTSpatialElement("POLYGON ((2 0, 2 1, 7 1, 7 0, 2 0))", 4326)
'''
db_srid = int(config.get('ckan.spatial.srid', '4326'))
bbox_template = Template('POLYGON (($minx $miny, $minx $maxy, $maxx $maxy, $maxx $miny, $minx $miny))')
wkt = bbox_template.substitute(minx=bbox['minx'],
miny=bbox['miny'],
maxx=bbox['maxx'],
maxy=bbox['maxy'])
if srid and srid != db_srid:
# Input geometry needs to be transformed to the one used on the database
input_geometry = ST_Transform(WKTElement(wkt,srid),db_srid)
else:
input_geometry = WKTElement(wkt,db_srid)
return input_geometry
def bbox_query(bbox,srid=None):
'''
Performs a spatial query of a bounding box.
bbox - bounding box dict
Returns a query object of PackageExtents, which each reference a package
by ID.
'''
input_geometry = _bbox_2_wkt(bbox, srid)
extents = Session.query(PackageExtent) \
.filter(PackageExtent.package_id==Package.id) \
.filter(PackageExtent.the_geom.intersects(input_geometry)) \
.filter(Package.state==u'active')
return extents
def bbox_query_ordered(bbox, srid=None):
'''
Performs a spatial query of a bounding box. Returns packages in order
of how similar the data\'s bounding box is to the search box (best first).
bbox - bounding box dict
Returns a query object of PackageExtents, which each reference a package
by ID.
'''
input_geometry = _bbox_2_wkt(bbox, srid)
params = {'query_bbox': six.text_type(input_geometry),
'query_srid': input_geometry.srid}
# First get the area of the query box
sql = "SELECT ST_Area(ST_GeomFromText(:query_bbox, :query_srid));"
params['search_area'] = Session.execute(sql, params).fetchone()[0]
# Uses spatial ranking method from "USGS - 2006-1279" (Lanfear)
sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom,
POWER(ST_Area(ST_Intersection(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))),2)/ST_Area(package_extent.the_geom)/:search_area as spatial_ranking,
package_extent.package_id AS package_id
FROM package_extent, package
WHERE package_extent.package_id = package.id
AND ST_Intersects(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))
AND package.state = 'active'
ORDER BY spatial_ranking desc"""
extents = Session.execute(sql, params).fetchall()
log.debug('Spatial results: %r',
[('%.2f' % extent.spatial_ranking, extent.package_id) for extent in extents[:20]])
return extents

View File

@ -1,11 +0,0 @@
from __future__ import absolute_import
# this is a namespace package
try:
import pkg_resources
pkg_resources.declare_namespace(__name__)
except ImportError:
import pkgutil
__path__ = pkgutil.extend_path(__path__, __name__)
from .package_extent import *
from .harvested_metadata import *

View File

@ -1,69 +0,0 @@
from logging import getLogger
from sqlalchemy import Table
from ckan.lib.base import config
from ckan import model
from ckan.model import Session
from ckan.model import meta
from ckan.model.domain_object import DomainObject
from ckanext.spatial.geoalchemy_common import setup_spatial_table
log = getLogger(__name__)
package_extent_table = None
DEFAULT_SRID = 4326 #(WGS 84)
def setup(srid=None):
    """Define the spatial tables in memory and create them in the DB if needed.

    ``srid`` is forwarded to ``define_spatial_tables`` the first time the
    table is defined. Creation is deferred while the core ``package``
    table does not exist yet.

    Raises ``Exception`` if the PostGIS ``geometry_columns`` or
    ``spatial_ref_sys`` tables are missing, and re-raises any error hit
    while creating the ``package_extent`` table.
    """
    if package_extent_table is None:
        define_spatial_tables(srid)
        log.debug('Spatial tables defined in memory')

    if not model.package_table.exists():
        log.debug('Spatial tables creation deferred')
        return

    # Both PostGIS metadata tables must be present
    postgis_tables_present = all(
        Table(name, meta.metadata).exists()
        for name in ('geometry_columns', 'spatial_ref_sys')
    )
    if not postgis_tables_present:
        raise Exception(
            'The spatial extension is enabled, but PostGIS '
            'has not been set up in the database. '
            'Please refer to the "Setting up PostGIS" section in the README.'
        )

    if package_extent_table.exists():
        log.debug('Spatial tables already exist')
        # Future migrations go here
        return

    try:
        package_extent_table.create()
    except Exception as e:
        # Make sure the table does not remain incorrectly created
        # (eg without geom column or constraints)
        if package_extent_table.exists():
            Session.execute('DROP TABLE package_extent')
            Session.commit()
        raise e

    log.debug('Spatial tables created')
class PackageExtent(DomainObject):
    """Domain object holding the extent geometry of a single package."""

    def __init__(self, package_id=None, the_geom=None):
        # Identifier of the package the extent belongs to
        self.package_id = package_id
        # Geometry value stored for that package
        self.the_geom = the_geom
def define_spatial_tables(db_srid=None):
    """Define the ``package_extent`` table and store it at module level.

    db_srid: SRID to use for the geometry column. When it is not provided
        (or is falsy) the ``ckan.spatial.srid`` config option is used,
        falling back to ``DEFAULT_SRID``.
    """
    global package_extent_table

    effective_srid = int(
        db_srid or config.get('ckan.spatial.srid', DEFAULT_SRID)
    )
    package_extent_table = setup_spatial_table(PackageExtent, effective_srid)

View File

@ -8,7 +8,6 @@ import ckantoolkit as tk
from ckan import plugins as p
from ckanext.spatial.lib import save_package_extent
from ckan.lib.helpers import json
if tk.check_ckan_version(min_version="2.9.0"):
@ -33,14 +32,22 @@ class SpatialMetadata(p.SingletonPlugin):
p.implements(p.IConfigurer, inherit=True)
p.implements(p.ITemplateHelpers, inherit=True)
use_postgis = False
# IConfigurable
def configure(self, config):
from ckanext.spatial.model.package_extent import setup as setup_model
if not tk.asbool(config.get('ckan.spatial.testing', 'False')):
log.debug('Setting up the spatial model')
setup_model()
# PostGIS is no longer required, support for it will be dropped in the future
self.use_postgis = tk.asbool(config.get("ckan.spatial.use_postgis", False))
if self.use_postgis:
from ckanext.spatial.postgis.model import setup as setup_model
if not tk.asbool(config.get("ckan.spatial.testing", False)):
log.debug("Setting up the spatial model")
setup_model()
# IConfigurer
@ -75,7 +82,10 @@ class SpatialMetadata(p.SingletonPlugin):
return self.after_dataset_delete(context, data_dict)
def after_dataset_delete(self, context, data_dict):
save_package_extent(data_dict["id"], None)
if self.use_postgis:
from ckanext.spatial.postgis.model import save_package_extent
save_package_extent(data_dict["id"], None)
def check_spatial_extra(self, dataset_dict):
'''
@ -96,7 +106,8 @@ class SpatialMetadata(p.SingletonPlugin):
else:
geometry = extra["value"]
if geometry is None or geometry == "" or delete:
if (geometry is None or geometry == "" or delete) and self.use_postgis:
from ckanext.spatial.postgis.model import save_package_extent
save_package_extent(dataset_id, None)
elif not geometry:
return
@ -118,13 +129,15 @@ class SpatialMetadata(p.SingletonPlugin):
error_dict = {"spatial": [msg]}
raise tk.ValidationError(error_dict)
try:
save_package_extent(dataset_id, geometry)
except Exception as e:
if bool(os.getenv('DEBUG')):
raise
error_dict = {"spatial": ["Error: {}".format(six.text_type(e))]}
raise tk.ValidationError(error_dict)
if self.use_postgis:
from ckanext.spatial.postgis.model import save_package_extent
try:
save_package_extent(dataset_id, geometry)
except Exception as e:
if bool(os.getenv('DEBUG')):
raise
error_dict = {"spatial": ["Error: {}".format(six.text_type(e))]}
raise tk.ValidationError(error_dict)
# ITemplateHelpers

View File

@ -0,0 +1,7 @@
PostGIS is no longer required: by default, extents are no longer stored in a table with
a geometry column. These modules are kept here for backwards compatibility only, and will
be removed in future versions.
Users that need to keep storing dataset extents in PostGIS for some reason can re-enable
this behaviour by setting the `ckan.spatial.use_postgis=True` configuration option.
Again, this feature will be dropped in future versions.

View File

View File

@ -0,0 +1,256 @@
import logging
from string import Template
import six
from sqlalchemy import Table, Column, types, func
from geoalchemy2.elements import WKTElement
from geoalchemy2 import Geometry
from shapely.geometry import shape
from ckan.lib.base import config
from ckan import model
from ckan.model import meta, Session, Package
from ckan.model.domain_object import DomainObject
log = logging.getLogger(__name__)
package_extent_table = None
DEFAULT_SRID = 4326 # (WGS 84)
ST_Transform = func.ST_Transform
ST_Equals = func.ST_Equals
def setup(srid=None):
    """Define the spatial tables in memory and create them in the DB if needed.

    ``srid`` is forwarded to ``define_spatial_tables`` the first time the
    table is defined. Creation is deferred while the core ``package``
    table does not exist yet.

    Raises ``Exception`` if the PostGIS ``geometry_columns`` or
    ``spatial_ref_sys`` tables are missing. Any error hit while creating
    the ``package_extent`` table is re-raised after cleaning up.
    """
    if package_extent_table is None:
        define_spatial_tables(srid)
        log.debug("Spatial tables defined in memory")

    if not model.package_table.exists():
        log.debug("Spatial tables creation deferred")
        return

    # Both PostGIS metadata tables must be present
    postgis_tables_present = all(
        Table(name, meta.metadata).exists()
        for name in ("geometry_columns", "spatial_ref_sys")
    )
    if not postgis_tables_present:
        raise Exception(
            "The spatial extension is enabled, but PostGIS "
            "has not been set up in the database. "
            'Please refer to the "Setting up PostGIS" section in the README.'
        )

    if package_extent_table.exists():
        log.debug("Spatial tables already exist")
        # Future migrations go here
        return

    try:
        package_extent_table.create()
    except Exception:
        # Make sure the table does not remain incorrectly created
        # (eg without geom column or constraints)
        if package_extent_table.exists():
            Session.execute("DROP TABLE package_extent")
            Session.commit()
        # Bare raise re-raises the active exception as-is
        raise

    log.debug("Spatial tables created")
class PackageExtent(DomainObject):
    """Domain object holding the extent geometry of a single package."""

    def __init__(self, package_id=None, the_geom=None):
        # Identifier of the package the extent belongs to
        self.package_id = package_id
        # Geometry value stored for that package
        self.the_geom = the_geom
def define_spatial_tables(db_srid=None):
    """Define the ``package_extent`` table and store it at module level.

    db_srid: SRID to use for the geometry column. When it is not provided
        (or is falsy) the ``ckan.spatial.srid`` config option is used,
        falling back to ``DEFAULT_SRID``.
    """
    global package_extent_table

    effective_srid = int(
        db_srid or config.get("ckan.spatial.srid", DEFAULT_SRID)
    )
    package_extent_table = setup_spatial_table(PackageExtent, effective_srid)
def postgis_version():
    """Return the PostGIS library version reported by the database.

    Runs ``SELECT postgis_lib_version()`` through the CKAN session and
    returns the scalar value of the result.
    """
    return Session.execute("SELECT postgis_lib_version()").scalar()
def setup_spatial_table(package_extent_class, db_srid=None):
    """Define the ``package_extent`` table and map it to the given class.

    package_extent_class: class to be mapped to the table.
    db_srid: SRID passed to the geometry column definition.

    Returns the ``Table`` object that was defined.
    """
    # PostGIS 1.5 requires management=True when defining the Geometry
    # field, so check the major version reported by the database
    needs_management = postgis_version()[:1] == "1"

    id_column = Column("package_id", types.UnicodeText, primary_key=True)
    geometry_column = Column(
        "the_geom",
        Geometry("GEOMETRY", srid=db_srid, management=needs_management),
    )

    table = Table(
        "package_extent",
        meta.metadata,
        id_column,
        geometry_column,
        extend_existing=True,
    )
    meta.mapper(package_extent_class, table)
    return table
def compare_geometry_fields(geom_field1, geom_field2):
    """Return the result of ``ST_Equals`` evaluated on both geometries.

    The comparison is executed through the CKAN session and its scalar
    result is returned.
    """
    equality_check = ST_Equals(geom_field1, geom_field2)
    return Session.scalar(equality_check)
def save_package_extent(package_id, geometry=None, srid=None):
    """Adds, updates or deletes the package extent geometry.

    package_id: Package unique identifier
    geometry: a Python object implementing the Python Geo Interface
        (i.e a loaded GeoJSON object). A falsy value means "no extent":
        any stored extent for the package is deleted.
    srid: The spatial reference in which the geometry is provided.
        If None, it defaults to the DB srid.

    Will throw ValueError if the geometry object does not provide a geo
    interface.

    The responsibility for calling model.Session.commit() is left to the
    caller.
    """
    db_srid = int(config.get("ckan.spatial.srid", "4326"))

    existing_package_extent = (
        Session.query(PackageExtent)
        .filter(PackageExtent.package_id == package_id)
        .first()
    )

    if not geometry:
        # No geometry received: drop the stored extent, if there is one
        if existing_package_extent:
            existing_package_extent.delete()
            log.debug("Deleted extent for package %s", package_id)
        return

    geom_obj = shape(geometry)
    if not srid:
        srid = db_srid
    package_extent = PackageExtent(
        package_id=package_id, the_geom=WKTElement(geom_obj.wkt, srid)
    )

    if not existing_package_extent:
        # Insert extent
        Session.add(package_extent)
        log.debug("Created new extent for package %s", package_id)
        return

    # An extent is already stored: only write when it actually changed
    if compare_geometry_fields(
        package_extent.the_geom, existing_package_extent.the_geom
    ):
        log.debug("Extent for package %s unchanged", package_id)
        return

    existing_package_extent.the_geom = package_extent.the_geom
    existing_package_extent.save()
    log.debug("Updated extent for package %s", package_id)
def _bbox_2_wkt(bbox, srid):
    """
    Build the polygon for a bbox dictionary (``minx``, ``miny``, ``maxx``,
    ``maxy`` keys) as a WKT element, transformed into the database's CRS
    if necessary.

    When ``srid`` is falsy or equal to the configured database SRID the
    result is a plain ``WKTElement``, e.g.
    WKTElement("POLYGON ((2 0, 2 1, 7 1, 7 0, 2 0))", 4326).
    Otherwise it is that element (in ``srid``) wrapped in ``ST_Transform``.
    """
    db_srid = int(config.get("ckan.spatial.srid", "4326"))

    polygon_template = Template(
        "POLYGON (($minx $miny, $minx $maxy, $maxx $maxy, $maxx $miny, $minx $miny))"
    )
    corners = {key: bbox[key] for key in ("minx", "miny", "maxx", "maxy")}
    wkt = polygon_template.substitute(**corners)

    if not srid or srid == db_srid:
        return WKTElement(wkt, db_srid)

    # Input geometry needs to be transformed to the one used on the database
    return ST_Transform(WKTElement(wkt, srid), db_srid)
def bbox_query(bbox, srid=None):
    """
    Build a spatial query for a bounding box.

    bbox - bounding box dict
    srid - spatial reference of the bbox, handed to ``_bbox_2_wkt``

    Returns a query object of PackageExtents whose geometry intersects the
    bounding box and whose package is active. Each one references a
    package by ID.
    """
    search_geometry = _bbox_2_wkt(bbox, srid)

    query = Session.query(PackageExtent)
    query = query.filter(PackageExtent.package_id == Package.id)
    query = query.filter(PackageExtent.the_geom.intersects(search_geometry))
    query = query.filter(Package.state == u"active")
    return query
def bbox_query_ordered(bbox, srid=None):
    """
    Performs a spatial query of a bounding box. Returns packages in order
    of how similar the data's bounding box is to the search box (best first).

    bbox - bounding box dict
    srid - spatial reference of the bbox; when it differs from the database
        SRID the bbox is transformed before querying

    Returns the fetched result rows (not a query object). Each row exposes
    ``package_extent_the_geom``, ``spatial_ranking`` and ``package_id``.
    """
    input_geometry = _bbox_2_wkt(bbox, srid)

    if isinstance(input_geometry, WKTElement):
        query_bbox = six.text_type(input_geometry)
        query_srid = input_geometry.srid
    else:
        # _bbox_2_wkt wrapped the geometry in ST_Transform(...). That SQL
        # expression has no `srid` attribute and its text form is not WKT,
        # so let the database evaluate it and hand back WKT, which is then
        # expressed in the database SRID.
        query_bbox = Session.scalar(func.ST_AsText(input_geometry))
        query_srid = int(config.get("ckan.spatial.srid", "4326"))

    params = {
        "query_bbox": query_bbox,
        "query_srid": query_srid,
    }

    # First get the area of the query box
    sql = "SELECT ST_Area(ST_GeomFromText(:query_bbox, :query_srid));"
    params["search_area"] = Session.execute(sql, params).fetchone()[0]

    # Uses spatial ranking method from "USGS - 2006-1279" (Lanfear)
    # NOTE(review): the ranking divides by ST_Area(the_geom) and by
    # :search_area; zero-area geometries look like they would make the
    # database raise a division error - confirm and guard if needed.
    sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom,
                    POWER(ST_Area(ST_Intersection(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))),2)/ST_Area(package_extent.the_geom)/:search_area as spatial_ranking,
                    package_extent.package_id AS package_id
             FROM package_extent, package
             WHERE package_extent.package_id = package.id
                AND ST_Intersects(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))
                AND package.state = 'active'
             ORDER BY spatial_ranking desc"""
    extents = Session.execute(sql, params).fetchall()

    log.debug(
        "Spatial results: %r",
        [
            ("%.2f" % extent.spatial_ranking, extent.package_id)
            for extent in extents[:20]
        ],
    )
    return extents

View File

@ -15,10 +15,9 @@ from pprint import pprint
from ckan import model
from ckan.model.package_extra import PackageExtra
from ckanext.spatial.lib import save_package_extent
from ckanext.spatial.lib.reports import validation_report
from ckanext.spatial.harvesters import SpatialHarvester
from ckanext.spatial.model import ISODocument
from ckanext.spatial.harvested_metadata import ISODocument
from ckantoolkit import config
@ -95,7 +94,7 @@ def initdb(srid=None):
if srid:
srid = six.text_type(srid)
from ckanext.spatial.model import setup as db_setup
from ckanext.spatial.postgis.model import setup as db_setup
db_setup(srid)
@ -103,15 +102,17 @@ def initdb(srid=None):
def update_extents():
from ckan.model import PackageExtra, Package, Session
conn = Session.connection()
packages = [extra.package \
for extra in \
Session.query(PackageExtra).filter(PackageExtra.key == 'spatial').all()]
from ckanext.spatial.postgis.model import save_package_extent
packages = [
extra.package for extra in
model.Session.query(PackageExtra).filter(PackageExtra.key == 'spatial').all()
]
errors = []
count = 0
for package in packages:
geometry = None
try:
value = package.extras['spatial']
log.debug('Received: %r' % value)
@ -127,7 +128,7 @@ def update_extents():
save_package_extent(package.id, geometry)
Session.commit()
model.Session.commit()
if errors:
msg = 'Errors were found:\n%s' % '\n'.join(errors)

View File

@ -1,6 +1,6 @@
import os
from pkg_resources import resource_stream
from ckanext.spatial.model import ISODocument
from ckanext.spatial.harvested_metadata import ISODocument
from lxml import etree