[#15] Add support for Solr spatial indexing and querying
When the 'ckanext.spatial.search_backend' config option is set to 'solr', the extension will index geometries stored in the 'spatial' extra on the spatial field of the Solr index (named 'spatial_geom'). This is done on the 'before_index' extension point. Also, when doing a query, if the same config option is in place, the necessary fq parameter will be set to pass the spatial query to Solr.
This commit is contained in:
parent
a727aa815b
commit
c7a9cc660f
|
@ -130,6 +130,31 @@ class SpatialQuery(p.SingletonPlugin):
|
|||
action='spatial_query')
|
||||
return map
|
||||
|
||||
def before_index(self, pkg_dict):
    '''
    Add the dataset geometry to the dict sent to the search index.

    Nothing is done unless the dict has an 'extras_spatial' key and the
    'ckanext.spatial.search_backend' config option is 'solr'. Otherwise
    the 'extras_spatial' value is parsed as GeoJSON and, when it yields
    a valid geometry, its WKT representation is stored under the
    'spatial_geom' key. Values that cannot be parsed or do not describe
    a valid geometry are logged and the dict is returned without a
    'spatial_geom' key being added.

    :param pkg_dict: dataset dict about to be indexed
    :returns: the same dict, possibly with 'spatial_geom' set
    '''
    if 'extras_spatial' not in pkg_dict:
        return pkg_dict
    if config.get('ckanext.spatial.search_backend') != 'solr':
        return pkg_dict

    # Imported only once a geometry actually has to be built, so that
    # datasets without a spatial extra never touch shapely.
    # shape() replaces the deprecated asShape() adapter.
    from shapely.geometry import shape as geojson_to_shape

    try:
        geometry = json.loads(pkg_dict['extras_spatial'])
    except (ValueError, TypeError):
        # TypeError covers a value that is not a string (e.g. None)
        log.error('Geometry not valid GeoJSON, not indexing')
        return pkg_dict

    try:
        # Check wrong bboxes (4 same points): a ring whose five
        # positions are all identical is indexed as a point instead
        if geometry['type'] == 'Polygon' and len(geometry['coordinates'][0]) == 5:
            ring = geometry['coordinates'][0]
            xs = [position[0] for position in ring]
            ys = [position[1] for position in ring]

            if xs.count(xs[0]) == 5 and ys.count(ys[0]) == 5:
                geometry = {'type': 'Point', 'coordinates': [xs[0], ys[0]]}

        geom = geojson_to_shape(geometry)
        if not geom.is_valid:
            log.error('Wrong geometry, not indexing')
            return pkg_dict
        wkt = geom.wkt
    except (KeyError, IndexError, TypeError, ValueError, AttributeError):
        # The JSON parsed but is not a usable GeoJSON geometry (not an
        # object, missing 'type'/'coordinates', empty ring, ...).
        # NOTE(review): shapely may raise other error types for some
        # inputs; those still propagate as before -- confirm whether
        # they should be skipped as well.
        log.error('Wrong geometry, not indexing')
        return pkg_dict

    pkg_dict['spatial_geom'] = wkt
    return pkg_dict
|
||||
|
||||
|
||||
def before_search(self,search_params):
|
||||
if 'extras' in search_params and 'ext_bbox' in search_params['extras'] \
|
||||
and search_params['extras']['ext_bbox']:
|
||||
|
@ -138,53 +163,70 @@ class SpatialQuery(p.SingletonPlugin):
|
|||
if not bbox:
|
||||
raise SearchError('Wrong bounding box provided')
|
||||
|
||||
# Note: This will be deprecated at some point in favour of the
|
||||
# Solr 4 spatial sorting capabilities
|
||||
if search_params['sort'] == 'spatial desc' and \
|
||||
p.toolkit.asbool(config.get('ckanext.spatial.use_postgis_sorting', 'False')):
|
||||
if search_params['q'] or search_params['fq']:
|
||||
raise SearchError('Spatial ranking cannot be mixed with other search parameters')
|
||||
# ...because it is too inefficient to use SOLR to filter
|
||||
# results and return the entire set to this class and
|
||||
# after_search do the sorting and paging.
|
||||
extents = bbox_query_ordered(bbox)
|
||||
are_no_results = not extents
|
||||
search_params['extras']['ext_rows'] = search_params['rows']
|
||||
search_params['extras']['ext_start'] = search_params['start']
|
||||
# this SOLR query needs to return no actual results since
|
||||
# they are in the wrong order anyway. We just need this SOLR
|
||||
# query to get the count and facet counts.
|
||||
rows = 0
|
||||
search_params['sort'] = None # SOLR should not sort.
|
||||
# Store the rankings of the results for this page, so for
|
||||
# after_search to construct the correctly sorted results
|
||||
rows = search_params['extras']['ext_rows'] = search_params['rows']
|
||||
start = search_params['extras']['ext_start'] = search_params['start']
|
||||
search_params['extras']['ext_spatial'] = [
|
||||
(extent.package_id, extent.spatial_ranking) \
|
||||
for extent in extents[start:start+rows]]
|
||||
if config.get('ckanext.spatial.search_backend') == 'solr':
|
||||
search_params = self._params_for_solr_search(bbox, search_params)
|
||||
else:
|
||||
extents = bbox_query(bbox)
|
||||
are_no_results = extents.count() == 0
|
||||
search_params = self._params_for_postgis_search(bbox, search_params)
|
||||
|
||||
if are_no_results:
|
||||
# We don't need to perform the search
|
||||
search_params['abort_search'] = True
|
||||
else:
|
||||
# We'll perform the existing search but also filtering by the ids
|
||||
# of datasets within the bbox
|
||||
bbox_query_ids = [extent.package_id for extent in extents]
|
||||
return search_params
|
||||
|
||||
q = search_params.get('q','').strip() or '""'
|
||||
new_q = '%s AND ' % q if q else ''
|
||||
new_q += '(%s)' % ' OR '.join(['id:%s' % id for id in bbox_query_ids])
|
||||
def _params_for_solr_search(self, bbox, search_params):
|
||||
search_params['fq'] += ' +spatial_geom:"Intersects({minx} {miny} {maxx} {maxy})"' \
|
||||
.format(minx=bbox['minx'],miny=bbox['miny'],maxx=bbox['maxx'],maxy=bbox['maxy'])
|
||||
|
||||
search_params['q'] = new_q
|
||||
#TODO: sorting
|
||||
|
||||
return search_params
|
||||
|
||||
def _params_for_postgis_search(self, bbox, search_params):
    '''
    Restrict the search to the datasets returned by the bbox query.

    The ids of the extents returned for the bbox are ANDed to the
    existing 'q' parameter. If there are no extents,
    search_params['abort_search'] is set to True instead.

    When the sort is 'spatial desc' and the
    'ckanext.spatial.use_postgis_sorting' config option is true, the
    ordered bbox query is used, the 'sort' parameter is cleared and the
    (package_id, spatial_ranking) pairs for the requested page are
    stored in search_params['extras']['ext_spatial'], together with
    'ext_rows' and 'ext_start'.

    :param bbox: bounding box passed on to the bbox query functions
    :param search_params: search parameters dict; modified in place.
        Must contain an 'extras' dict (and 'rows' and 'start' when
        spatial sorting is used)
    :returns: the modified search_params dict
    :raises SearchError: if spatial sorting is requested together with
        a 'q' or 'fq' parameter
    '''
    # Note: This will be deprecated at some point in favour of the
    # Solr 4 spatial sorting capabilities
    use_postgis_sorting = search_params.get('sort') == 'spatial desc' and \
        p.toolkit.asbool(config.get('ckanext.spatial.use_postgis_sorting', 'False'))

    if use_postgis_sorting:
        if search_params.get('q') or search_params.get('fq'):
            # ...because it is too inefficient to use SOLR to filter
            # results and return the entire set to this class and
            # after_search do the sorting and paging.
            raise SearchError('Spatial ranking cannot be mixed with other search parameters')

        extents = bbox_query_ordered(bbox)
        are_no_results = not extents

        # NOTE(review): the comments in the previous version said the
        # SOLR query should return no actual results, but it only set a
        # local 'rows = 0' that was overwritten straight away, so the
        # 'rows' search parameter was never changed. That behaviour is
        # kept here -- confirm whether search_params['rows'] was meant
        # to be zeroed.
        search_params['sort'] = None  # SOLR should not sort.

        # Store the rankings of the results for this page, so for
        # after_search to construct the correctly sorted results
        rows = search_params['extras']['ext_rows'] = search_params['rows']
        start = search_params['extras']['ext_start'] = search_params['start']
        search_params['extras']['ext_spatial'] = [
            (extent.package_id, extent.spatial_ranking)
            for extent in extents[start:start + rows]]
    else:
        extents = bbox_query(bbox)
        are_no_results = extents.count() == 0

    if are_no_results:
        # We don't need to perform the search
        search_params['abort_search'] = True
    else:
        # We'll perform the existing search but also filtering by the ids
        # of datasets within the bbox
        id_clauses = ' OR '.join(
            'id:%s' % extent.package_id for extent in extents)

        # An empty (or missing) query is replaced by the empty phrase
        # so the AND always has a left-hand operand
        q = (search_params.get('q') or '').strip() or '""'
        search_params['q'] = '%s AND (%s)' % (q, id_clauses)

    return search_params
|
||||
|
||||
def after_search(self, search_results, search_params):
|
||||
|
||||
|
||||
# Note: This will be deprecated at some point in favour of the
|
||||
# Solr 4 spatial sorting capabilities
|
||||
|
||||
|
@ -267,12 +309,12 @@ class HarvestMetadataApi(p.SingletonPlugin):
|
|||
'''
|
||||
Harvest Metadata API
|
||||
(previously called "InspireApi")
|
||||
|
||||
|
||||
A way for a user to view the harvested metadata XML, either as a raw file or
|
||||
styled to view in a web browser.
|
||||
'''
|
||||
p.implements(p.IRoutes)
|
||||
|
||||
|
||||
def before_map(self, route_map):
|
||||
controller = "ckanext.spatial.controllers.api:HarvestMetadataApiController"
|
||||
|
||||
|
|
Loading…
Reference in New Issue