Revert to the old implementation for Bbox based searches

The approach using Solr's own BBoxField turned out not to be optimal, as
it required the use of LocalParams. This is fine if the query is used in
`fq`, but CKAN core does not allow it on `q` (unreasonably IMO). Long
story short, the old implementation is a bit more verbose but supports:

* Proper spatial sorting
* Combining queries in `q` with a spatial filter
* Handling of antimeridian queries

It doesn't support Point geometries, but users can use
`solr-spatial-field` if they need them.
This commit is contained in:
amercader 2022-10-11 14:18:08 +02:00
parent 15caaad9f1
commit ec0cc00bbe
4 changed files with 83 additions and 63 deletions

View File

@ -57,29 +57,72 @@ class SolrBBoxSearchBackend(SpatialSearchBackend):
return dataset_dict
bounds = shape.bounds
bbox = fit_bbox(normalize_bbox(list(bounds)))
dataset_dict[
"spatial_bbox"
] = "ENVELOPE({minx}, {maxx}, {maxy}, {miny})".format(**bbox)
bbox = normalize_bbox(list(bounds))
if not bbox:
return dataset_dict
dataset_dict.update(bbox)
return dataset_dict
def search_params(self, bbox, search_params):
"""
This will add the following parameters to the query:
bbox = fit_bbox(bbox)
defType - edismax (We need to define EDisMax to use bf)
bf - {function} A boost function to influence the score (thus
influencing the sorting). The algorithm can be basically defined as:
if not search_params.get("fq_list"):
search_params["fq_list"] = []
2 * X / Q + T
default_spatial_query = "{{!field f={spatial_field}}}Intersects(ENVELOPE({minx}, {maxx}, {maxy}, {miny}))"
Where X is the intersection between the query area Q and the
target geometry T. It gives a ratio from 0 to 1 where 0 means
no overlap at all and 1 a perfect fit
spatial_query = config.get("ckanext.spatial.solr_query", default_spatial_query)
fq - Adds a filter that force the value returned by the previous
function to be between 0 and 1, effectively applying the
spatial filter.
search_params["fq_list"].append(
spatial_query.format(spatial_field="spatial_bbox", **bbox)
"""
while bbox["minx"] < -180:
bbox["minx"] += 360
bbox["maxx"] += 360
while bbox["minx"] > 180:
bbox["minx"] -= 360
bbox["maxx"] -= 360
values = dict(
input_minx=bbox["minx"],
input_maxx=bbox["maxx"],
input_miny=bbox["miny"],
input_maxy=bbox["maxy"],
area_search=abs(bbox["maxx"] - bbox["minx"])
* abs(bbox["maxy"] - bbox["miny"]),
)
bf = (
"""div(
mul(
mul(max(0, sub(min({input_maxx},maxx) , max({input_minx},minx))),
max(0, sub(min({input_maxy},maxy) , max({input_miny},miny)))
),
2),
add({area_search},mul(sub(maxy, miny), sub(maxx, minx)))
)""".format(
**values
)
.replace("\n", "")
.replace(" ", "")
)
search_params["fq_list"] = search_params.get("fq_list", [])
search_params["fq_list"].append("{!frange incl=false l=0 u=1}%s" % bf)
search_params["bf"] = bf
search_params["defType"] = "edismax"
return search_params

View File

@ -3,7 +3,7 @@
import os
geojson_examples = {
"point": '{"type":"Point","coordinates":[100.0,0.0]}',
"point": '{"type":"Point","coordinates":[100.0,2.0]}',
"point_2": '{"type":"Point","coordinates":[20,10]}',
"line": '{"type":"LineString","coordinates":[[100.0,0.0],[101.0,1.0]]}',
"polygon": '{"type":"Polygon","coordinates":[[[100.0,0.0],[101.0,0.0],'

View File

@ -39,7 +39,7 @@ extents = {
class TestBBoxSearch(SpatialTestBase):
def test_spatial_query(self):
dataset = factories.Dataset(
extras=[{"key": "spatial", "value": self.geojson_examples["point"]}]
extras=[{"key": "spatial", "value": extents["ohio"]}]
)
result = helpers.call_action(
@ -49,9 +49,20 @@ class TestBBoxSearch(SpatialTestBase):
assert result["count"] == 1
assert result["results"][0]["id"] == dataset["id"]
def test_spatial_query_point(self):
dataset = factories.Dataset(
extras=[{"key": "spatial", "value": self.geojson_examples["point"]}]
)
result = helpers.call_action(
"package_search", extras={"ext_bbox": "-180,-90,180,90"}
)
assert result["count"] == 0
def test_spatial_query_outside_bbox(self):
factories.Dataset(
extras=[{"key": "spatial", "value": self.geojson_examples["point"]}]
extras=[{"key": "spatial", "value": extents["ohio"]}]
)
result = helpers.call_action(
@ -243,39 +254,6 @@ class TestBBoxSearch(SpatialTestBase):
assert result["count"] == 1
assert result["results"][0]["id"] == dataset["id"]
def test_custom_spatial_query(self, monkeypatch, ckan_config):
"""
xxxxxx
xxxxxxx xxx
xxx xxx
xx xx
xxxx xxx
x xxxxxx
xxx xxxx
xxxx
"""
dataset = factories.Dataset(extras=[{"key": "spatial", "value": extents["nz"]}])
result = helpers.call_action(
"package_search", extras={"ext_bbox": "175,-39.5,176.5,-39"}
)
assert result["count"] == 1
assert result["results"][0]["id"] == dataset["id"]
monkeypatch.setitem(
ckan_config,
"ckanext.spatial.solr_query",
"{{!field f={spatial_field}}}Contains(ENVELOPE({minx}, {maxx}, {maxy}, {miny}))")
result = helpers.call_action(
"package_search", extras={"ext_bbox": "175,-39.5,176.5,-39"}
)
assert result["count"] == 0
def test_geometry_collection(self):
""" Test a geometry collection """

View File

@ -22,7 +22,7 @@ your ini file. This plugin in turn requires the ``spatial_metadata`` plugin, eg:
To define which backend to use for the spatial search use the following
configuration option (see `Choosing a backend for the spatial search`_)::
ckanext.spatial.search_backend = solr
ckanext.spatial.search_backend = solr-bbox
Geo-Indexing your datasets
@ -69,11 +69,13 @@ In this case you need to implement the ``before_dataset_index()`` method of the
def before_dataset_index(self, dataset_dict):
# When using the default `solr` backend (based on bounding boxes), you need to
# include the `spatial_bbox` field in the returned dataset_dict. Make sure to use
# the correct syntax expected by Solr:
# When using the default `solr-bbox` backend (based on bounding boxes), you need to
# include the following fields in the returned dataset_dict:
dataset_dict["spatial_bbox"] = "ENVELOPE({minx}, {maxx}, {maxy}, {miny})"
dataset_dict["minx"] = minx
dataset_dict["maxx"] = maxx
dataset_dict["miny"] = miny
dataset_dict["maxy"] = maxy
# When using the `solr-spatial-field` backend, you need to include the `spatial_geom`
# field in the returned dataset_dict. This should be a valid geometry in WKT format.
@ -120,7 +122,7 @@ The following table summarizes the different spatial search backends:
+-------------------------+--------------------------------------+--------------------+
| Backend | Supported geometries indexed in Solr | Solr setup needed |
+=========================+======================================+====================+
| ``solr-bbox`` (default) | Bounding Box | Custom field |
| ``solr-bbox`` (default) | Bounding Box, Polygon (extents only) | Custom fields |
+-------------------------+--------------------------------------+--------------------+
| ``solr-spatial-field`` | Bounding Box, Point and Polygon | Custom field + JTS |
+-------------------------+--------------------------------------+--------------------+
@ -130,26 +132,23 @@ The following table summarizes the different spatial search backends:
The ``solr-bbox`` backend is probably a good starting point. Here are more
details about the available options (again, you don't need to modify Solr if you are using one of the spatial enabled official Docker images):
details about the available options (again, you don't need to modify Solr if you are using one of the spatially enabled official Docker images):
* ``solr-bbox``
This option always indexes just the extent of the provided geometries, whether it's an
actual bounding box or not. It uses Solr's `BBoxField <https://solr.apache.org/guide/8_11/spatial-search.html#bboxfield>`_ so you need to add the following to your Solr schema::
<types>
<!-- ... -->
<fieldType name="bbox" class="solr.BBoxField"
geo="true" distanceUnits="kilometers" numberType="pdouble" />
</types>
actual bounding box or not. It supports spatial sorting of the returned results (based on the closeness of their bounding box to the query bounding box). It uses standard Solr float fields so you just need to add the following to your Solr schema::
<fields>
<!-- ... -->
<field name="spatial_bbox" type="bbox" />
<field name="minx" type="float" indexed="true" stored="true" />
<field name="maxx" type="float" indexed="true" stored="true" />
<field name="miny" type="float" indexed="true" stored="true" />
<field name="maxy" type="float" indexed="true" stored="true" />
</fields>
* ``solr-spatial-field``
This option uses the `RPT <https://solr.apache.org/guide/8_11/spatial-search.html#rpt>`_ Solr field, which makes it possible
to index points, rectangles and more complex geometries like polygons. This requires the install of the `JTS`_ library. See the linked Solr documentation for details on this.
to index points, rectangles and more complex geometries like polygons. This requires installing the `JTS`_ library. See the linked Solr documentation for details on this. Note that it does not support spatial sorting of the returned results.
You will need to add the following field type and field to your Solr
schema file to enable it ::