diff --git a/.gitignore b/.gitignore
index 39f7065..3304cfa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ syntax: glob
*.swp
*~
dist
+build
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..17ee088
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "doc/_themes/sphinx-theme-okfn"]
+ path = doc/_themes/sphinx-theme-okfn
+ url = git@github.com:okfn/sphinx-theme-okfn.git
diff --git a/doc/_static/ckanlogo.png b/doc/_static/ckanlogo.png
new file mode 100644
index 0000000..a234cc0
Binary files /dev/null and b/doc/_static/ckanlogo.png differ
diff --git a/doc/_static/dataset-extent-map.png b/doc/_static/dataset-extent-map.png
new file mode 100644
index 0000000..4b788d2
Binary files /dev/null and b/doc/_static/dataset-extent-map.png differ
diff --git a/doc/_static/preview-geojson.png b/doc/_static/preview-geojson.png
new file mode 100644
index 0000000..c2ea881
Binary files /dev/null and b/doc/_static/preview-geojson.png differ
diff --git a/doc/_static/preview-wms.png b/doc/_static/preview-wms.png
new file mode 100644
index 0000000..1d947d3
Binary files /dev/null and b/doc/_static/preview-wms.png differ
diff --git a/doc/_static/spatial-search-widget.png b/doc/_static/spatial-search-widget.png
new file mode 100644
index 0000000..6b83ce6
Binary files /dev/null and b/doc/_static/spatial-search-widget.png differ
diff --git a/doc/_templates/footer.html b/doc/_templates/footer.html
new file mode 100644
index 0000000..93ac4bc
--- /dev/null
+++ b/doc/_templates/footer.html
@@ -0,0 +1,29 @@
+
An Open Knowledge Foundation project.
+
+
+{%- if show_copyright %}
+ {% trans copyright=copyright|safe|e %}{{ copyright }}{% endtrans %}
+{%- endif %}
+
+
+
+Source
+—
+Issues
+—
+Mailing List
+—
+Twitter @CKANProject
+
+
+
+Related Projects:
+The DataHub
+—
+DataCatalogs.org
+—
+OpenSpending.org
+—
+Open Data Handbook
+
+
diff --git a/doc/_themes/sphinx-theme-okfn b/doc/_themes/sphinx-theme-okfn
new file mode 160000
index 0000000..59688a6
--- /dev/null
+++ b/doc/_themes/sphinx-theme-okfn
@@ -0,0 +1 @@
+Subproject commit 59688a6679f3373a57e8d4e60e43f1b249878eb3
diff --git a/doc/conf.py b/doc/conf.py
index ea722e3..93a6937 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -25,7 +25,7 @@ import sys, os
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.pngmath']
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.intersphinx']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
@@ -89,12 +89,14 @@ pygments_style = 'sphinx'
# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False
+exclude_trees = ['.build']
+
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-html_theme = 'default'
+#html_theme = 'default'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
@@ -103,6 +105,18 @@ html_theme = 'default'
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
+sys.path.append(os.path.abspath('_themes'))
+html_theme_path = ['_themes']
+html_theme = 'sphinx-theme-okfn'
+html_theme_options = {
+ 'logo_icon': 'ckanlogo.png',
+ 'show_version': False,
+ 'show_okfn_logo': False,
+ }
+html_sidebars = {
+ '**': ['globaltoc.html']
+}
+
# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
diff --git a/doc/csw.rst b/doc/csw.rst
new file mode 100644
index 0000000..0bbd792
--- /dev/null
+++ b/doc/csw.rst
@@ -0,0 +1,260 @@
+===========
+CSW support
+===========
+
+The extension provides the support for the CSW_ standard, a specification from
+the Open Geospatial Consortium for exposing geospatial catalogues over the web.
+
+This support consists of:
+
+* Ability to import records from CSW servers with the CSW harvester. See
+ :doc:`harvesters` for more details.
+
+* Integration with pycsw_ to provide a fully compliant CSW interface for
+ harvested records. This integration is described in the following sections.
+
+
+ckan-pycsw
+----------
+
+The spatial extension offers the ``ckan-pycsw`` command, which allows to expose
+the spatial datasets harvested from other sources in a CSW interface. This is
+powered by pycsw_, which fully implements the OGC CSW specification.
+
+How it works
+++++++++++++
+
+
+The current implementation is based on CKAN and pycsw being loosely integrated
+via the CKAN API. pycsw will be generally installed in the same server as CKAN
+(although it can also be run on a separate one), and the synchronization
+command will be run regularly to keep the records on the pycsw repository up to
+date. This is done using the CKAN API to get all the datasets identifiers (more
+precisely the ones from datasets that have been harvested) and then deciding
+which ones need to be created, updated or deleted on the pycsw repository. For
+those that need to be created or updated, the original harvested spatial
+document (ie ISO 19139) is requested from CKAN, and it is then imported using
+pycsw internal functions::
+
+ Harvested
+ datasets
+ +
+ |
+ v
+ +--------+ +---------+
+ | | CKAN API | |
+ | CKAN | +------------> | pycsw | +------> CSW
+ | | | |
+ +--------+ +---------+
+
+
+Remember, only datasets that were harvested with the :doc:`harvesters`
+can currently be exposed via pycsw.
+
+All necessary tasks are done with the ``ckan-pycsw`` command. To get more
+details of its usage, run the following::
+
+ cd /usr/lib/ckan/default/src/ckanext-spatial
+ paster ckan-pycsw --help
+
+
+Setup
++++++
+
+1. Install pycsw. There are several options for this, depending on your
+ server setup, check the `pycsw documentation`_.
+
+ .. note:: CKAN integration requires at least pycsw version 1.6.1. Make sure
+ to install at least this version.
+
+ The following instructions assume that you have installed CKAN via a
+ `package install`_ and should be run as root, but the steps are the same if
+ you are setting it up in another location::
+
+ cd /usr/lib/ckan/default/src
+ source ../bin/activate
+
+ # From now on the virtualenv should be activated
+
+ git clone https://github.com/geopython/pycsw.git
+ cd pycsw
+ # Remember to use at least pycsw 1.6.1
+ git checkout 1.6.1
+ pip install -e .
+ python setup.py build
+ python setup.py install
+
+2. Create a database for pycsw. In theory you can use the same database that
+ CKAN is using, but if you want to keep them separated, use the following
+ command to create a new one (we'll use the same default user though)::
+
+ sudo -u postgres createdb -O ckan_default pycsw -E utf-8
+
+ It is strongly recommended that you install PostGIS in the pycsw database,
+ so its spatial functions are used. See the :ref:`install_postgis`
+ section for details.
+
+3. Configure pycsw. An example configuration file is included on the source::
+
+ cp default-sample.cfg default.cfg
+
+ To keep things tidy we will create a symlink to this file on the CKAN
+ configuration directory::
+
+ ln -s /usr/lib/ckan/default/src/pycsw/default.cfg /etc/ckan/default/pycsw.cfg
+
+ Open the file with your favourite editor. The main settings you should tweak
+ are ``server.home`` and ``repository.database``::
+
+ [server]
+ home=/usr/lib/ckan/default/src/pycsw
+ ...
+ [repository]
+ database=postgresql://ckan_default:pass@localhost/pycsw
+
+ The rest of the options are described `here <http://pycsw.org/docs/configuration.html>`_.
+
+4. Setup the pycsw table. This is done with the ``ckan-pycsw`` paster command
+ (Remember to have the virtualenv activated when running it)::
+
+ cd /usr/lib/ckan/default/src/ckanext-spatial
+ paster ckan-pycsw setup -p /etc/ckan/default/pycsw.cfg
+
+ At this point you should be ready to run pycsw with the wsgi script that it
+ includes::
+
+ cd /usr/lib/ckan/default/src/pycsw
+ python csw.wsgi
+
+ This will run pycsw at http://localhost:8000. Visiting the following URL
+ should return you the Capabilities file:
+
+ http://localhost:8000/?service=CSW&version=2.0.2&request=GetCapabilities
+
+5. Load the CKAN datasets into pycsw. Again, we will use the ``ckan-pycsw``
+ command for this::
+
+ cd /usr/lib/ckan/default/src/ckanext-spatial
+ paster ckan-pycsw load -p /etc/ckan/default/pycsw.cfg
+
+ .. note:: If you get errors similar to this one, this is caused by
+ limitations on the pycsw model definition. This should be fixed in
+ future versions of pycsw::
+
+ ERROR: not inserted f8d48eaf-780b-40b8-a502-7a903fde5b1c Error:ERROR: value too long for type character varying(256)
+
+
+ When the loading is finished, check that results are returned when visiting
+ this link:
+
+ http://localhost:8000/?request=GetRecords&service=CSW&version=2.0.2&resultType=results&outputSchema=http://www.isotc211.org/2005/gmd&typeNames=csw:Record&elementSetName=summary
+
+ The ``numberOfRecordsMatched`` should match the number of harvested datasets
+ in CKAN (minus import errors). If you run the command again new or updated
+ datasets will be synchronized and deleted datasets from CKAN will be removed
+ from pycsw as well.
+
+Running it on production site
++++++++++++++++++++++++++++++
+
+On a production site you probably want to run the load command regularly to
+keep CKAN and pycsw in sync, and serve pycsw with Apache + mod_wsgi like CKAN.
+
+* To run the load command regularly you can set up a cron job. Type ``crontab -e``
+ and copy the following lines::
+
+ # m h dom mon dow command
+ 0 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-spatial ckan-pycsw load -p /etc/ckan/default/pycsw.cfg
+
+ This particular example will run the load command every hour. You can of
+ course modify this periodicity, for instance reducing it for huge instances.
+ This `Wikipedia page <http://en.wikipedia.org/wiki/Cron>`_
+ has a good overview of the crontab syntax.
+
+* To run pycsw under Apache check the `pycsw documentation`_
+ or follow these quick steps (they assume the paths used in the previous steps):
+
+ - Edit ``/etc/apache2/sites-available/ckan_default`` and add the following
+ line just before the existing ``WSGIScriptAlias`` directive::
+
+ WSGIScriptAlias /csw /usr/lib/ckan/default/src/pycsw/csw.wsgi
+
+ - Edit the ``/usr/lib/ckan/default/src/pycsw/csw.wsgi`` file and add these two
+ lines just after the imports on the top of the file::
+
+ activate_this = os.path.join('/usr/lib/ckan/default/bin/activate_this.py')
+ execfile(activate_this, {"__file__":activate_this})
+
+ We need these to activate the virtualenv where we installed pycsw into.
+
+ - Restart Apache::
+
+ service apache2 restart
+
+ pycsw should be now accessible at http://localhost/csw
+
+
+Legacy plugins and libraries
+----------------------------
+
+
+Old CSW Server
+++++++++++++++
+
+.. warning:: **Deprecated:** The old csw plugin has been deprecated, please see `ckan-pycsw`_
+ for details on how to integrate with pycsw.
+
+To activate it, add the ``csw_server`` plugin to your ini file.
+
+Only harvested datasets are served by this CSW Server. This is because
+the harvested document is the one that is served, not something derived
+from the CKAN Dataset object. Datasets that are created in CKAN by methods
+other than harvesting are not served.
+
+The currently supported methods with this CSW Server are:
+ * GetCapabilities
+ * GetRecords
+ * GetRecordById
+
+For example you can ask the capabilities of the CSW server installed into CKAN
+running on 127.0.0.1:5000 like this::
+
+ curl 'http://127.0.0.1:5000/csw?request=GetCapabilities&service=CSW&version=2.0.2'
+
+And get a list of the records like this::
+
+ curl 'http://127.0.0.1:5000/csw?request=GetRecords&service=CSW&resultType=results&elementSetName=full&version=2.0.2'
+
+The standard CSW response is in XML format.
+
+cswinfo
++++++++
+
+The command-line tool ``cswinfo`` allows to make queries on CSW servers and
+returns the info in nicely formatted JSON. This may be more convenient to type
+than using, for example, curl.
+
+Currently available queries are:
+ * getcapabilities
+ * getidentifiers
+ * getrecords
+ * getrecordbyid
+
+For details, type::
+
+ cswinfo csw -h
+
+There are options for querying by only certain types, keywords and typenames
+as well as configuring the ElementSetName.
+
+The equivalent example to the one above for asking the capabilities is::
+
+ $ cswinfo csw getcapabilities http://127.0.0.1:5000/csw
+
+OWSLib is the library used to actually perform the queries.
+
+.. _pycsw: http://pycsw.org
+.. _pycsw documentation: http://pycsw.org/docs/installation.html
+.. _package install: http://docs.ckan.org/en/latest/install-from-package.html
+.. _CSW: http://www.opengeospatial.org/standards/cat
+
diff --git a/doc/dataset-map.rst b/doc/dataset-map.rst
deleted file mode 100644
index 92dd43e..0000000
--- a/doc/dataset-map.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-Dataset Extent Map
-------------------
-
-To enable the dataset map you need to add the `dataset_extent_map` plugin to your
-ini file (See `Configuration`_). You need to load the `spatial_metadata` plugin also.
-
-When the plugin is enabled, if datasets contain a 'spatial' extra like the one
-described in the previous section, a map will be shown on the dataset details page.
-
diff --git a/doc/harvesters.rst b/doc/harvesters.rst
new file mode 100644
index 0000000..69cd9b4
--- /dev/null
+++ b/doc/harvesters.rst
@@ -0,0 +1,142 @@
+==================
+Spatial Harvesters
+==================
+
+Overview and Configuration
+--------------------------
+
+The spatial extension provides some harvesters for importing ISO19139-based
+metadata into CKAN, as well as providing a base class for writing new ones.
+The harvesters use the interface provided by ckanext-harvest_, so you will need
+to install and set it up first.
+
+Once ckanext-harvest is installed, you can add the following plugins to your
+ini file to enable the different harvesters (If you are upgrading from a
+previous version to CKAN 2.0 see legacy_harvesters_):
+
+* ``csw_harvester`` - CSW server
+* ``waf_harvester`` - WAF (Web Accessible Folder): An online accessible index
+ page with links to metadata documents
+* ``doc_harvester`` - A single online accessible metadata document.
+
+Have a look at the `ckanext-harvest documentation`_ if you want to have an
+overview of how the CKAN harvesters work, but basically there are three
+separate stages:
+
+* gather_stage - Aggregates all the remote identifiers for a particular source
+ (eg identifiers for a CSW server, files for a WAF).
+* fetch_stage - Fetches all the remote documents and stores them on the
+ database.
+* import_stage - Performs all the processing for transforming the remote
+ content into a CKAN dataset: validates the document, parses it, converts it
+ to a CKAN dataset dict and saves it in the database.
+
+The extension provides different XSD and schematron based validators. You can
+specify which validators to use for the remote documents with the following
+configuration option::
+
+ ckan.spatial.validator.profiles = iso19139eden
+
+By default, the import stage will stop if the validation of the harvested
+document fails. This can be modified setting the
+``ckanext.spatial.harvest.continue_on_validation_errors`` to True. The setting
+can also be applied at the source level setting to True the
+``continue_on_validation_errors`` key on the source configuration object.
+
+By default the harvesting actions (eg creating or updating datasets) will be
+performed by the internal site admin user. This is the recommended setting,
+but if necessary, it can be overridden with the
+``ckanext.spatial.harvest.user_name`` config option, eg to support the old
+hardcoded 'harvest' user::
+
+ ckanext.spatial.harvest.user_name = harvest
+
+Customizing the harvesters
+--------------------------
+
+The default harvesters provided in this extension can be overridden from
+extensions to customize to your needs. You can either extend ``CSWHarvester``,
+``WAFHarvester`` or the main ``SpatialHarvester`` class. There are some
+extension points that can be safely overridden from your extension. Probably the
+most useful is ``get_package_dict``, which allows you to tweak the dataset fields
+before creating or updating them. ``transform_to_iso`` allows you to hook into
+transformation mechanisms to transform other formats into ISO19139, the only one
+directly supported by the spatial harvesters. Finally, the whole
+``import_stage`` can be overridden if the default logic does not suit your
+needs.
+
+Check the source code of ``ckanext/spatial/harvesters/base.py`` for more
+details on these functions.
+
+The `ckanext-geodatagov`_ extension contains live examples on how to extend
+the default spatial harvesters and create new ones for other spatial services
+like ArcGIS REST APIs.
+
+
+Harvest Metadata API
+--------------------
+
+This plugin allows to access the actual harvested document via API requests.
+It is enabled with the following plugin::
+
+ ckan.plugins = spatial_harvest_metadata_api
+
+(It was previously known as ``inspire_api``)
+
+To view the harvest objects (containing the harvested metadata) in the web
+interface, these controller locations are added:
+
+* raw XML document: /harvest/object/{id}
+* HTML representation: /harvest/object/{id}/html
+
+.. note:: The old URLs are now deprecated and redirect to the previously
+ mentioned:
+
+ * /api/2/rest/harvestobject/<id>/xml
+ * /api/2/rest/harvestobject/<id>/html
+
+
+For those harvest objects that have an original document (which was transformed
+to ISO), this can be accessed via:
+
+* raw XML document: /harvest/object/{id}/original
+* HTML representation: /harvest/object/{id}/html/original
+
+The HTML representation is created via an XSLT transformation. The extension
+provides an XSLT file that should work on ISO 19139 based documents, but if you
+want to use your own on your extension, you can override it using the following
+configuration options::
+
+ ckanext.spatial.harvest.xslt_html_content = ckanext.myext:templates/xslt/custom.xslt
+ ckanext.spatial.harvest.xslt_html_content_original = ckanext.myext:templates/xslt/custom2.xslt
+
+If your project does not transform different metadata types you can ignore the
+second option.
+
+.. _legacy_harvesters:
+
+Legacy harvesters
+-----------------
+
+Prior to CKAN 2.0, the spatial harvesters available on this extension were
+based on the GEMINI2 format, an ISO19139 profile used by the UK Location
+Programme, and the logic for creating or updating datasets and the resulting
+fields were somehow adapted to the needs for this particular project. The
+harvesters were still generic enough and should work fine with other ISO19139
+based sources, but extra care has been put to make the new harvesters more
+generic and robust, so these ones should only be used on existing instances:
+
+* ``gemini_csw_harvester``
+* ``gemini_waf_harvester``
+* ``gemini_doc_harvester``
+
+If you are using these harvesters please consider upgrading to the new
+versions described on the previous section.
+
+
+.. todo:: Validation library details
+
+
+.. _ckanext-harvest: https://github.com/okfn/ckanext-harvest
+.. _ckanext-harvest documentation: https://github.com/okfn/ckanext-harvest#the-harvesting-interface
+.. _ckanext-geodatagov: https://github.com/okfn/ckanext-geodatagov/blob/master/ckanext/geodatagov/harvesters/
diff --git a/doc/index.ckan b/doc/index.ckan
deleted file mode 100644
index 67b8e99..0000000
--- a/doc/index.ckan
+++ /dev/null
@@ -1,19 +0,0 @@
-===============================
-Welcome to ckanext-spatial docs
-===============================
-
-
-SPATIAL!!
-
-.. note ::
-
- This is the documentation for CKAN version '|version|'. If you are using a different version, use the links on the bottom right corner of the page to select the appropriate documentation.
-
-This Administration Guide covers how to set up and manage `CKAN `_ software.
-
-* The first two sections cover your two options for installing CKAN: package or source install.
-* The rest of the first half of the Guide, up to :doc:`authorization`, covers setup and basic admin.
-* The second half of the Guide, from :doc:`extensions` onwards, covers advanced tasks, including extensions and forms.
-
-For high-level information on what CKAN is, see the `CKAN website `_.
-
diff --git a/doc/index.rst b/doc/index.rst
index 60b2822..9a6010f 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -1,50 +1,33 @@
-.. ckanext-spatial documentation master file, created by
- sphinx-quickstart on Wed Apr 10 17:17:12 2013.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
-Welcome to ckanext-spatial's documentation!
-===========================================
-
==============================================
ckanext-spatial - Geo related plugins for CKAN
==============================================
-This extension contains plugins that add geospatial capabilities to CKAN.
-The following plugins are currently available:
+This extension contains plugins that add geospatial capabilities to CKAN_.
-* Spatial model for CKAN datasets and automatic geo-indexing (`spatial_metadata`)
-* Spatial Search - Spatial search integration and API call (`spatial_query`).
-* Spatial Search Widget - Map widget integrated on the search form (`spatial_query_widget`).
-* Dataset Extent Map - Map widget showing a dataset extent (`dataset_extent_map`).
-* WMS Preview - a Web Map Service (WMS) previewer (`wms_preview`).
-* CSW Server - a basic CSW server - to server metadata from the CKAN instance (`cswserver`)
-* GEMINI Harvesters - for importing INSPIRE-style metadata into CKAN (`gemini_csw_harvester`, `gemini_doc_harvester`, `gemini_waf_harvester`)
-* Harvest Metadata API - a way for a user to view the harvested metadata XML, either as a raw file or styled to view in a web browser. (`spatial_harvest_metadata_api`)
+You should have a CKAN instance installed before adding these plugins. Head to
+the `CKAN documentation`_ for information on how to set up CKAN.
-These libraries:
-* CSW Client - a basic client for accessing a CSW server
-* Validators - uses XSD / Schematron to validate geographic metadata XML. Used by the GEMINI Harvesters
-* Validators for ISO19139/INSPIRE/GEMINI2 metadata. Used by the Validator.
+The extension adds a spatial field to the default CKAN dataset schema,
+using PostGIS_ as the backend. This allows to perform spatial queries and
+display the dataset extent on the frontend. It also provides harvesters to
+import geospatial metadata into CKAN from other sources, as well as commands
+to support the CSW standard. Finally, it also includes plugins to preview
+spatial formats such as GeoJSON_.
-And these command-line tools:
-* cswinfo - a command-line tool to help making requests of any CSW server
-
-As of October 2012, ckanext-csw and ckanext-inspire were merged into this extension.
Contents:
.. toctree::
:maxdepth: 2
-
+
+ install
spatial-search
- dataset-map
+ harvesters
+ csw
+ previews
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
+.. _CKAN: http://ckan.org
+.. _CKAN Documentation: http://docs.ckan.org
+.. _PostGIS: http://postgis.org
+.. _GeoJSON: http://geojson.org
diff --git a/doc/install.rst b/doc/install.rst
new file mode 100644
index 0000000..af67b06
--- /dev/null
+++ b/doc/install.rst
@@ -0,0 +1,243 @@
+======================
+Installation and Setup
+======================
+
+Check the Troubleshooting_ section if you get errors at any stage.
+
+.. _install_postgis:
+
+Install PostGIS and system packages
+-----------------------------------
+
+.. note:: If you *only* want to load the :doc:`previews` you don't need to
+ install any of the packages on this section and can skip to the
+ next one.
+
+.. note:: The package names and paths shown are the defaults on an Ubuntu
+ 12.04 install (PostgreSQL 9.1 and PostGIS 1.5). Adjust the
+ package names and the paths if you are using a different version of
+ any of them.
+
+All commands assume an existing CKAN database named ``ckan_default``.
+
+
+#. Install PostGIS::
+
+ sudo apt-get install postgresql-9.1-postgis
+
+#. Run the following commands. The first one will create the necessary
+ tables and functions in the database, and the second will populate
+ the spatial reference table::
+
+ sudo -u postgres psql -d ckan_default -f /usr/share/postgresql/9.1/contrib/postgis-1.5/postgis.sql
+ sudo -u postgres psql -d ckan_default -f /usr/share/postgresql/9.1/contrib/postgis-1.5/spatial_ref_sys.sql
+
+ .. note:: If using PostgreSQL 8.x, run the following command to enable
+ the necessary language::
+
+ sudo -u postgres createlang plpgsql ckan_default
+
+#. Change the owner of the spatial tables to the CKAN user to avoid errors later
+ on::
+
+ ALTER TABLE spatial_ref_sys OWNER TO ckan_default;
+ ALTER TABLE geometry_columns OWNER TO ckan_default;
+
+#. Execute the following command to see if PostGIS was properly
+ installed::
+
+ sudo -u postgres psql -d ckan_default -c "SELECT postgis_full_version()"
+
+ You should get something like::
+
+ postgis_full_version
+ ------------------------------------------------------------------------------------------------------
+ POSTGIS="1.5.2" GEOS="3.2.2-CAPI-1.6.2" PROJ="Rel. 4.7.1, 23 September 2009" LIBXML="2.7.7" USE_STATS
+ (1 row)
+
+
+#. Install some other packages needed by the extension dependencies::
+
+ sudo apt-get install python-dev libxml2-dev libxslt1-dev libgeos-c1
+
+
+Install the extension
+---------------------
+
+1. Install this extension into your python environment (where CKAN is also
+ installed).
+
+ .. note:: Depending on the CKAN core version you are targeting you will need
+ to use a different branch from the extension.
+
+ For a production site, use the ``stable`` branch, unless there is a specific
+ branch that targets the CKAN core version that you are using.
+
+ To target the latest CKAN core release::
+
+ (pyenv) $ pip install -e git+https://github.com/okfn/ckanext-spatial.git@stable#egg=ckanext-spatial
+
+ To target an old release (if a release branch exists, otherwise use
+ ``stable``)::
+
+ (pyenv) $ pip install -e git+https://github.com/okfn/ckanext-spatial.git@release-v1.8#egg=ckanext-spatial
+
+ To target CKAN ``master``, use the extension ``master`` branch (ie no
+ branch defined)::
+
+ (pyenv) $ pip install -e git+https://github.com/okfn/ckanext-spatial.git#egg=ckanext-spatial
+
+
+2. Install the rest of python modules required by the extension::
+
+ (pyenv) $ pip install -r pip-requirements.txt
+
+To use the :doc:`harvesters`, you will need to install and configure the
+harvester extension: `ckanext-harvest`_. Follow the install instructions on
+its documentation for details on how to set it up.
+
+
+Configuration
+-------------
+
+Once PostGIS is installed and configured in the database the extension needs
+to create a table to store the datasets extent, called ``package_extent``.
+
+This will happen automatically the next time CKAN is restarted after adding the
+plugins on the configuration ini file (eg when restarting Apache).
+
+If for some reason you need to explicitly create the table beforehand, you can
+do it with the following command (with the virtualenv activated)::
+
+ (pyenv) $ paster --plugin=ckanext-spatial spatial initdb [srid] --config=mysite.ini
+
+You can define the SRID of the geometry column. Default is 4326. If you are not
+familiar with projections, we recommend to use the default value. To know more
+about PostGIS tables, see :doc:`postgis-manual`
+
+Each plugin can be enabled by adding its name to the ``ckan.plugins`` in the
+CKAN ini file. For example::
+
+ ckan.plugins = spatial_metadata spatial_query
+
+When enabling the spatial metadata, you can define the projection in which
+extents are stored in the database with the following option. Use the EPSG code
+as an integer (e.g 4326, 4258, 27700, etc). It defaults to 4326::
+
+ ckan.spatial.srid = 4326
+
+
+Troubleshooting
+---------------
+
+Here are some common problems you may find when installing or using the
+extension:
+
+When initializing the spatial tables
+++++++++++++++++++++++++++++++++++++
+
+::
+
+ LINE 1: SELECT AddGeometryColumn('package_extent','the_geom', E'4326...
+ ^
+ HINT: No function matches the given name and argument types. You might need to add explicit type casts.
+ "SELECT AddGeometryColumn('package_extent','the_geom', %s, 'GEOMETRY', 2)" ('4326',)
+
+
+PostGIS was not installed correctly. Please check the "Install PostGIS and
+system packages" section.
+
+::
+
+ sqlalchemy.exc.ProgrammingError: (ProgrammingError) permission denied for relation spatial_ref_sys
+
+
+The user accessing the ckan database needs to be owner (or have permissions)
+of the geometry_columns and spatial_ref_sys tables.
+
+When migrating to an existing PostGIS database
+++++++++++++++++++++++++++++++++++++++++++++++
+
+If you are loading a database dump to an existing PostGIS database, you may
+find errors like ::
+
+ ERROR: type "spheroid" already exists
+
+This means that the PostGIS functions are installed, but you may need to
+create the necessary tables anyway. You can force psql to ignore these
+errors and continue the transaction with the ON_ERROR_ROLLBACK=on::
+
+ sudo -u postgres psql -d ckan_default -f /usr/share/postgresql/9.1/contrib/postgis-1.5/postgis.sql -v ON_ERROR_ROLLBACK=on
+
+You will still need to populate the spatial_ref_sys table and change the
+tables permissions. Refer to the previous section for details on how to do
+it.
+
+When performing a spatial query
++++++++++++++++++++++++++++++++
+
+::
+
+ InvalidRequestError: SQL expression, column, or mapped entity expected - got '<class 'ckanext.spatial.model.PackageExtent'>'
+
+The spatial model has not been loaded. You probably forgot to add the
+``spatial_metadata`` plugin to your ini configuration file.
+
+::
+
+ InternalError: (InternalError) Operation on two geometries with different SRIDs
+
+The spatial reference system of the database geometry column and the one
+used by CKAN differ. Remember, if you are using a different spatial
+reference system from the default one (WGS 84 lat/lon, EPSG:4326), you must
+define it in the configuration file as follows::
+
+ ckan.spatial.srid = 4258
+
+When running the spatial harvesters
++++++++++++++++++++++++++++++++++++
+
+::
+
+ File "xmlschema.pxi", line 102, in lxml.etree.XMLSchema.__init__ (src/lxml/lxml.etree.c:154475)
+ lxml.etree.XMLSchemaParseError: local list type: A type, derived by list or union, must have the simple ur-type definition as base type, not '{http://www.opengis.net/gml}doubleList'., line 1
+
+The XSD validation used by the spatial harvesters requires libxml2 version 2.9.
+
+With CKAN you would probably have installed an older version from your
+distribution. (e.g. with ``sudo apt-get install libxml2-dev``). You need to
+find the SO files for the old version::
+
+ $ find /usr -name "libxml2.so"
+
+For example, it may show it here: ``/usr/lib/x86_64-linux-gnu/libxml2.so``.
+The directory of the SO file is used as a parameter to ``configure`` later
+on.
+
+Download the libxml2 source::
+
+ $ cd ~
+ $ wget ftp://xmlsoft.org/libxml2/libxml2-2.9.0.tar.gz
+
+Unzip it::
+
+ $ tar zxvf libxml2-2.9.0.tar.gz
+ $ cd libxml2-2.9.0/
+
+Configure with the SO directory you found before::
+
+ $ ./configure --libdir=/usr/lib/x86_64-linux-gnu
+
+Now make it and install it::
+
+ $ make
+ $ sudo make install
+
+Now check the install by running xmllint::
+
+ $ xmllint --version
+ xmllint: using libxml version 20900
+ compiled with: Threads Tree Output Push Reader Patterns Writer SAXv1 FTP HTTP DTDValid HTML Legacy C14N Catalog XPath XPointer XInclude Iconv ISO8859X Unicode Regexps Automata Expr Schemas Schematron Modules Debug Zlib
+
+.. _PostGIS: http://postgis.org
+.. _ckanext-harvest: https://github.com/okfn/ckanext-harvest
diff --git a/doc/postgis-manual.rst b/doc/postgis-manual.rst
new file mode 100644
index 0000000..5fd08ee
--- /dev/null
+++ b/doc/postgis-manual.rst
@@ -0,0 +1,43 @@
+==========================
+Setting up a PostGIS table
+==========================
+
+.. note:: The extension will generally set up the table automatically for you,
+ and also running the ``initdb`` command will have the same effect. This
+ section just describes what's going on for those who want to know more.
+
+To be able to store geometries and perform spatial operations, PostGIS_
+needs to work with geometry fields. Geometry fields should always be
+added via the ``AddGeometryColumn`` function::
+
+ CREATE TABLE package_extent(
+ package_id text PRIMARY KEY
+ );
+
+ ALTER TABLE package_extent OWNER TO ckan_default;
+
+ SELECT AddGeometryColumn('package_extent','the_geom', 4326, 'GEOMETRY', 2);
+
+This will add a geometry column in the ``package_extent`` table called
+``the_geom``, with the spatial reference system EPSG:4326. The stored
+geometries will be polygons, with 2 dimensions (The CKAN table uses the
+GEOMETRY type to support multiple geometry types).
+
+Have a look at the table definition, and see how PostGIS has created
+some constraints to ensure that the geometries follow the parameters
+defined in the geometry column creation::
+
+ # \d package_extent
+
+ Table "public.package_extent"
+ Column | Type | Modifiers
+ ------------+----------+-----------
+ package_id | text | not null
+ the_geom | geometry |
+ Indexes:
+ "package_extent_pkey" PRIMARY KEY, btree (package_id)
+ Check constraints:
+ "enforce_dims_the_geom" CHECK (st_ndims(the_geom) = 2)
+ "enforce_srid_the_geom" CHECK (st_srid(the_geom) = 4326)
+
+.. _PostGIS: http://postgis.org
diff --git a/doc/previews.rst b/doc/previews.rst
new file mode 100644
index 0000000..7b0d737
--- /dev/null
+++ b/doc/previews.rst
@@ -0,0 +1,68 @@
+============================
+Previews for Spatial Formats
+============================
+
+The extension includes some plugins that implement the IResourcePreview_
+interface, allowing you to preview spatial resource files. They are based on
+popular Javascript mapping libraries and should be really easy to extend and
+adapt to your own needs.
+
+
+GeoJSON Preview
+---------------
+
+.. image:: _static/preview-geojson.png
+
+The GeoJSON previewer is based on Leaflet_. It will render GeoJSON_ files on a
+map and add a popup showing the features properties, for those resources that
+have a format of ``geojson`` or ``gjson``.
+
+To enable the GeoJSON previewer you need to add the ``geojson_preview`` plugin
+to your ini file. This plugin also requires the `resource_proxy`_
+plugin (Make sure you load the ``resource_proxy`` plugin before any other
+from the spatial extension)::
+
+ ckan.plugins = resource_proxy geojson_preview
+
+
+WMS Preview
+-----------
+
+.. image:: _static/preview-wms.png
+
+The WMS previewer is based on OpenLayers_. When the plugin is enabled, if
+datasets contain a resource that has ``wms`` format, the resource page will
+load a simple map viewer that will attempt to load the remote service layers,
+based on the GetCapabilities response.
+
+To enable the WMS previewer you need to add the ``wms_preview`` plugin to your
+ini file. This plugin also requires the `resource_proxy`_
+plugin (Make sure you load the ``resource_proxy`` plugin before any other
+from the spatial extension)::
+
+ ckan.plugins = resource_proxy wms_preview
+
+.. note:: Please note that the WMS previewer included in ckanext-spatial is
+ just a proof of concept and has important limitations, and is
+ just intended as a bootstrap for developers willing to build a more
+ sophisticated one.
+
+ Some projects that have built more advanced map viewers and
+ integrated them with CKAN include:
+
+ * Data.gov.uk (http://data.gov.uk):
+ - https://github.com/datagovuk/ckanext-dgu
+ - https://github.com/datagovuk/ckanext-os
+
+ * Catalog.data.gov (http://catalog.data.gov):
+ - https://github.com/okfn/ckanext-geodatagov
+ - https://github.com/chilukey/viewer
+
+
+
+.. _IResourcePreview: http://docs.ckan.org/en/latest/writing-extensions.html#ckan.plugins.interfaces.IResourcePreview
+.. _resource_proxy: http://docs.ckan.org/en/latest/data-viewer.html#viewing-remote-resources-the-resource-proxy
+.. _Leaflet: http://leafletjs.org
+.. _GeoJSON: http://geojson.org
+.. _OpenLayers: http://openlayers.org
+
diff --git a/doc/spatial-search.rst b/doc/spatial-search.rst
index 7404ee9..2223dff 100644
--- a/doc/spatial-search.rst
+++ b/doc/spatial-search.rst
@@ -1,9 +1,281 @@
+==============
Spatial Search
==============
-To enable the spatial query you need to add the `spatial_query` plugin to your
-ini file (See `Configuration`_). This plugin requires the `spatial_metadata`
-plugin.
+The spatial extension allows you to index datasets with spatial information so they
+can be filtered via a spatial query. This includes both via the web interface
+(see the `Spatial Search Widget`_) or via the `action API`_, e.g.::
+
+ POST http://localhost:5000/api/action/package_search
+ { "q": "Pollution",
+ "facet": "true",
+ "facet.field": "country",
+ "extras": {
+ "ext_bbox": "-7.535093,49.208494,3.890688,57.372349" }
+ }
+
+.. versionchanged:: 2.0.1
+ Starting from this version the spatial filter is also supported on GET
+ requests:
+
+ http://localhost:5000/api/action/package_search?q=Pollution&ext_bbox=-7.535093,49.208494,3.890688,57.372349
+
+
+Setup
+-----
+
+To enable the spatial query you need to add the ``spatial_query`` plugin to
+your ini file. This plugin requires the ``spatial_metadata`` plugin, eg::
+
+ ckan.plugins = [other plugins] spatial_metadata spatial_query
+
+To define which backend to use for the spatial search use the following
+configuration option (see `Choosing a backend for the spatial search`_)::
+
+ ckanext.spatial.search_backend = solr
+
+
+Geo-Indexing your datasets
+--------------------------
+
+Regardless of the backend that you are using, in order to make a dataset
+queryable by location, a special extra must be defined, with its key named
+'spatial'. The value must be a valid GeoJSON_ geometry, for example::
+
+ {
+ "type":"Polygon",
+ "coordinates":[[[2.05827, 49.8625],[2.05827, 55.7447], [-6.41736, 55.7447], [-6.41736, 49.8625], [2.05827, 49.8625]]]
+ }
+
+or::
+
+ {
+ "type": "Point",
+ "coordinates": [-3.145,53.078]
+ }
+
+
+Every time a dataset is created, updated or deleted, the extension will
+synchronize the information stored in the extra with the geometry table.
+
+Choosing a backend for the spatial search
++++++++++++++++++++++++++++++++++++++++++
+
+There are different backends supported for the spatial search. It is important
+to understand their differences and the necessary setup required when choosing
+which one to use.
+
+The following table summarizes the different spatial search backends:
+
++------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+
+| Backend | Solr Versions | Supported geometries | Sorting and relevance | Performance with large number of datasets |
++========================+===============+=====================================+===========================================================+===========================================+
+| ``solr`` | 3.1 to 4.x | Bounding Box | Yes, spatial sorting combined with other query parameters | Good |
++------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+
+| ``solr-spatial-field`` | 4.x | Bounding Box, Point and Polygon [1] | Not implemented | Good |
++------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+
+| ``postgis`` | 1.3 to 4.x | Bounding Box | Partial, only spatial sorting supported [2] | Poor |
++------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+
+
+
+[1] Requires JTS
+
+[2] Needs ``ckanext.spatial.use_postgis_sorting`` set to True
+
+
+
+We recommend using the ``solr`` backend whenever possible. Here are more
+details about the available options:
+
+* ``solr`` (Recommended)
+ This option uses normal Solr fields to index the relevant bits of
+ information about the geometry and uses an algorithm function to sort
+ results by relevance, keeping any other non-spatial filtering. It only
+ supports bounding boxes both for the geometries to be indexed and the
+ input query shape. It requires `EDisMax`_ query parser, so it will only
+ work on versions of Solr greater than 3.1 (We recommend using Solr 4.x).
+
+ You will need to add the following fields to your Solr schema file to
+ enable it::
+
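+        <fields>
+            <!-- ... -->
+            <field name="bbox_area" type="float" indexed="true" stored="true" />
+            <field name="maxx" type="float" indexed="true" stored="true" />
+            <field name="maxy" type="float" indexed="true" stored="true" />
+            <field name="minx" type="float" indexed="true" stored="true" />
+            <field name="miny" type="float" indexed="true" stored="true" />
+        </fields>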
+
+
+* ``solr-spatial-field``
+ This option uses the `spatial field`_ introduced in Solr 4, which allows
+ to index points, rectangles and more complex geometries (complex geometries
+ will require `JTS`_, check the documentation).
+ Sorting has not yet been implemented, users willing to do so will need to
+ modify the query using the ``before_search`` extension point.
+
+ You will need to add the following field type and field to your Solr
+ schema file to enable it (Check the `Solr documentation`__ for more
+ information on the different parameters, note that you don't need
+ ``spatialContextFactory`` if you are not using JTS)::
+
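+        <types>
+            <!-- ... -->
+            <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
+                spatialContextFactory="com.spatial4j.core.context.jts.JtsSpatialContextFactory"
+                distErrPct="0.025"
+                maxDistErr="0.000009"
+                units="degrees"
+            />
+        </types>
+        <fields>
+            <!-- ... -->
+            <field name="spatial_geom" type="location_rpt" indexed="true" stored="true" multiValued="true" />
+        </fields>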
+
+* ``postgis``
+ This is the original implementation of the spatial search. It
+ does not require any change in the Solr schema and can run on Solr 1.x,
+ but it is not as efficient as the previous ones. Basically the bounding
+ box based query is performed in PostGIS first, and the ids of the matched
+ datasets are added as a filter to the Solr request. This, apart from being
+ much less efficient, can lead to issues on Solr due to the size of the requests
+ (See `Solr configuration issues on legacy PostGIS backend`_). There is
+ support for a spatial ranking on this backend (setting
+ ``ckanext.spatial.use_postgis_sorting`` to True on the ini file), but
+ it can not be combined with any other filtering.
+
+
+Spatial Search Widget
+---------------------
+
+
+.. image:: _static/spatial-search-widget.png
+
+The extension provides a snippet to add a map widget to the search form, which
+allows filtering results by an area of interest.
+
+To add the map widget to the sidebar of the search page, add this to the
+dataset search page template
+(``myproj/ckanext/myproj/templates/package/search.html``)::
+
+ {% block secondary_content %}
+
+ {% snippet "spatial/snippets/spatial_query.html" %}
+
+ {% endblock %}
+
+By default the map widget will show the whole world. If you want to set up a
+different default extent, you can pass an extra ``default_extent`` to the
+snippet, either with a pair of coordinates like this::
+
+ {% snippet "spatial/snippets/spatial_query.html", default_extent="[[15.62,
+ -139.21], [64.92, -61.87]]" %}
+
+or with a GeoJSON object describing a bounding box (note the escaped quotes)::
+
+ {% snippet "spatial/snippets/spatial_query.html", default_extent="{ \"type\":
+ \"Polygon\", \"coordinates\": [[[74.89, 29.39],[74.89, 38.45], [60.50,
+ 38.45], [60.50, 29.39], [74.89, 29.39]]]}" %}
+
+You need to load the ``spatial_metadata`` and ``spatial_query`` plugins to use this
+snippet.
+
+
+
+Dataset Extent Map
+------------------
+
+.. image:: _static/dataset-extent-map.png
+
+Using the snippets provided, if datasets contain a ``spatial`` extra like the
+one described in the previous section, a map will be shown on the dataset
+details page.
+
+There are snippets already created to load the map on the left sidebar or in
+the main body of the dataset details page, but these can be easily modified to
+suit your project needs.
+
+To add a map to the sidebar, add this to the dataset details page template (eg
+``myproj/ckanext/myproj/templates/package/read.html``)::
+
+ {% block secondary_content %}
+ {{ super() }}
+
+ {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %}
+ {% if dataset_extent %}
+ {% snippet "spatial/snippets/dataset_map_sidebar.html", extent=dataset_extent %}
+ {% endif %}
+
+ {% endblock %}
+
+For adding the map to the main body, add this::
+
+ {% block primary_content %}
+
+
+
+
+
+
+
+ {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %}
+ {% if dataset_extent %}
+ {% snippet "spatial/snippets/dataset_map.html", extent=dataset_extent %}
+ {% endif %}
+
+
+ {% endblock %}
+
+
+You need to load the ``spatial_metadata`` plugin to use these snippets.
+
+Legacy Search
+-------------
+
+Solr configuration issues on legacy PostGIS backend
++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+.. warning::
+
+ If you find any of the issues described in this section it is strongly
+ recommended that you consider switching to one of the Solr based backends
+ which are much more efficient. These notes are just kept for informative
+ purposes.
+
+
+If using Spatial Query functionality then there is an additional SOLR/Lucene
+setting that should be used to set the limit on number of datasets searchable
+with a spatial value.
+
+The setting is ``maxBooleanClauses`` in the solrconfig.xml and the value is the
+number of datasets spatially searchable. The default is ``1024`` and this could
+be increased to say ``16384``. For a SOLR single core this will probably be at
+`/etc/solr/conf/solrconfig.xml`. For a multiple core set-up, there will be
+several solrconfig.xml files a couple of levels below `/etc/solr`. For that
+case, *all* of the cores' `solrconfig.xml` should have this setting at the new
+value.
+
+Example::
+
+ <maxBooleanClauses>16384</maxBooleanClauses>
+
+This setting is needed because PostGIS spatial query results are fed into SOLR
+using a Boolean expression, and the parser for that has a limit. So if your
+spatial area contains more than the limit (of which the default is 1024) then
+you will get this error::
+
+ Dataset search error: ('SOLR returned an error running query...
+
+and in the SOLR logs you see::
+
+ too many boolean clauses ... Caused by:
+ org.apache.lucene.search.BooleanQuery$TooManyClauses: maxClauseCount is set to
+ 1024
+
+
+Legacy API
+++++++++++
The extension adds the following call to the CKAN search API, which returns
datasets with an extent that intersects with the bounding box provided::
@@ -11,55 +283,15 @@ datasets with an extent that intersects with the bounding box provided::
/api/2/search/dataset/geo?bbox={minx,miny,maxx,maxy}[&crs={srid}]
If the bounding box coordinates are not in the same projection as the one
-defined in the database, a CRS must be provided, in one of the following
-forms:
+defined in the database, a CRS must be provided, in one of the following forms:
-- urn:ogc:def:crs:EPSG::4326
+- `urn:ogc:def:crs:EPSG::4326`
- EPSG:4326
- 4326
-As of CKAN 1.6, you can integrate your spatial query in the full CKAN
-search, via the web interface (see the `Spatial Query Widget`_) or
-via the `action API`__, e.g.::
-
- POST http://localhost:5000/api/action/package_search
- {
- "q": "Pollution",
- "extras": {
- "ext_bbox": "-7.535093,49.208494,3.890688,57.372349"
- }
- }
-
-__ http://docs.ckan.org/en/latest/apiv3.html
-
-Geo-Indexing your datasets
---------------------------
-
-In order to make a dataset queryable by location, an special extra must
-be defined, with its key named 'spatial'. The value must be a valid GeoJSON_
-geometry, for example::
-
- {"type":"Polygon","coordinates":[[[2.05827, 49.8625],[2.05827, 55.7447], [-6.41736, 55.7447], [-6.41736, 49.8625], [2.05827, 49.8625]]]}
-
-or::
-
- { "type": "Point", "coordinates": [-3.145,53.078] }
-
+.. _action API: http://docs.ckan.org/en/latest/apiv3.html
+.. _edismax: http://wiki.apache.org/solr/ExtendedDisMax
+.. _JTS: http://www.vividsolutions.com/jts/JTSHome.htm
+.. _spatial field: http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
+__ `spatial field`_
.. _GeoJSON: http://geojson.org
-
-Every time a dataset is created, updated or deleted, the extension will synchronize
-the information stored in the extra with the geometry table.
-
-
-Spatial Search Widget
----------------------
-
-**Note**: this plugin requires CKAN 1.6 or higher.
-
-To enable the search map widget you need to add the `spatial_query_widget` plugin to your
-ini file (See `Configuration`_). You also need to load both the `spatial_metadata`
-and the `spatial_query` plugins.
-
-When the plugin is enabled, a map widget will be shown in the dataset search form,
-where users can refine their searchs drawing an area of interest.
-