diff --git a/.gitignore b/.gitignore index 39f7065..3304cfa 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ syntax: glob *.swp *~ dist +build diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..17ee088 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "doc/_themes/sphinx-theme-okfn"] + path = doc/_themes/sphinx-theme-okfn + url = git@github.com:okfn/sphinx-theme-okfn.git diff --git a/doc/_static/ckanlogo.png b/doc/_static/ckanlogo.png new file mode 100644 index 0000000..a234cc0 Binary files /dev/null and b/doc/_static/ckanlogo.png differ diff --git a/doc/_static/dataset-extent-map.png b/doc/_static/dataset-extent-map.png new file mode 100644 index 0000000..4b788d2 Binary files /dev/null and b/doc/_static/dataset-extent-map.png differ diff --git a/doc/_static/preview-geojson.png b/doc/_static/preview-geojson.png new file mode 100644 index 0000000..c2ea881 Binary files /dev/null and b/doc/_static/preview-geojson.png differ diff --git a/doc/_static/preview-wms.png b/doc/_static/preview-wms.png new file mode 100644 index 0000000..1d947d3 Binary files /dev/null and b/doc/_static/preview-wms.png differ diff --git a/doc/_static/spatial-search-widget.png b/doc/_static/spatial-search-widget.png new file mode 100644 index 0000000..6b83ce6 Binary files /dev/null and b/doc/_static/spatial-search-widget.png differ diff --git a/doc/_templates/footer.html b/doc/_templates/footer.html new file mode 100644 index 0000000..93ac4bc --- /dev/null +++ b/doc/_templates/footer.html @@ -0,0 +1,29 @@ +

An Open Knowledge Foundation project.

+ + + +

+Source +— +Issues +— +Mailing List +— +Twitter @CKANProject +

+ +

+Related Projects: +The DataHub +— +DataCatalogs.org +— +OpenSpending.org +— +Open Data Handbook +

+ diff --git a/doc/_themes/sphinx-theme-okfn b/doc/_themes/sphinx-theme-okfn new file mode 160000 index 0000000..59688a6 --- /dev/null +++ b/doc/_themes/sphinx-theme-okfn @@ -0,0 +1 @@ +Subproject commit 59688a6679f3373a57e8d4e60e43f1b249878eb3 diff --git a/doc/conf.py b/doc/conf.py index ea722e3..93a6937 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -25,7 +25,7 @@ import sys, os # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.pngmath'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.intersphinx'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -89,12 +89,14 @@ pygments_style = 'sphinx' # If true, keep warnings as "system message" paragraphs in the built documents. #keep_warnings = False +exclude_trees = ['.build'] + # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +#html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -103,6 +105,18 @@ html_theme = 'default' # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] +sys.path.append(os.path.abspath('_themes')) +html_theme_path = ['_themes'] +html_theme = 'sphinx-theme-okfn' +html_theme_options = { + 'logo_icon': 'ckanlogo.png', + 'show_version': False, + 'show_okfn_logo': False, + } +html_sidebars = { + '**': ['globaltoc.html'] +} + # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". 
diff --git a/doc/csw.rst b/doc/csw.rst new file mode 100644 index 0000000..0bbd792 --- /dev/null +++ b/doc/csw.rst @@ -0,0 +1,260 @@ +=========== +CSW support +=========== + +The extension provides support for the CSW_ standard, a specification from +the Open Geospatial Consortium for exposing geospatial catalogues over the web. + +This support consists of: + +* Ability to import records from CSW servers with the CSW harvester. See + :doc:`harvesters` for more details. + +* Integration with pycsw_ to provide a fully compliant CSW interface for + harvested records. This integration is described in the following sections. + + +ckan-pycsw +---------- + +The spatial extension offers the ``ckan-pycsw`` command, which allows you to expose +the spatial datasets harvested from other sources in a CSW interface. This is +powered by pycsw_, which fully implements the OGC CSW specification. + +How it works +++++++++++++ + + +The current implementation is based on CKAN and pycsw being loosely integrated +via the CKAN API. pycsw will be generally installed in the same server as CKAN +(although it can also be run on a separate one), and the synchronization +command will be run regularly to keep the records on the pycsw repository up to +date. This is done using the CKAN API to get all the datasets identifiers (more +precisely the ones from datasets that have been harvested) and then deciding +which ones need to be created, updated or deleted on the pycsw repository. For +those that need to be created or updated, the original harvested spatial +document (ie ISO 19139) is requested from CKAN, and it is then imported using +pycsw internal functions:: + + Harvested + datasets + + + | + v + +--------+ +---------+ + | | CKAN API | | + | CKAN | +------------> | pycsw | +------> CSW + | | | | + +--------+ +---------+ + + +Remember, only datasets that were harvested with the :doc:`harvesters` +can currently be exposed via pycsw. 
+ +All necessary tasks are done with the ``ckan-pycsw`` command. To get more +details of its usage, run the following:: + + cd /usr/lib/ckan/default/src/ckanext-spatial + paster ckan-pycsw --help + + +Setup ++++++ + +1. Install pycsw. There are several options for this, depending on your + server setup, check the `pycsw documentation`_. + + .. note:: CKAN integration requires at least pycsw version 1.6.1. Make sure + to install at least this version. + + The following instructions assume that you have installed CKAN via a + `package install`_ and should be run as root, but the steps are the same if + you are setting it up in another location:: + + cd /usr/lib/ckan/default/src + source ../bin/activate + + # From now on the virtualenv should be activated + + git clone https://github.com/geopython/pycsw.git + cd pycsw + # Remember to use at least pycsw 1.6.1 + git checkout 1.6.1 + pip install -e . + python setup.py build + python setup.py install + +2. Create a database for pycsw. In theory you can use the same database that + CKAN is using, but if you want to keep them separated, use the following + command to create a new one (we'll use the same default user though):: + + sudo -u postgres createdb -O ckan_default pycsw -E utf-8 + + It is strongly recommended that you install PostGIS in the pycsw database, + so its spatial functions are used. See the :ref:`install_postgis` + section for details. + +3. Configure pycsw. An example configuration file is included in the source:: + + cp default-sample.cfg default.cfg + + To keep things tidy we will create a symlink to this file in the CKAN + configuration directory:: + + ln -s /usr/lib/ckan/default/src/pycsw/default.cfg /etc/ckan/default/pycsw.cfg + + Open the file with your favourite editor. The main settings you should tweak + are ``server.home`` and ``repository.database``:: + + [server] + home=/usr/lib/ckan/default/src/pycsw + ... 
+ [repository] + database=postgresql://ckan_default:pass@localhost/pycsw + + The rest of the options are described `here <http://pycsw.org/docs/configuration.html>`_. + +4. Set up the pycsw table. This is done with the ``ckan-pycsw`` paster command + (Remember to have the virtualenv activated when running it):: + + cd /usr/lib/ckan/default/src/ckanext-spatial + paster ckan-pycsw setup -p /etc/ckan/default/pycsw.cfg + + At this point you should be ready to run pycsw with the wsgi script that it + includes:: + + cd /usr/lib/ckan/default/src/pycsw + python csw.wsgi + + This will run pycsw at http://localhost:8000. Visiting the following URL + should return you the Capabilities file: + + http://localhost:8000/?service=CSW&version=2.0.2&request=GetCapabilities + +5. Load the CKAN datasets into pycsw. Again, we will use the ``ckan-pycsw`` + command for this:: + + cd /usr/lib/ckan/default/src/ckanext-spatial + paster ckan-pycsw load -p /etc/ckan/default/pycsw.cfg + + .. note:: If you get errors similar to this one, this is caused by + limitations on the pycsw model definition. This should be fixed in + future versions of pycsw:: + + ERROR: not inserted f8d48eaf-780b-40b8-a502-7a903fde5b1c Error:ERROR: value too long for type character varying(256) + + + When the loading is finished, check that results are returned when visiting + this link: + + http://localhost:8000/?request=GetRecords&service=CSW&version=2.0.2&resultType=results&outputSchema=http://www.isotc211.org/2005/gmd&typeNames=csw:Record&elementSetName=summary + + The ``numberOfRecordsMatched`` should match the number of harvested datasets + in CKAN (minus import errors). If you run the command again new or updated + datasets will be synchronized and deleted datasets from CKAN will be removed + from pycsw as well. + +Running it on production site ++++++++++++++++++++++++++++++ + +On a production site you probably want to run the load command regularly to +keep CKAN and pycsw in sync, and serve pycsw with Apache + mod_wsgi like CKAN. 
+ +* To run the load command regularly you can set up a cron job. Type ``crontab -e`` + and copy the following lines:: + + # m h dom mon dow command + 0 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-spatial ckan-pycsw load -p /etc/ckan/default/pycsw.cfg + + This particular example will run the load command every hour. You can of + course modify this periodicity, for instance reducing it for huge instances. + This `Wikipedia page <http://en.wikipedia.org/wiki/Cron>`_ + has a good overview of the crontab syntax. + +* To run pycsw under Apache check the pycsw `installation documentation <http://pycsw.org/docs/installation.html>`_ + or follow these quick steps (they assume the paths used in the previous steps): + + - Edit ``/etc/apache2/sites-available/ckan_default`` and add the following + line just before the existing ``WSGIScriptAlias`` directive:: + + WSGIScriptAlias /csw /usr/lib/ckan/default/src/pycsw/csw.wsgi + + - Edit the ``/usr/lib/ckan/default/src/pycsw/csw.wsgi`` file and add these two + lines just after the imports on the top of the file:: + + activate_this = os.path.join('/usr/lib/ckan/default/bin/activate_this.py') + execfile(activate_this, {"__file__":activate_this}) + + We need these to activate the virtualenv where we installed pycsw. + + - Restart Apache:: + + service apache2 restart + + pycsw should be now accessible at http://localhost/csw + + +Legacy plugins and libraries +---------------------------- + + +Old CSW Server +++++++++++++++ + +.. warning:: **Deprecated:** The old csw plugin has been deprecated, please see `ckan-pycsw`_ + for details on how to integrate with pycsw. + +To activate it, add the ``csw_server`` plugin to your ini file. + +Only harvested datasets are served by this CSW Server. This is because +the harvested document is the one that is served, not something derived +from the CKAN Dataset object. Datasets that are created in CKAN by methods +other than harvesting are not served. 
+ +The currently supported methods with this CSW Server are: + * GetCapabilities + * GetRecords + * GetRecordById + +For example you can ask for the capabilities of the CSW server installed into CKAN +running on 127.0.0.1:5000 like this:: + + curl 'http://127.0.0.1:5000/csw?request=GetCapabilities&service=CSW&version=2.0.2' + +And get a list of the records like this:: + + curl 'http://127.0.0.1:5000/csw?request=GetRecords&service=CSW&resultType=results&elementSetName=full&version=2.0.2' + +The standard CSW response is in XML format. + +cswinfo ++++++++ + +The command-line tool ``cswinfo`` allows you to make queries on CSW servers and +returns the info in nicely formatted JSON. This may be more convenient to type +than using, for example, curl. + +Currently available queries are: + * getcapabilities + * getidentifiers + * getrecords + * getrecordbyid + +For details, type:: + + cswinfo csw -h + +There are options for querying by only certain types, keywords and typenames +as well as configuring the ElementSetName. + +The equivalent example to the one above for asking for the capabilities is:: + + $ cswinfo csw getcapabilities http://127.0.0.1:5000/csw + +OWSLib is the library used to actually perform the queries. + +.. _pycsw: http://pycsw.org +.. _pycsw documentation: http://pycsw.org/docs/installation.html +.. _package install: http://docs.ckan.org/en/latest/install-from-package.html +.. _CSW: http://www.opengeospatial.org/standards/cat + diff --git a/doc/dataset-map.rst b/doc/dataset-map.rst deleted file mode 100644 index 92dd43e..0000000 --- a/doc/dataset-map.rst +++ /dev/null @@ -1,9 +0,0 @@ -Dataset Extent Map ------------------- - -To enable the dataset map you need to add the `dataset_extent_map` plugin to your -ini file (See `Configuration`_). You need to load the `spatial_metadata` plugin also. - -When the plugin is enabled, if datasets contain a 'spatial' extra like the one -described in the previous section, a map will be shown on the dataset details page. 
- diff --git a/doc/harvesters.rst b/doc/harvesters.rst new file mode 100644 index 0000000..69cd9b4 --- /dev/null +++ b/doc/harvesters.rst @@ -0,0 +1,142 @@ +================== +Spatial Harvesters +================== + +Overview and Configuration +-------------------------- + +The spatial extension provides some harvesters for importing ISO19139-based +metadata into CKAN, as well as providing a base class for writing new ones. +The harvesters use the interface provided by ckanext-harvest_, so you will need +to install and set it up first. + +Once ckanext-harvest is installed, you can add the following plugins to your +ini file to enable the different harvesters (If you are upgrading from a +previous version to CKAN 2.0 see legacy_harvesters_): + +* ``csw_harvester`` - CSW server +* ``waf_harvester`` - WAF (Web Accessible Folder): An online accessible index + page with links to metadata documents +* ``doc_harvester`` - A single online accessible metadata document. + +Have a look at the `ckanext-harvest documentation`_ if you want to have an +overview of how the CKAN harvesters work, but basically there are three +separate stages: + +* gather_stage - Aggregates all the remote identifiers for a particular source + (eg identifiers for a CSW server, files for a WAF). +* fetch_stage - Fetches all the remote documents and stores them in the + database. +* import_stage - Performs all the processing for transforming the remote + content into a CKAN dataset: validates the document, parses it, converts it + to a CKAN dataset dict and saves it in the database. + +The extension provides different XSD and schematron based validators. You can +specify which validators to use for the remote documents with the following +configuration option:: + + ckan.spatial.validator.profiles = iso19139eden + +By default, the import stage will stop if the validation of the harvested +document fails. 
This can be modified by setting the +``ckanext.spatial.harvest.continue_on_validation_errors`` to True. The setting +can also be applied at the source level by setting to True the +``continue_on_validation_errors`` key on the source configuration object. + +By default the harvesting actions (eg creating or updating datasets) will be +performed by the internal site admin user. This is the recommended setting, +but if necessary, it can be overridden with the +``ckanext.spatial.harvest.user_name`` config option, eg to support the old +hardcoded 'harvest' user:: + + ckanext.spatial.harvest.user_name = harvest + +Customizing the harvesters +-------------------------- + +The default harvesters provided in this extension can be overridden from +extensions to customize to your needs. You can either extend ``CswHarvester``, +``WAFHarvester`` or the main ``SpatialHarvester`` class. There are some +extension points that can be safely overridden from your extension. Probably the +most useful is ``get_package_dict``, which allows you to tweak the dataset fields +before creating or updating them. ``transform_to_iso`` allows you to hook into +transformation mechanisms to transform other formats into ISO19139, the only one +directly supported by the spatial harvesters. Finally, the whole +``import_stage`` can be overridden if the default logic does not suit your +needs. + +Check the source code of ``ckanext/spatial/harvesters/base.py`` for more +details on these functions. + +The `ckanext-geodatagov`_ extension contains live examples on how to extend +the default spatial harvesters and create new ones for other spatial services +like ArcGIS REST APIs. + + +Harvest Metadata API +-------------------- + +This plugin allows you to access the actual harvested document via API requests. 
+It is enabled with the following plugin:: + + ckan.plugins = spatial_harvest_metadata_api + +(It was previously known as ``inspire_api``) + +To view the harvest objects (containing the harvested metadata) in the web +interface, these controller locations are added: + +* raw XML document: /harvest/object/{id} +* HTML representation: /harvest/object/{id}/html + +.. note:: The old URLs are now deprecated and redirect to the ones + mentioned above: + + * /api/2/rest/harvestobject/{id}/xml + * /api/2/rest/harvestobject/{id}/html + + +For those harvest objects that have an original document (which was transformed +to ISO), this can be accessed via: + +* raw XML document: /harvest/object/{id}/original +* HTML representation: /harvest/object/{id}/html/original + +The HTML representation is created via an XSLT transformation. The extension +provides an XSLT file that should work on ISO 19139 based documents, but if you +want to use your own in your extension, you can override it using the following +configuration options:: + + ckanext.spatial.harvest.xslt_html_content = ckanext.myext:templates/xslt/custom.xslt + ckanext.spatial.harvest.xslt_html_content_original = ckanext.myext:templates/xslt/custom2.xslt + +If your project does not transform different metadata types you can ignore the +second option. + +.. _legacy_harvesters: + +Legacy harvesters +----------------- + +Prior to CKAN 2.0, the spatial harvesters available in this extension were +based on the GEMINI2 format, an ISO19139 profile used by the UK Location +Programme, and the logic for creating or updating datasets and the resulting +fields were somewhat adapted to the needs of this particular project. 
The +harvesters were still generic enough and should work fine with other ISO19139 +based sources, but extra care has been put to make the new harvesters more +generic and robust, so these ones should only be used on existing instances: + +* ``gemini_csw_harvester`` +* ``gemini_waf_harvester`` +* ``gemini_doc_harvester`` + +If you are using these harvesters please consider upgrading to the new +versions described on the previous section. + + +.. todo:: Validation library details + + +.. _ckanext-harvest: https://github.com/okfn/ckanext-harvest +.. _ckanext-harvest documentation: https://github.com/okfn/ckanext-harvest#the-harvesting-interface +.. _ckanext-geodatagov: https://github.com/okfn/ckanext-geodatagov/blob/master/ckanext/geodatagov/harvesters/ diff --git a/doc/index.ckan b/doc/index.ckan deleted file mode 100644 index 67b8e99..0000000 --- a/doc/index.ckan +++ /dev/null @@ -1,19 +0,0 @@ -=============================== -Welcome to ckanext-spatial docs -=============================== - - -SPATIAL!! - -.. note :: - - This is the documentation for CKAN version '|version|'. If you are using a different version, use the links on the bottom right corner of the page to select the appropriate documentation. - -This Administration Guide covers how to set up and manage `CKAN `_ software. - -* The first two sections cover your two options for installing CKAN: package or source install. -* The rest of the first half of the Guide, up to :doc:`authorization`, covers setup and basic admin. -* The second half of the Guide, from :doc:`extensions` onwards, covers advanced tasks, including extensions and forms. - -For high-level information on what CKAN is, see the `CKAN website `_. - diff --git a/doc/index.rst b/doc/index.rst index 60b2822..9a6010f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,50 +1,33 @@ -.. ckanext-spatial documentation master file, created by - sphinx-quickstart on Wed Apr 10 17:17:12 2013. 
- You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to ckanext-spatial's documentation! -=========================================== - ============================================== ckanext-spatial - Geo related plugins for CKAN ============================================== -This extension contains plugins that add geospatial capabilities to CKAN. -The following plugins are currently available: +This extension contains plugins that add geospatial capabilities to CKAN_. -* Spatial model for CKAN datasets and automatic geo-indexing (`spatial_metadata`) -* Spatial Search - Spatial search integration and API call (`spatial_query`). -* Spatial Search Widget - Map widget integrated on the search form (`spatial_query_widget`). -* Dataset Extent Map - Map widget showing a dataset extent (`dataset_extent_map`). -* WMS Preview - a Web Map Service (WMS) previewer (`wms_preview`). -* CSW Server - a basic CSW server - to server metadata from the CKAN instance (`cswserver`) -* GEMINI Harvesters - for importing INSPIRE-style metadata into CKAN (`gemini_csw_harvester`, `gemini_doc_harvester`, `gemini_waf_harvester`) -* Harvest Metadata API - a way for a user to view the harvested metadata XML, either as a raw file or styled to view in a web browser. (`spatial_harvest_metadata_api`) +You should have a CKAN instance installed before adding these plugins. Head to +the `CKAN documentation`_ for information on how to set up CKAN. -These libraries: -* CSW Client - a basic client for accessing a CSW server -* Validators - uses XSD / Schematron to validate geographic metadata XML. Used by the GEMINI Harvesters -* Validators for ISO19139/INSPIRE/GEMINI2 metadata. Used by the Validator. +The extension adds a spatial field to the default CKAN dataset schema, +using PostGIS_ as the backend. This allows to perform spatial queries and +display the dataset extent on the frontend. 
It also provides harvesters to +import geospatial metadata into CKAN from other sources, as well as commands +to support the CSW standard. Finally, it also includes plugins to preview +spatial formats such as GeoJSON_. -And these command-line tools: -* cswinfo - a command-line tool to help making requests of any CSW server - -As of October 2012, ckanext-csw and ckanext-inspire were merged into this extension. Contents: .. toctree:: :maxdepth: 2 - + + install spatial-search - dataset-map + harvesters + csw + previews - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` +.. _CKAN: http://ckan.org +.. _CKAN Documentation: http://docs.ckan.org +.. _PostGIS: http://postgis.org +.. _GeoJSON: http://geojson.org diff --git a/doc/install.rst b/doc/install.rst new file mode 100644 index 0000000..af67b06 --- /dev/null +++ b/doc/install.rst @@ -0,0 +1,243 @@ +====================== +Installation and Setup +====================== + +Check the Troubleshooting_ section if you get errors at any stage. + +.. _install_postgis: + +Install PostGIS and system packages +----------------------------------- + +.. note:: If you *only* want to load the :doc:`previews` you don't need to + install any of the packages on this section and can skip to the + next one. + +.. note:: The package names and paths shown are the defaults on an Ubuntu + 12.04 install (PostgreSQL 9.1 and PostGIS 1.5). Adjust the + package names and the paths if you are using a different version of + any of them. + +All commands assume an existing CKAN database named ``ckan_default``. + + +#. Install PostGIS:: + + sudo apt-get install postgresql-9.1-postgis + +#. Run the following commands. 
The first one will create the necessary + tables and functions in the database, and the second will populate + the spatial reference table:: + + sudo -u postgres psql -d ckan_default -f /usr/share/postgresql/9.1/contrib/postgis-1.5/postgis.sql + sudo -u postgres psql -d ckan_default -f /usr/share/postgresql/9.1/contrib/postgis-1.5/spatial_ref_sys.sql + + .. note:: If using PostgreSQL 8.x, run the following command to enable + the necessary language:: + + sudo -u postgres createlang plpgsql ckan_default + +#. Change the owner of the spatial tables to the CKAN user to avoid errors later + on:: + + ALTER TABLE spatial_ref_sys OWNER TO ckan_default; + ALTER TABLE geometry_columns OWNER TO ckan_default; + +#. Execute the following command to see if PostGIS was properly + installed:: + + sudo -u postgres psql -d ckan_default -c "SELECT postgis_full_version()" + + You should get something like:: + + postgis_full_version + ------------------------------------------------------------------------------------------------------ + POSTGIS="1.5.2" GEOS="3.2.2-CAPI-1.6.2" PROJ="Rel. 4.7.1, 23 September 2009" LIBXML="2.7.7" USE_STATS + (1 row) + + +#. Install some other packages needed by the extension dependencies:: + + sudo apt-get install python-dev libxml2-dev libxslt1-dev libgeos-c1 + + +Install the extension +--------------------- + +1. Install this extension into your Python environment (where CKAN is also + installed). + + .. note:: Depending on the CKAN core version you are targeting you will need + to use a different branch of the extension. + + For a production site, use the ``stable`` branch, unless there is a specific + branch that targets the CKAN core version that you are using. 
+ + To target the latest CKAN core release:: + + (pyenv) $ pip install -e git+https://github.com/okfn/ckanext-spatial.git@stable#egg=ckanext-spatial + + To target an old release (if a release branch exists, otherwise use + ``stable``):: + + (pyenv) $ pip install -e git+https://github.com/okfn/ckanext-spatial.git@release-v1.8#egg=ckanext-spatial + + To target CKAN ``master``, use the extension ``master`` branch (ie no + branch defined):: + + (pyenv) $ pip install -e git+https://github.com/okfn/ckanext-spatial.git#egg=ckanext-spatial + + +2. Install the rest of the Python modules required by the extension:: + + (pyenv) $ pip install -r pip-requirements.txt + +To use the :doc:`harvesters`, you will need to install and configure the +harvester extension: `ckanext-harvest`_. Follow the install instructions in +its documentation for details on how to set it up. + + +Configuration +------------- + +Once PostGIS is installed and configured in the database the extension needs +to create a table to store the datasets extent, called ``package_extent``. + +This will happen automatically the next time CKAN is restarted after adding the +plugins in the configuration ini file (eg when restarting Apache). + +If for some reason you need to explicitly create the table beforehand, you can +do it with the following command (with the virtualenv activated):: + + (pyenv) $ paster --plugin=ckanext-spatial spatial initdb [srid] --config=mysite.ini + +You can define the SRID of the geometry column. Default is 4326. If you are not +familiar with projections, we recommend using the default value. To know more +about PostGIS tables, see :doc:`postgis-manual`. + +Each plugin can be enabled by adding its name to the ``ckan.plugins`` in the +CKAN ini file. For example:: + + ckan.plugins = spatial_metadata spatial_query + +When enabling the spatial metadata, you can define the projection in which +extents are stored in the database with the following option. 
Use the EPSG code +as an integer (e.g. 4326, 4258, 27700, etc). It defaults to 4326:: + + ckan.spatial.srid = 4326 + + +Troubleshooting +--------------- + +Here are some common problems you may find when installing or using the +extension: + +When initializing the spatial tables +++++++++++++++++++++++++++++++++++++ + +:: + + LINE 1: SELECT AddGeometryColumn('package_extent','the_geom', E'4326... + ^ + HINT: No function matches the given name and argument types. You might need to add explicit type casts. + "SELECT AddGeometryColumn('package_extent','the_geom', %s, 'GEOMETRY', 2)" ('4326',) + + +PostGIS was not installed correctly. Please check the :ref:`install_postgis` +section. + +:: + + sqlalchemy.exc.ProgrammingError: (ProgrammingError) permission denied for relation spatial_ref_sys + + +The user accessing the ckan database needs to be owner (or have permissions) +of the geometry_columns and spatial_ref_sys tables. + +When migrating to an existing PostGIS database +++++++++++++++++++++++++++++++++++++++++++++++ + +If you are loading a database dump to an existing PostGIS database, you may +find errors like :: + + ERROR: type "spheroid" already exists + +This means that the PostGIS functions are installed, but you may need to +create the necessary tables anyway. You can force psql to ignore these +errors and continue the transaction with the ``ON_ERROR_ROLLBACK=on`` option:: + + sudo -u postgres psql -d ckan_default -f /usr/share/postgresql/8.4/contrib/postgis-1.5/postgis.sql -v ON_ERROR_ROLLBACK=on + +You will still need to populate the spatial_ref_sys table and change the +tables permissions. Refer to the :ref:`install_postgis` section for details on how to do +it. + +When performing a spatial query ++++++++++++++++++++++++++++++++ + +:: + + InvalidRequestError: SQL expression, column, or mapped entity expected - got '' + +The spatial model has not been loaded. You probably forgot to add the +``spatial_metadata`` plugin to your ini configuration file. 
+ +:: + + InternalError: (InternalError) Operation on two geometries with different SRIDs + +The spatial reference system of the database geometry column and the one +used by CKAN differ. Remember, if you are using a different spatial +reference system from the default one (WGS 84 lat/lon, EPSG:4326), you must +define it in the configuration file as follows:: + + ckan.spatial.srid = 4258 + +When running the spatial harvesters ++++++++++++++++++++++++++++++++++++ + +:: + + File "xmlschema.pxi", line 102, in lxml.etree.XMLSchema.__init__ (src/lxml/lxml.etree.c:154475) + lxml.etree.XMLSchemaParseError: local list type: A type, derived by list or union, must have the simple ur-type definition as base type, not '{http://www.opengis.net/gml}doubleList'., line 1 + +The XSD validation used by the spatial harvesters requires libxml2 version 2.9. + +With CKAN you would probably have installed an older version from your +distribution (e.g. with ``sudo apt-get install libxml2-dev``). You need to +find the SO files for the old version:: + + $ find /usr -name "libxml2.so" + +For example, it may show it here: ``/usr/lib/x86_64-linux-gnu/libxml2.so``. +The directory of the SO file is used as a parameter to the ``configure`` command later +on. + +Download the libxml2 source:: + + $ cd ~ + $ wget ftp://xmlsoft.org/libxml2/libxml2-2.9.0.tar.gz + +Unzip it:: + + $ tar zxvf libxml2-2.9.0.tar.gz + $ cd libxml2-2.9.0/ + +Configure with the SO directory you found before:: + + $ ./configure --libdir=/usr/lib/x86_64-linux-gnu + +Now make it and install it:: + + $ make + $ sudo make install + +Now check the install by running xmllint:: + + $ xmllint --version + xmllint: using libxml version 20900 + compiled with: Threads Tree Output Push Reader Patterns Writer SAXv1 FTP HTTP DTDValid HTML Legacy C14N Catalog XPath XPointer XInclude Iconv ISO8859X Unicode Regexps Automata Expr Schemas Schematron Modules Debug Zlib + +.. _PostGIS: http://postgis.org +.. 
_ckanext-harvest: https://github.com/okfn/ckanext-harvest diff --git a/doc/postgis-manual.rst b/doc/postgis-manual.rst new file mode 100644 index 0000000..5fd08ee --- /dev/null +++ b/doc/postgis-manual.rst @@ -0,0 +1,43 @@ +========================== +Setting up a PostGIS table +========================== + +.. note:: The extension will generally set up the table automatically for you, + and running the ``initdb`` command will also have the same effect. This + section just describes what's going on for those who want to know more. + +To be able to store geometries and perform spatial operations, PostGIS_ +needs to work with geometry fields. Geometry fields should always be +added via the ``AddGeometryColumn`` function:: + + CREATE TABLE package_extent( + package_id text PRIMARY KEY + ); + + ALTER TABLE package_extent OWNER TO ckan_default; + + SELECT AddGeometryColumn('package_extent','the_geom', 4326, 'GEOMETRY', 2); + +This will add a geometry column in the ``package_extent`` table called +``the_geom``, with the spatial reference system EPSG:4326. The stored +geometries will be polygons, with 2 dimensions (The CKAN table uses the +GEOMETRY type to support multiple geometry types). + +Have a look at the table definition, and see how PostGIS has created +some constraints to ensure that the geometries follow the parameters +defined in the geometry column creation:: + + # \d package_extent + + Table "public.package_extent" + Column | Type | Modifiers + ------------+----------+----------- + package_id | text | not null + the_geom | geometry | + Indexes: + "package_extent_pkey" PRIMARY KEY, btree (package_id) + Check constraints: + "enforce_dims_the_geom" CHECK (st_ndims(the_geom) = 2) + "enforce_srid_the_geom" CHECK (st_srid(the_geom) = 4326) + +.. 
_PostGIS: http://postgis.org diff --git a/doc/previews.rst b/doc/previews.rst new file mode 100644 index 0000000..7b0d737 --- /dev/null +++ b/doc/previews.rst @@ -0,0 +1,68 @@ +============================ +Previews for Spatial Formats +============================ + +The extension includes some plugins that implement the IResourcePreview_ +interface, allowing to preview spatial resource files. They are based on +popular JavaScript mapping libraries and should be really easy to extend and +adapt to your own needs. + + +GeoJSON Preview +--------------- + +.. image:: _static/preview-geojson.png + +The GeoJSON previewer is based on Leaflet_. It will render GeoJSON_ files on a +map and add a popup showing the features properties, for those resources that +have a format of ``geojson`` or ``gjson``. + +To enable the GeoJSON previewer you need to add the ``geojson_preview`` plugin +to your ini file. This plugin also requires the `resource_proxy`_ +plugin (Make sure you load the ``resource_proxy`` plugin before any other +from the spatial extension):: + + ckan.plugins = resource_proxy geojson_preview + + +WMS Preview +----------- + +.. image:: _static/preview-wms.png + +The WMS previewer is based on OpenLayers_. When the plugin is enabled, if +datasets contain a resource that has ``wms`` format, the resource page will +load a simple map viewer that will attempt to load the remote service layers, +based on the GetCapabilities response. + +To enable the WMS previewer you need to add the ``wms_preview`` plugin to your +ini file. This plugin also requires the `resource_proxy`_ +plugin (Make sure you load the ``resource_proxy`` plugin before any other +from the spatial extension):: + + ckan.plugins = resource_proxy wms_preview + +.. note:: Please note that the WMS previewer included in ckanext-spatial is + just a proof of concept and has important limitations, and is + just intended as a bootstrap for developers willing to build a more + sophisticated one. 
+ + Some projects that have built more advanced map viewers and + integrated them with CKAN include: + + * Data.gov.uk (http://data.gov.uk): + - https://github.com/datagovuk/ckanext-dgu + - https://github.com/datagovuk/ckanext-os + + * Catalog.data.gov (http://catalog.data.gov): + - https://github.com/okfn/ckanext-geodatagov + - https://github.com/chilukey/viewer + + + +.. _IResourcePreview: http://docs.ckan.org/en/latest/writing-extensions.html#ckan.plugins.interfaces.IResourcePreview +.. _resource_proxy: http://docs.ckan.org/en/latest/data-viewer.html#viewing-remote-resources-the-resource-proxy +.. _Leaflet: http://leafletjs.org +.. _GeoJSON: http://geojson.org +.. _OpenLayers: http://openlayers.org + diff --git a/doc/spatial-search.rst b/doc/spatial-search.rst index 7404ee9..2223dff 100644 --- a/doc/spatial-search.rst +++ b/doc/spatial-search.rst @@ -1,9 +1,281 @@ +============== Spatial Search ============== -To enable the spatial query you need to add the `spatial_query` plugin to your -ini file (See `Configuration`_). This plugin requires the `spatial_metadata` -plugin. +The spatial extension allows to index datasets with spatial information so they +can be filtered via a spatial query. This includes both via the web interface +(see the `Spatial Search Widget`_) or via the `action API`_, e.g.:: + + POST http://localhost:5000/api/action/package_search + { "q": "Pollution", + "facet": "true", + "facet.field": "country", + "extras": { + "ext_bbox": "-7.535093,49.208494,3.890688,57.372349" } + } + +.. versionchanged:: 2.0.1 + Starting from this version the spatial filter is also supported on GET + requests: + + http://localhost:5000/api/action/package_search?q=Pollution&ext_bbox=-7.535093,49.208494,3.890688,57.372349 + + +Setup +----- + +To enable the spatial query you need to add the ``spatial_query`` plugin to +your ini file. 
This plugin requires the ``spatial_metadata`` plugin, eg:: + + ckan.plugins = [other plugins] spatial_metadata spatial_query + +To define which backend to use for the spatial search use the following +configuration option (see `Choosing a backend for the spatial search`_):: + + ckanext.spatial.search_backend = solr + + +Geo-Indexing your datasets +-------------------------- + +Regardless of the backend that you are using, in order to make a dataset +queryable by location, a special extra must be defined, with its key named +'spatial'. The value must be a valid GeoJSON_ geometry, for example:: + + { + "type":"Polygon", + "coordinates":[[[2.05827, 49.8625],[2.05827, 55.7447], [-6.41736, 55.7447], [-6.41736, 49.8625], [2.05827, 49.8625]]] + } + +or:: + + { + "type": "Point", + "coordinates": [-3.145,53.078] + } + + +Every time a dataset is created, updated or deleted, the extension will +synchronize the information stored in the extra with the geometry table. + +Choosing a backend for the spatial search ++++++++++++++++++++++++++++++++++++++++++ + +There are different backends supported for the spatial search, it is important +to understand their differences and the necessary setup required when choosing +which one to use. 
+ +The following table summarizes the different spatial search backends: + ++------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+ +| Backend | Solr Versions | Supported geometries | Sorting and relevance | Performance with large number of datasets | ++========================+===============+=====================================+===========================================================+===========================================+ +| ``solr`` | 3.1 to 4.x | Bounding Box | Yes, spatial sorting combined with other query parameters | Good | ++------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+ +| ``solr-spatial-field`` | 4.x | Bounding Box, Point and Polygon [1] | Not implemented | Good | ++------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+ +| ``postgis`` | 1.3 to 4.x | Bounding Box | Partial, only spatial sorting supported [2] | Poor | ++------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+ + + +[1] Requires JTS + +[2] Needs ``ckanext.spatial.use_postgis_sorting`` set to True + + + +We recommend to use the ``solr`` backend whenever possible. Here are more +details about the available options: + +* ``solr`` (Recommended) + This option uses normal Solr fields to index the relevant bits of + information about the geometry and uses an algorithm function to sort + results by relevance, keeping any other non-spatial filtering. It only + supports bounding boxes both for the geometries to be indexed and the + input query shape. 
It requires the `EDisMax`_ query parser, so it will only + work on versions of Solr greater than 3.1 (We recommend using Solr 4.x). + + You will need to add the following fields to your Solr schema file to + enable it:: + + + + + + + + + + + +* ``solr-spatial-field`` + This option uses the `spatial field`_ introduced in Solr 4, which allows + to index points, rectangles and more complex geometries (complex geometries + will require `JTS`_, check the documentation). + Sorting has not yet been implemented, users willing to do so will need to + modify the query using the ``before_search`` extension point. + + You will need to add the following field type and field to your Solr + schema file to enable it (Check the `Solr documentation`__ for more + information on the different parameters, note that you don't need + ``spatialContextFactory`` if you are not using JTS):: + + + + + + + + + + +* ``postgis`` + This is the original implementation of the spatial search. It + does not require any change in the Solr schema and can run on Solr 1.x, + but it is not as efficient as the previous ones. Basically the bounding + box based query is performed in PostGIS first, and the ids of the matched + datasets are added as a filter to the Solr request. This, apart from being + much less efficient, can lead to issues on Solr due to the size of the requests + (See `Solr configuration issues on legacy PostGIS backend`_). There is + support for a spatial ranking on this backend (setting + ``ckanext.spatial.use_postgis_sorting`` to True on the ini file), but + it can not be combined with any other filtering. + + +Spatial Search Widget +--------------------- + + +.. image:: _static/spatial-search-widget.png + +The extension provides a snippet to add a map widget to the search form, which +allows filtering results by an area of interest. 
+ +To add the map widget to the sidebar of the search page, add this to the +dataset search page template +(``myproj/ckanext/myproj/templates/package/search.html``):: + + {% block secondary_content %} + + {% snippet "spatial/snippets/spatial_query.html" %} + + {% endblock %} + +By default the map widget will show the whole world. If you want to set up a +different default extent, you can pass an extra ``default_extent`` to the +snippet, either with a pair of coordinates like this:: + + {% snippet "spatial/snippets/spatial_query.html", default_extent="[[15.62, + -139.21], [64.92, -61.87]]" %} + +or with a GeoJSON object describing a bounding box (note the escaped quotes):: + + {% snippet "spatial/snippets/spatial_query.html", default_extent="{ \"type\": + \"Polygon\", \"coordinates\": [[[74.89, 29.39],[74.89, 38.45], [60.50, + 38.45], [60.50, 29.39], [74.89, 29.39]]]}" %} + +You need to load the ``spatial_metadata`` and ``spatial_query`` plugins to use this +snippet. + + + +Dataset Extent Map +------------------ + +.. image:: _static/dataset-extent-map.png + +Using the snippets provided, if datasets contain a ``spatial`` extra like the +one described in the previous section, a map will be shown on the dataset +details page. + +There are snippets already created to load the map on the left sidebar or in +the main body of the dataset details page, but these can be easily modified to +suit your project needs. + +To add a map to the sidebar, add this to the dataset details page template (eg +``myproj/ckanext/myproj/templates/package/read.html``):: + + {% block secondary_content %} + {{ super() }} + + {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %} + {% if dataset_extent %} + {% snippet "spatial/snippets/dataset_map_sidebar.html", extent=dataset_extent %} + {% endif %} + + {% endblock %} + +For adding the map to the main body, add this:: + + {% block primary_content %} + + + +
+ + + + {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %} + {% if dataset_extent %} + {% snippet "spatial/snippets/dataset_map.html", extent=dataset_extent %} + {% endif %} + +
+ {% endblock %} + + +You need to load the ``spatial_metadata`` plugin to use these snippets. + +Legacy Search +------------- + +Solr configuration issues on legacy PostGIS backend ++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. warning:: + + If you find any of the issues described in this section it is strongly + recommended that you consider switching to one of the Solr based backends + which are much more efficient. These notes are just kept for informative + purposes. + + +If using Spatial Query functionality then there is an additional SOLR/Lucene +setting that should be used to set the limit on number of datasets searchable +with a spatial value. + +The setting is ``maxBooleanClauses`` in the solrconfig.xml and the value is the +number of datasets spatially searchable. The default is ``1024`` and this could +be increased to say ``16384``. For a SOLR single core this will probably be at +`/etc/solr/conf/solrconfig.xml`. For a multiple core set-up, there will be +several solrconfig.xml files a couple of levels below `/etc/solr`. For that +case, *all* of the cores' `solrconfig.xml` should have this setting at the new +value. + +Example:: + + 16384 + +This setting is needed because PostGIS spatial query results are fed into SOLR +using a Boolean expression, and the parser for that has a limit. So if your +spatial area contains more than the limit (of which the default is 1024) then +you will get this error:: + + Dataset search error: ('SOLR returned an error running query... + +and in the SOLR logs you see:: + + too many boolean clauses ... 
Caused by: + org.apache.lucene.search.BooleanQuery$TooManyClauses: maxClauseCount is set to + 1024 + + +Legacy API +++++++++++ The extension adds the following call to the CKAN search API, which returns datasets with an extent that intersects with the bounding box provided:: @@ -11,55 +283,15 @@ datasets with an extent that intersects with the bounding box provided:: /api/2/search/dataset/geo?bbox={minx,miny,maxx,maxy}[&crs={srid}] If the bounding box coordinates are not in the same projection as the one -defined in the database, a CRS must be provided, in one of the following -forms: +defined in the database, a CRS must be provided, in one of the following forms: -- urn:ogc:def:crs:EPSG::4326 +- `urn:ogc:def:crs:EPSG::4326` - EPSG:4326 - 4326 -As of CKAN 1.6, you can integrate your spatial query in the full CKAN -search, via the web interface (see the `Spatial Query Widget`_) or -via the `action API`__, e.g.:: - - POST http://localhost:5000/api/action/package_search - { - "q": "Pollution", - "extras": { - "ext_bbox": "-7.535093,49.208494,3.890688,57.372349" - } - } - -__ http://docs.ckan.org/en/latest/apiv3.html - -Geo-Indexing your datasets --------------------------- - -In order to make a dataset queryable by location, an special extra must -be defined, with its key named 'spatial'. The value must be a valid GeoJSON_ -geometry, for example:: - - {"type":"Polygon","coordinates":[[[2.05827, 49.8625],[2.05827, 55.7447], [-6.41736, 55.7447], [-6.41736, 49.8625], [2.05827, 49.8625]]]} - -or:: - - { "type": "Point", "coordinates": [-3.145,53.078] } - +.. _action API: http://docs.ckan.org/en/latest/apiv3.html +.. _edismax: http://wiki.apache.org/solr/ExtendedDisMax +.. _JTS: http://www.vividsolutions.com/jts/JTSHome.htm +.. _spatial field: http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 +__ `spatial field`_ .. 
_GeoJSON: http://geojson.org - -Every time a dataset is created, updated or deleted, the extension will synchronize -the information stored in the extra with the geometry table. - - -Spatial Search Widget ---------------------- - -**Note**: this plugin requires CKAN 1.6 or higher. - -To enable the search map widget you need to add the `spatial_query_widget` plugin to your -ini file (See `Configuration`_). You also need to load both the `spatial_metadata` -and the `spatial_query` plugins. - -When the plugin is enabled, a map widget will be shown in the dataset search form, -where users can refine their searchs drawing an area of interest. -