Merge branch 'docs'
This commit is contained in:
commit
dc587356c1
|
@ -4,3 +4,4 @@ syntax: glob
|
|||
*.swp
|
||||
*~
|
||||
dist
|
||||
build
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "doc/_themes/sphinx-theme-okfn"]
|
||||
path = doc/_themes/sphinx-theme-okfn
|
||||
url = https://github.com/okfn/sphinx-theme-okfn.git
|
1065
README.rst
1065
README.rst
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
Sphinx==1.1.3
|
Binary file not shown.
After Width: | Height: | Size: 1.3 KiB |
Binary file not shown.
After Width: | Height: | Size: 96 KiB |
Binary file not shown.
After Width: | Height: | Size: 232 KiB |
Binary file not shown.
After Width: | Height: | Size: 90 KiB |
Binary file not shown.
After Width: | Height: | Size: 78 KiB |
|
@ -0,0 +1,29 @@
|
|||
<p>An <a href="http://okfn.org">Open Knowledge Foundation</a> project.</p>
|
||||
|
||||
<p class="copyright">
|
||||
{%- if show_copyright %}
|
||||
{% trans copyright=copyright|safe|e %}{{ copyright }}{% endtrans %}<br/>
|
||||
{%- endif %}
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<a href="https://github.com/okfn/ckanext-spatial">Source</a>
|
||||
—
|
||||
<a href="https://github.com/okfn/ckanext-spatial/issues">Issues</a>
|
||||
—
|
||||
<a href="http://lists.okfn.org/mailman/listinfo/ckan-dev">Mailing List</a>
|
||||
—
|
||||
<a href="http://twitter.com/CKANProject">Twitter @CKANProject</a>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Related Projects:
|
||||
<a href="http://thedatahub.org/">The DataHub</a>
|
||||
—
|
||||
<a href="http://datacatalogs.org">DataCatalogs.org</a>
|
||||
—
|
||||
<a href="http://openspending.org">OpenSpending.org</a>
|
||||
—
|
||||
<a href="http://opendatahandbook.org">Open Data Handbook</a>
|
||||
</p>
|
||||
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 59688a6679f3373a57e8d4e60e43f1b249878eb3
|
|
@ -0,0 +1,266 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# ckanext-spatial documentation build configuration file, created by
|
||||
# sphinx-quickstart on Wed Apr 10 17:17:12 2013.
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
|
||||
import sys, os
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#sys.path.insert(0, os.path.abspath('.'))
|
||||
|
||||
# -- General configuration -----------------------------------------------------
|
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
#needs_sphinx = '1.0'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be extensions
|
||||
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
||||
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.intersphinx']
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# The suffix of source filenames.
|
||||
source_suffix = '.rst'
|
||||
|
||||
# The encoding of source files.
|
||||
#source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = u'ckanext-spatial'
|
||||
copyright = u'2013, Open Knowledge Foundation'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '0.1'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = '0.1'
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#language = None
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
#today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
#today_fmt = '%B %d, %Y'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = []
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all documents.
|
||||
#default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
#add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
#add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
#show_authors = False
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
#modindex_common_prefix = []
|
||||
|
||||
# If true, keep warnings as "system message" paragraphs in the built documents.
|
||||
#keep_warnings = False
|
||||
|
||||
exclude_trees = ['.build']
|
||||
|
||||
|
||||
# -- Options for HTML output ---------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#html_theme = 'default'
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
#html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom themes here, relative to this directory.
|
||||
#html_theme_path = []
|
||||
sys.path.append(os.path.abspath('_themes'))
|
||||
html_theme_path = ['_themes']
|
||||
html_theme = 'sphinx-theme-okfn'
|
||||
html_theme_options = {
|
||||
'logo_icon': 'ckanlogo.png',
|
||||
'show_version': False,
|
||||
'show_okfn_logo': False,
|
||||
}
|
||||
html_sidebars = {
|
||||
'**': ['globaltoc.html']
|
||||
}
|
||||
|
||||
|
||||
# The name for this set of Sphinx documents. If None, it defaults to
|
||||
# "<project> v<release> documentation".
|
||||
#html_title = None
|
||||
|
||||
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||
#html_short_title = None
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
#html_logo = None
|
||||
|
||||
# The name of an image file (within the static path) to use as favicon of the
|
||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||
# pixels large.
|
||||
#html_favicon = None
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
||||
|
||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||
# using the given strftime format.
|
||||
#html_last_updated_fmt = '%b %d, %Y'
|
||||
|
||||
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||
# typographically correct entities.
|
||||
#html_use_smartypants = True
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
#html_sidebars = {}
|
||||
|
||||
# Additional templates that should be rendered to pages, maps page names to
|
||||
# template names.
|
||||
#html_additional_pages = {}
|
||||
|
||||
# If false, no module index is generated.
|
||||
#html_domain_indices = True
|
||||
|
||||
# If false, no index is generated.
|
||||
#html_use_index = True
|
||||
|
||||
# If true, the index is split into individual pages for each letter.
|
||||
#html_split_index = False
|
||||
|
||||
# If true, links to the reST sources are added to the pages.
|
||||
#html_show_sourcelink = True
|
||||
|
||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||
#html_show_sphinx = True
|
||||
|
||||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||
#html_show_copyright = True
|
||||
|
||||
# If true, an OpenSearch description file will be output, and all pages will
|
||||
# contain a <link> tag referring to it. The value of this option must be the
|
||||
# base URL from which the finished HTML is served.
|
||||
#html_use_opensearch = ''
|
||||
|
||||
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||
#html_file_suffix = None
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'ckanext-spatialdoc'
|
||||
|
||||
|
||||
# -- Options for LaTeX output --------------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
#'papersize': 'letterpaper',
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#'pointsize': '10pt',
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#'preamble': '',
|
||||
}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title, author, documentclass [howto/manual]).
|
||||
latex_documents = [
|
||||
('index', 'ckanext-spatial.tex', u'ckanext-spatial Documentation',
|
||||
u'Open Knowledge Foundation', 'manual'),
|
||||
]
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
#latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
#latex_use_parts = False
|
||||
|
||||
# If true, show page references after internal links.
|
||||
#latex_show_pagerefs = False
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#latex_show_urls = False
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#latex_domain_indices = True
|
||||
|
||||
|
||||
# -- Options for manual page output --------------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
('index', 'ckanext-spatial', u'ckanext-spatial Documentation',
|
||||
[u'Open Knowledge Foundation'], 1)
|
||||
]
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#man_show_urls = False
|
||||
|
||||
|
||||
# -- Options for Texinfo output ------------------------------------------------
|
||||
|
||||
# Grouping the document tree into Texinfo files. List of tuples
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
('index', 'ckanext-spatial', u'ckanext-spatial Documentation',
|
||||
u'Open Knowledge Foundation', 'ckanext-spatial', 'One line description of project.',
|
||||
'Miscellaneous'),
|
||||
]
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#texinfo_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#texinfo_domain_indices = True
|
||||
|
||||
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||
#texinfo_show_urls = 'footnote'
|
||||
|
||||
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
||||
#texinfo_no_detailmenu = False
|
||||
|
||||
|
||||
# Example configuration for intersphinx: refer to the Python standard library.
|
||||
intersphinx_mapping = {'http://docs.python.org/': None}
|
|
@ -0,0 +1,260 @@
|
|||
===========
|
||||
CSW support
|
||||
===========
|
||||
|
||||
The extension provides the support for the CSW_ standard, a specification from
|
||||
the Open Geospatial Consortium for exposing geospatial catalogues over the web.
|
||||
|
||||
This support consists of:
|
||||
|
||||
* Ability to import records from CSW servers with the CSW harvester. See
|
||||
:doc:`harvesters` for more details.
|
||||
|
||||
* Integration with pycsw_ to provide a fully compliant CSW interface for
|
||||
harvested records. This integration is described in the following sections.
|
||||
|
||||
|
||||
ckan-pycsw
|
||||
----------
|
||||
|
||||
The spatial extension offers the ``ckan-pycsw`` command, which allows you to expose
|
||||
the spatial datasets harvested from other sources in a CSW interface. This is
|
||||
powered by pycsw_, which fully implements the OGC CSW specification.
|
||||
|
||||
How it works
|
||||
++++++++++++
|
||||
|
||||
|
||||
The current implementation is based on CKAN and pycsw being loosely integrated
|
||||
via the CKAN API. pycsw will be generally installed in the same server as CKAN
|
||||
(although it can also be run on a separate one), and the synchronization
|
||||
command will be run regularly to keep the records on the pycsw repository up to
|
||||
date. This is done using the CKAN API to get all the datasets identifiers (more
|
||||
precisely the ones from datasets that have been harvested) and then deciding
|
||||
which ones need to be created, updated or deleted on the pycsw repository. For
|
||||
those that need to be created or updated, the original harvested spatial
|
||||
document (ie ISO 19139) is requested from CKAN, and it is then imported using
|
||||
pycsw internal functions::
|
||||
|
||||
Harvested
|
||||
datasets
|
||||
+
|
||||
|
|
||||
v
|
||||
+--------+ +---------+
|
||||
| | CKAN API | |
|
||||
| CKAN | +------------> | pycsw | +------> CSW
|
||||
| | | |
|
||||
+--------+ +---------+
|
||||
|
||||
|
||||
Remember, only datasets that were harvested with the :doc:`harvesters`
|
||||
can currently be exposed via pycsw.
|
||||
|
||||
All necessary tasks are done with the ``ckan-pycsw`` command. To get more
|
||||
details of its usage, run the following::
|
||||
|
||||
cd /usr/lib/ckan/default/src/ckanext-spatial
|
||||
paster ckan-pycsw --help
|
||||
|
||||
|
||||
Setup
|
||||
+++++
|
||||
|
||||
1. Install pycsw. There are several options for this, depending on your
|
||||
server setup, check the `pycsw documentation`_.
|
||||
|
||||
.. note:: CKAN integration requires at least pycsw version 1.6.1. Make sure
|
||||
to install at least this version.
|
||||
|
||||
The following instructions assume that you have installed CKAN via a
|
||||
`package install`_ and should be run as root, but the steps are the same if
|
||||
you are setting it up in another location::
|
||||
|
||||
cd /usr/lib/ckan/default/src
|
||||
source ../bin/activate
|
||||
|
||||
# From now on the virtualenv should be activated
|
||||
|
||||
git clone https://github.com/geopython/pycsw.git
|
||||
cd pycsw
|
||||
# Remember to use at least pycsw 1.6.1
|
||||
git checkout 1.6.1
|
||||
pip install -e .
|
||||
python setup.py build
|
||||
python setup.py install
|
||||
|
||||
2. Create a database for pycsw. In theory you can use the same database that
|
||||
CKAN is using, but if you want to keep them separated, use the following
|
||||
command to create a new one (we'll use the same default user though)::
|
||||
|
||||
sudo -u postgres createdb -O ckan_default pycsw -E utf-8
|
||||
|
||||
It is strongly recommended that you install PostGIS in the pycsw database,
|
||||
so its spatial functions are used. See the :ref:`install_postgis`
|
||||
section for details.
|
||||
|
||||
3. Configure pycsw. An example configuration file is included in the source::
|
||||
|
||||
cp default-sample.cfg default.cfg
|
||||
|
||||
To keep things tidy we will create a symlink to this file on the CKAN
|
||||
configuration directory::
|
||||
|
||||
ln -s /usr/lib/ckan/default/src/pycsw/default.cfg /etc/ckan/default/pycsw.cfg
|
||||
|
||||
Open the file with your favourite editor. The main settings you should tweak
|
||||
are ``server.home`` and ``repository.database``::
|
||||
|
||||
[server]
|
||||
home=/usr/lib/ckan/default/src/pycsw
|
||||
...
|
||||
[repository]
|
||||
database=postgresql://ckan_default:pass@localhost/pycsw
|
||||
|
||||
The rest of the options are described `here <http://pycsw.org/docs/configuration.html>`_.
|
||||
|
||||
4. Setup the pycsw table. This is done with the ``ckan-pycsw`` paster command
|
||||
(Remember to have the virtualenv activated when running it)::
|
||||
|
||||
cd /usr/lib/ckan/default/src/ckanext-spatial
|
||||
paster ckan-pycsw setup -p /etc/ckan/default/pycsw.cfg
|
||||
|
||||
At this point you should be ready to run pycsw with the wsgi script that it
|
||||
includes::
|
||||
|
||||
cd /usr/lib/ckan/default/src/pycsw
|
||||
python csw.wsgi
|
||||
|
||||
This will run pycsw at http://localhost:8000. Visiting the following URL
|
||||
should return you the Capabilities file:
|
||||
|
||||
http://localhost:8000/?service=CSW&version=2.0.2&request=GetCapabilities
|
||||
|
||||
5. Load the CKAN datasets into pycsw. Again, we will use the ``ckan-pycsw``
|
||||
command for this::
|
||||
|
||||
cd /usr/lib/ckan/default/src/ckanext-spatial
|
||||
paster ckan-pycsw load -p /etc/ckan/default/pycsw.cfg
|
||||
|
||||
.. note:: If you get errors similar to this one, this is caused by
|
||||
limitations on the pycsw model definition. This should be fixed in
|
||||
future versions of pycsw::
|
||||
|
||||
ERROR: not inserted f8d48eaf-780b-40b8-a502-7a903fde5b1c Error:ERROR: value too long for type character varying(256)
|
||||
|
||||
|
||||
When the loading is finished, check that results are returned when visiting
|
||||
this link:
|
||||
|
||||
http://localhost:8000/?request=GetRecords&service=CSW&version=2.0.2&resultType=results&outputSchema=http://www.isotc211.org/2005/gmd&typeNames=csw:Record&elementSetName=summary
|
||||
|
||||
The ``numberOfRecordsMatched`` should match the number of harvested datasets
|
||||
in CKAN (minus import errors). If you run the command again new or updated
|
||||
datasets will be synchronized and deleted datasets from CKAN will be removed
|
||||
from pycsw as well.
|
||||
|
||||
Running it on production site
|
||||
+++++++++++++++++++++++++++++
|
||||
|
||||
On a production site you probably want to run the load command regularly to
|
||||
keep CKAN and pycsw in sync, and serve pycsw with Apache + mod_wsgi like CKAN.
|
||||
|
||||
* To run the load command regularly you can set up a cron job. Type ``crontab -e``
|
||||
and copy the following lines::
|
||||
|
||||
# m h dom mon dow command
|
||||
0 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-spatial ckan-pycsw load -p /etc/ckan/default/pycsw.cfg
|
||||
|
||||
This particular example will run the load command every hour. You can of
|
||||
course modify this periodicity, for instance reducing it for huge instances.
|
||||
This `Wikipedia page <http://en.wikipedia.org/wiki/Cron#CRON_expression>`_
|
||||
has a good overview of the crontab syntax.
|
||||
|
||||
* To run pycsw under Apache check the pycsw `installation documentation <http://pycsw.org/docs/installation.html#running-on-wsgi>`_
|
||||
or follow these quick steps (they assume the paths used on the previous steps):
|
||||
|
||||
- Edit ``/etc/apache2/sites-available/ckan_default`` and add the following
|
||||
line just before the existing ``WSGIScriptAlias`` directive::
|
||||
|
||||
WSGIScriptAlias /csw /usr/lib/ckan/default/src/pycsw/csw.wsgi
|
||||
|
||||
- Edit the ``/usr/lib/ckan/default/src/pycsw/csw.wsgi`` file and add these two
|
||||
lines just after the imports on the top of the file::
|
||||
|
||||
activate_this = os.path.join('/usr/lib/ckan/default/bin/activate_this.py')
|
||||
execfile(activate_this, {"__file__":activate_this})
|
||||
|
||||
We need these to activate the virtualenv where we installed pycsw into.
|
||||
|
||||
- Restart Apache::
|
||||
|
||||
service apache2 restart
|
||||
|
||||
pycsw should be now accessible at http://localhost/csw
|
||||
|
||||
|
||||
Legacy plugins and libraries
|
||||
----------------------------
|
||||
|
||||
|
||||
Old CSW Server
|
||||
++++++++++++++
|
||||
|
||||
.. warning:: **Deprecated:** The old csw plugin has been deprecated, please see `ckan-pycsw`_
|
||||
for details on how to integrate with pycsw.
|
||||
|
||||
To activate it, add the ``csw_server`` plugin to your ini file.
|
||||
|
||||
Only harvested datasets are served by this CSW Server. This is because
|
||||
the harvested document is the one that is served, not something derived
|
||||
from the CKAN Dataset object. Datasets that are created in CKAN by methods
|
||||
other than harvesting are not served.
|
||||
|
||||
The currently supported methods with this CSW Server are:
|
||||
* GetCapabilities
|
||||
* GetRecords
|
||||
* GetRecordById
|
||||
|
||||
For example you can ask the capabilities of the CSW server installed into CKAN
|
||||
running on 127.0.0.1:5000 like this::
|
||||
|
||||
curl 'http://127.0.0.1:5000/csw?request=GetCapabilities&service=CSW&version=2.0.2'
|
||||
|
||||
And get a list of the records like this::
|
||||
|
||||
curl 'http://127.0.0.1:5000/csw?request=GetRecords&service=CSW&resultType=results&elementSetName=full&version=2.0.2'
|
||||
|
||||
The standard CSW response is in XML format.
|
||||
|
||||
cswinfo
|
||||
+++++++
|
||||
|
||||
The command-line tool ``cswinfo`` allows you to make queries on CSW servers and
|
||||
returns the info in nicely formatted JSON. This may be more convenient to type
|
||||
than using, for example, curl.
|
||||
|
||||
Currently available queries are:
|
||||
* getcapabilities
|
||||
* getidentifiers
|
||||
* getrecords
|
||||
* getrecordbyid
|
||||
|
||||
For details, type::
|
||||
|
||||
cswinfo csw -h
|
||||
|
||||
There are options for querying by only certain types, keywords and typenames
|
||||
as well as configuring the ElementSetName.
|
||||
|
||||
The equivalent example to the one above for asking the capabilities is::
|
||||
|
||||
$ cswinfo csw getcapabilities http://127.0.0.1:5000/csw
|
||||
|
||||
OWSLib is the library used to actually perform the queries.
|
||||
|
||||
.. _pycsw: http://pycsw.org
|
||||
.. _pycsw documentation: http://pycsw.org/docs/installation.html
|
||||
.. _package install: http://docs.ckan.org/en/latest/install-from-package.html
|
||||
.. _CSW: http://www.opengeospatial.org/standards/cat
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
==================
|
||||
Spatial Harvesters
|
||||
==================
|
||||
|
||||
Overview and Configuration
|
||||
--------------------------
|
||||
|
||||
The spatial extension provides some harvesters for importing ISO19139-based
|
||||
metadata into CKAN, as well as providing a base class for writing new ones.
|
||||
The harvesters use the interface provided by ckanext-harvest_, so you will need
|
||||
to install and set it up first.
|
||||
|
||||
Once ckanext-harvest is installed, you can add the following plugins to your
|
||||
ini file to enable the different harvesters (If you are upgrading from a
|
||||
previous version to CKAN 2.0 see legacy_harvesters_):
|
||||
|
||||
* ``csw_harvester`` - CSW server
|
||||
* ``waf_harvester`` - WAF (Web Accessible Folder): An online accessible index
|
||||
page with links to metadata documents
|
||||
* ``doc_harvester`` - A single online accessible metadata document.
|
||||
|
||||
Have a look at the `ckanext-harvest documentation`_ if you want to have an
|
||||
overview of how the CKAN harvesters work, but basically there are three
|
||||
separate stages:
|
||||
|
||||
* gather_stage - Aggregates all the remote identifiers for a particular source
|
||||
(eg identifiers for a CSW server, files for a WAF).
|
||||
* fetch_stage - Fetches all the remote documents and stores them on the
|
||||
database.
|
||||
* import_stage - Performs all the processing for transforming the remote
|
||||
content into a CKAN dataset: validates the document, parses it, converts it
|
||||
to a CKAN dataset dict and saves it in the database.
|
||||
|
||||
The extension provides different XSD and schematron based validators. You can
|
||||
specify which validators to use for the remote documents with the following
|
||||
configuration option::
|
||||
|
||||
ckan.spatial.validator.profiles = iso19139eden
|
||||
|
||||
By default, the import stage will stop if the validation of the harvested
|
||||
document fails. This can be modified setting the
|
||||
``ckanext.spatial.harvest.continue_on_validation_errors`` to True. The setting
|
||||
can also be applied at the source level setting to True the
|
||||
``continue_on_validation_errors`` key on the source configuration object.
|
||||
|
||||
By default the harvesting actions (eg creating or updating datasets) will be
|
||||
performed by the internal site admin user. This is the recommended setting,
|
||||
but if necessary, it can be overridden with the
|
||||
``ckanext.spatial.harvest.user_name`` config option, eg to support the old
|
||||
hardcoded 'harvest' user::
|
||||
|
||||
ckanext.spatial.harvest.user_name = harvest
|
||||
|
||||
Customizing the harvesters
|
||||
--------------------------
|
||||
|
||||
The default harvesters provided in this extension can be overridden from
|
||||
extensions to customize to your needs. You can either extend ``CswHarvester``,
|
||||
``WAFHarvester`` or the main ``SpatialHarvester`` class. There are some
|
||||
extension points that can be safely overridden from your extension. Probably the
|
||||
most useful is ``get_package_dict``, which allows you to tweak the dataset fields
|
||||
before creating or updating them. ``transform_to_iso`` allows you to hook into
|
||||
transformation mechanisms to transform other formats into ISO19139, the only one
|
||||
directly supported by the spatial harvesters. Finally, the whole
|
||||
``import_stage`` can be overridden if the default logic does not suit your
|
||||
needs.
|
||||
|
||||
Check the source code of ``ckanext/spatial/harvesters/base.py`` for more
|
||||
details on these functions.
|
||||
|
||||
The `ckanext-geodatagov`_ extension contains live examples on how to extend
|
||||
the default spatial harvesters and create new ones for other spatial services
|
||||
like ArcGIS REST APIs.
|
||||
|
||||
|
||||
Harvest Metadata API
|
||||
--------------------
|
||||
|
||||
This plugin allows you to access the actual harvested document via API requests.
|
||||
It is enabled with the following plugin::
|
||||
|
||||
ckan.plugins = spatial_harvest_metadata_api
|
||||
|
||||
(It was previously known as ``inspire_api``)
|
||||
|
||||
To view the harvest objects (containing the harvested metadata) in the web
|
||||
interface, these controller locations are added:
|
||||
|
||||
* raw XML document: /harvest/object/{id}
|
||||
* HTML representation: /harvest/object/{id}/html
|
||||
|
||||
.. note:: The old URLs are now deprecated and redirect to the previously
|
||||
mentioned:
|
||||
|
||||
* /api/2/rest/harvestobject/<id>/xml
|
||||
* /api/2/rest/harvestobject/<id>/html
|
||||
|
||||
|
||||
For those harvest objects that have an original document (which was transformed
|
||||
to ISO), this can be accessed via:
|
||||
|
||||
* raw XML document: /harvest/object/{id}/original
|
||||
* HTML representation: /harvest/object/{id}/html/original
|
||||
|
||||
The HTML representation is created via an XSLT transformation. The extension
|
||||
provides an XSLT file that should work on ISO 19139 based documents, but if you
|
||||
want to use your own on your extension, you can override it using the following
|
||||
configuration options::
|
||||
|
||||
ckanext.spatial.harvest.xslt_html_content = ckanext.myext:templates/xslt/custom.xslt
|
||||
ckanext.spatial.harvest.xslt_html_content_original = ckanext.myext:templates/xslt/custom2.xslt
|
||||
|
||||
If your project does not transform different metadata types you can ignore the
|
||||
second option.
|
||||
|
||||
.. _legacy_harvesters:
|
||||
|
||||
Legacy harvesters
|
||||
-----------------
|
||||
|
||||
Prior to CKAN 2.0, the spatial harvesters available on this extension were
|
||||
based on the GEMINI2 format, an ISO19139 profile used by the UK Location
|
||||
Programme, and the logic for creating or updating datasets and the resulting
|
||||
fields were somewhat adapted to the needs of this particular project. The
|
||||
harvesters were still generic enough and should work fine with other ISO19139
|
||||
based sources, but extra care has been put to make the new harvesters more
|
||||
generic and robust, so these ones should only be used on existing instances:
|
||||
|
||||
* ``gemini_csw_harvester``
|
||||
* ``gemini_waf_harvester``
|
||||
* ``gemini_doc_harvester``
|
||||
|
||||
If you are using these harvesters please consider upgrading to the new
|
||||
versions described on the previous section.
|
||||
|
||||
|
||||
.. todo:: Validation library details
|
||||
|
||||
|
||||
.. _ckanext-harvest: https://github.com/okfn/ckanext-harvest
|
||||
.. _ckanext-harvest documentation: https://github.com/okfn/ckanext-harvest#the-harvesting-interface
|
||||
.. _ckanext-geodatagov: https://github.com/okfn/ckanext-geodatagov/blob/master/ckanext/geodatagov/harvesters/
|
|
@ -0,0 +1,33 @@
|
|||
==============================================
|
||||
ckanext-spatial - Geo related plugins for CKAN
|
||||
==============================================
|
||||
|
||||
This extension contains plugins that add geospatial capabilities to CKAN_.
|
||||
|
||||
You should have a CKAN instance installed before adding these plugins. Head to
|
||||
the `CKAN documentation`_ for information on how to set up CKAN.
|
||||
|
||||
The extension adds a spatial field to the default CKAN dataset schema,
|
||||
using PostGIS_ as the backend. This allows you to perform spatial queries and
|
||||
display the dataset extent on the frontend. It also provides harvesters to
|
||||
import geospatial metadata into CKAN from other sources, as well as commands
|
||||
to support the CSW standard. Finally, it also includes plugins to preview
|
||||
spatial formats such as GeoJSON_.
|
||||
|
||||
|
||||
Contents:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
install
|
||||
spatial-search
|
||||
harvesters
|
||||
csw
|
||||
previews
|
||||
|
||||
.. _CKAN: http://ckan.org
|
||||
.. _CKAN Documentation: http://docs.ckan.org
|
||||
.. _PostGIS: http://postgis.org
|
||||
.. _GeoJSON: http://geojson.org
|
||||
|
|
@ -0,0 +1,243 @@
|
|||
======================
|
||||
Installation and Setup
|
||||
======================
|
||||
|
||||
Check the Troubleshooting_ section if you get errors at any stage.
|
||||
|
||||
.. _install_postgis:
|
||||
|
||||
Install PostGIS and system packages
|
||||
-----------------------------------
|
||||
|
||||
.. note:: If you *only* want to load the :doc:`previews` you don't need to
|
||||
install any of the packages in this section and can skip to the
|
||||
next one.
|
||||
|
||||
.. note:: The package names and paths shown are the defaults on an Ubuntu
|
||||
12.04 install (PostgreSQL 9.1 and PostGIS 1.5). Adjust the
|
||||
package names and the paths if you are using a different version of
|
||||
any of them.
|
||||
|
||||
All commands assume an existing CKAN database named ``ckan_default``.
|
||||
|
||||
|
||||
#. Install PostGIS::
|
||||
|
||||
sudo apt-get install postgresql-9.1-postgis
|
||||
|
||||
#. Run the following commands. The first one will create the necessary
|
||||
tables and functions in the database, and the second will populate
|
||||
the spatial reference table::
|
||||
|
||||
sudo -u postgres psql -d ckan_default -f /usr/share/postgresql/9.1/contrib/postgis-1.5/postgis.sql
|
||||
sudo -u postgres psql -d ckan_default -f /usr/share/postgresql/9.1/contrib/postgis-1.5/spatial_ref_sys.sql
|
||||
|
||||
.. note:: If using PostgreSQL 8.x, run the following command to enable
|
||||
the necessary language::
|
||||
|
||||
sudo -u postgres createlang plpgsql ckan_default
|
||||
|
||||
#. Change the owner of the spatial tables to the CKAN user to avoid errors later
|
||||
on::
|
||||
|
||||
ALTER TABLE spatial_ref_sys OWNER TO ckan_default;
|
||||
ALTER TABLE geometry_columns OWNER TO ckan_default;
|
||||
|
||||
#. Execute the following command to see if PostGIS was properly
|
||||
installed::
|
||||
|
||||
sudo -u postgres psql -d ckan_default -c "SELECT postgis_full_version()"
|
||||
|
||||
You should get something like::
|
||||
|
||||
postgis_full_version
|
||||
------------------------------------------------------------------------------------------------------
|
||||
POSTGIS="1.5.2" GEOS="3.2.2-CAPI-1.6.2" PROJ="Rel. 4.7.1, 23 September 2009" LIBXML="2.7.7" USE_STATS
|
||||
(1 row)
|
||||
|
||||
|
||||
#. Install some other packages needed by the extension dependencies::
|
||||
|
||||
sudo apt-get install python-dev libxml2-dev libxslt1-dev libgeos-c1
|
||||
|
||||
|
||||
Install the extension
|
||||
---------------------
|
||||
|
||||
1. Install this extension into your python environment (where CKAN is also
|
||||
installed).
|
||||
|
||||
.. note:: Depending on the CKAN core version you are targeting you will need
|
||||
to use a different branch from the extension.
|
||||
|
||||
For a production site, use the ``stable`` branch, unless there is a specific
|
||||
branch that targets the CKAN core version that you are using.
|
||||
|
||||
To target the latest CKAN core release::
|
||||
|
||||
(pyenv) $ pip install -e git+https://github.com/okfn/ckanext-spatial.git@stable#egg=ckanext-spatial
|
||||
|
||||
To target an old release (if a release branch exists, otherwise use
|
||||
``stable``)::
|
||||
|
||||
(pyenv) $ pip install -e git+https://github.com/okfn/ckanext-spatial.git@release-v1.8#egg=ckanext-spatial
|
||||
|
||||
To target CKAN ``master``, use the extension ``master`` branch (ie no
|
||||
branch defined)::
|
||||
|
||||
(pyenv) $ pip install -e git+https://github.com/okfn/ckanext-spatial.git#egg=ckanext-spatial
|
||||
|
||||
|
||||
2. Install the rest of the Python modules required by the extension::
|
||||
|
||||
(pyenv) $ pip install -r pip-requirements.txt
|
||||
|
||||
To use the :doc:`harvesters`, you will need to install and configure the
|
||||
harvester extension: `ckanext-harvest`_. Follow the install instructions on
|
||||
its documentation for details on how to set it up.
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
Once PostGIS is installed and configured in the database the extension needs
|
||||
to create a table to store the datasets extent, called ``package_extent``.
|
||||
|
||||
This will happen automatically the next time CKAN is restarted after adding the
|
||||
plugins on the configuration ini file (eg when restarting Apache).
|
||||
|
||||
If for some reason you need to explicitly create the table beforehand, you can
|
||||
do it with the following command (with the virtualenv activated)::
|
||||
|
||||
(pyenv) $ paster --plugin=ckanext-spatial spatial initdb [srid] --config=mysite.ini
|
||||
|
||||
You can define the SRID of the geometry column. Default is 4326. If you are not
|
||||
familiar with projections, we recommend using the default value. To learn more
|
||||
about PostGIS tables, see :doc:`postgis-manual`
|
||||
|
||||
Each plugin can be enabled by adding its name to the ``ckan.plugins`` in the
|
||||
CKAN ini file. For example::
|
||||
|
||||
ckan.plugins = spatial_metadata spatial_query
|
||||
|
||||
When enabling the spatial metadata, you can define the projection in which
|
||||
extents are stored in the database with the following option. Use the EPSG code
|
||||
as an integer (e.g. 4326, 4258, 27700, etc). It defaults to 4326::
|
||||
|
||||
ckan.spatial.srid = 4326
|
||||
|
||||
|
||||
Troubleshooting
|
||||
---------------
|
||||
|
||||
Here are some common problems you may find when installing or using the
|
||||
extension:
|
||||
|
||||
When initializing the spatial tables
|
||||
++++++++++++++++++++++++++++++++++++
|
||||
|
||||
::
|
||||
|
||||
LINE 1: SELECT AddGeometryColumn('package_extent','the_geom', E'4326...
|
||||
^
|
||||
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
|
||||
"SELECT AddGeometryColumn('package_extent','the_geom', %s, 'GEOMETRY', 2)" ('4326',)
|
||||
|
||||
|
||||
PostGIS was not installed correctly. Please check the :ref:`install_postgis`
|
||||
section.
|
||||
|
||||
::
|
||||
|
||||
sqlalchemy.exc.ProgrammingError: (ProgrammingError) permission denied for relation spatial_ref_sys
|
||||
|
||||
|
||||
The user accessing the ckan database needs to be owner (or have permissions)
|
||||
of the geometry_columns and spatial_ref_sys tables.
|
||||
|
||||
When migrating to an existing PostGIS database
|
||||
++++++++++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
If you are loading a database dump to an existing PostGIS database, you may
|
||||
find errors like ::
|
||||
|
||||
ERROR: type "spheroid" already exists
|
||||
|
||||
This means that the PostGIS functions are installed, but you may need to
|
||||
create the necessary tables anyway. You can force psql to ignore these
|
||||
errors and continue the transaction with the ``ON_ERROR_ROLLBACK=on`` option::
|
||||
|
||||
sudo -u postgres psql -d ckan_default -f /usr/share/postgresql/9.1/contrib/postgis-1.5/postgis.sql -v ON_ERROR_ROLLBACK=on
|
||||
|
||||
You will still need to populate the spatial_ref_sys table and change the
|
||||
tables permissions. Refer to the previous section for details on how to do
|
||||
it.
|
||||
|
||||
When performing a spatial query
|
||||
+++++++++++++++++++++++++++++++
|
||||
|
||||
::
|
||||
|
||||
InvalidRequestError: SQL expression, column, or mapped entity expected - got '<class 'ckanext.spatial.model.PackageExtent'>'
|
||||
|
||||
The spatial model has not been loaded. You probably forgot to add the
|
||||
``spatial_metadata`` plugin to your ini configuration file.
|
||||
|
||||
::
|
||||
|
||||
InternalError: (InternalError) Operation on two geometries with different SRIDs
|
||||
|
||||
The spatial reference system of the database geometry column and the one
|
||||
used by CKAN differ. Remember, if you are using a different spatial
|
||||
reference system from the default one (WGS 84 lat/lon, EPSG:4326), you must
|
||||
define it in the configuration file as follows::
|
||||
|
||||
ckan.spatial.srid = 4258
|
||||
|
||||
When running the spatial harvesters
|
||||
+++++++++++++++++++++++++++++++++++
|
||||
|
||||
::
|
||||
|
||||
File "xmlschema.pxi", line 102, in lxml.etree.XMLSchema.__init__ (src/lxml/lxml.etree.c:154475)
|
||||
lxml.etree.XMLSchemaParseError: local list type: A type, derived by list or union, must have the simple ur-type definition as base type, not '{http://www.opengis.net/gml}doubleList'., line 1
|
||||
|
||||
The XSD validation used by the spatial harvesters requires libxml2 version 2.9.
|
||||
|
||||
With CKAN you would probably have installed an older version from your
|
||||
distribution. (e.g. with ``sudo apt-get install libxml2-dev``). You need to
|
||||
find the SO files for the old version::
|
||||
|
||||
$ find /usr -name "libxml2.so"
|
||||
|
||||
For example, it may show it here: ``/usr/lib/x86_64-linux-gnu/libxml2.so``.
|
||||
The directory of the SO file is used as a parameter to the ``configure`` script later
|
||||
on.
|
||||
|
||||
Download the libxml2 source::
|
||||
|
||||
$ cd ~
|
||||
$ wget ftp://xmlsoft.org/libxml2/libxml2-2.9.0.tar.gz
|
||||
|
||||
Unzip it::
|
||||
|
||||
$ tar zxvf libxml2-2.9.0.tar.gz
|
||||
$ cd libxml2-2.9.0/
|
||||
|
||||
Configure with the SO directory you found before::
|
||||
|
||||
$ ./configure --libdir=/usr/lib/x86_64-linux-gnu
|
||||
|
||||
Now make it and install it::
|
||||
|
||||
$ make
|
||||
$ sudo make install
|
||||
|
||||
Now check the install by running xmllint::
|
||||
|
||||
$ xmllint --version
|
||||
xmllint: using libxml version 20900
|
||||
compiled with: Threads Tree Output Push Reader Patterns Writer SAXv1 FTP HTTP DTDValid HTML Legacy C14N Catalog XPath XPointer XInclude Iconv ISO8859X Unicode Regexps Automata Expr Schemas Schematron Modules Debug Zlib
|
||||
|
||||
.. _PostGIS: http://postgis.org
|
||||
.. _ckanext-harvest: https://github.com/okfn/ckanext-harvest
|
|
@ -0,0 +1,43 @@
|
|||
==========================
|
||||
Setting up a PostGIS table
|
||||
==========================
|
||||
|
||||
.. note:: The extension will generally set up the table automatically for you,
|
||||
and also running the ``initdb`` command will have the same effect. This
|
||||
section just describes what's going on for those who want to know more.
|
||||
|
||||
To be able to store geometries and perform spatial operations, PostGIS_
|
||||
needs to work with geometry fields. Geometry fields should always be
|
||||
added via the ``AddGeometryColumn`` function::
|
||||
|
||||
CREATE TABLE package_extent(
|
||||
package_id text PRIMARY KEY
|
||||
);
|
||||
|
||||
ALTER TABLE package_extent OWNER TO ckan_default;
|
||||
|
||||
SELECT AddGeometryColumn('package_extent','the_geom', 4326, 'GEOMETRY', 2);
|
||||
|
||||
This will add a geometry column in the ``package_extent`` table called
|
||||
``the_geom``, with the spatial reference system EPSG:4326. The stored
|
||||
geometries will be polygons, with 2 dimensions (The CKAN table uses the
|
||||
GEOMETRY type to support multiple geometry types).
|
||||
|
||||
Have a look at the table definition, and see how PostGIS has created
|
||||
some constraints to ensure that the geometries follow the parameters
|
||||
defined in the geometry column creation::
|
||||
|
||||
# \d package_extent
|
||||
|
||||
Table "public.package_extent"
|
||||
Column | Type | Modifiers
|
||||
------------+----------+-----------
|
||||
package_id | text | not null
|
||||
the_geom | geometry |
|
||||
Indexes:
|
||||
"package_extent_pkey" PRIMARY KEY, btree (package_id)
|
||||
Check constraints:
|
||||
"enforce_dims_the_geom" CHECK (st_ndims(the_geom) = 2)
|
||||
"enforce_srid_the_geom" CHECK (st_srid(the_geom) = 4326)
|
||||
|
||||
.. _PostGIS: http://postgis.org
|
|
@ -0,0 +1,68 @@
|
|||
============================
|
||||
Previews for Spatial Formats
|
||||
============================
|
||||
|
||||
The extension includes some plugins that implement the IResourcePreview_
|
||||
interface, allowing you to preview spatial resource files. They are based on
|
||||
popular Javascript mapping libraries and should be really easy to extend and
|
||||
adapt to your own needs.
|
||||
|
||||
|
||||
GeoJSON Preview
|
||||
---------------
|
||||
|
||||
.. image:: _static/preview-geojson.png
|
||||
|
||||
The GeoJSON previewer is based on Leaflet_. It will render GeoJSON_ files on a
|
||||
map and add a popup showing the features properties, for those resources that
|
||||
have a format of ``geojson`` or ``gjson``.
|
||||
|
||||
To enable the GeoJSON previewer you need to add the ``geojson_preview`` plugin
|
||||
to your ini file. This plugin also requires the `resource_proxy`_
|
||||
plugin (Make sure you load the ``resource_proxy`` plugin before any other
|
||||
from the spatial extension)::
|
||||
|
||||
ckan.plugins = resource_proxy geojson_preview
|
||||
|
||||
|
||||
WMS Preview
|
||||
-----------
|
||||
|
||||
.. image:: _static/preview-wms.png
|
||||
|
||||
The WMS previewer is based on OpenLayers_. When the plugin is enabled, if
|
||||
datasets contain a resource that has ``wms`` format, the resource page will
|
||||
load a simple map viewer that will attempt to load the remote service layers,
|
||||
based on the GetCapabilities response.
|
||||
|
||||
To enable the WMS previewer you need to add the ``wms_preview`` plugin to your
|
||||
ini file. This plugin also requires the `resource_proxy`_
|
||||
plugin (Make sure you load the ``resource_proxy`` plugin before any other
|
||||
from the spatial extension)::
|
||||
|
||||
ckan.plugins = resource_proxy wms_preview
|
||||
|
||||
.. note:: Please note that the WMS previewer included in ckanext-spatial is
|
||||
just a proof of concept and has important limitations, and is
|
||||
just intended as a bootstrap for developers willing to build a more
|
||||
sophisticated one.
|
||||
|
||||
Some projects that have built more advanced map viewers and
|
||||
integrated them with CKAN include:
|
||||
|
||||
* Data.gov.uk (http://data.gov.uk):
|
||||
- https://github.com/datagovuk/ckanext-dgu
|
||||
- https://github.com/datagovuk/ckanext-os
|
||||
|
||||
* Catalog.data.gov (http://catalog.data.gov):
|
||||
- https://github.com/okfn/ckanext-geodatagov
|
||||
- https://github.com/chilukey/viewer
|
||||
|
||||
|
||||
|
||||
.. _IResourcePreview: http://docs.ckan.org/en/latest/writing-extensions.html#ckan.plugins.interfaces.IResourcePreview
|
||||
.. _resource_proxy: http://docs.ckan.org/en/latest/data-viewer.html#viewing-remote-resources-the-resource-proxy
|
||||
.. _Leaflet: http://leafletjs.com
|
||||
.. _GeoJSON: http://geojson.org
|
||||
.. _OpenLayers: http://openlayers.org
|
||||
|
|
@ -0,0 +1,297 @@
|
|||
==============
|
||||
Spatial Search
|
||||
==============
|
||||
|
||||
The spatial extension allows you to index datasets with spatial information so they
|
||||
can be filtered via a spatial query. This can be done either via the web interface
|
||||
(see the `Spatial Search Widget`_) or via the `action API`_, e.g.::
|
||||
|
||||
POST http://localhost:5000/api/action/package_search
|
||||
{ "q": "Pollution",
|
||||
"facet": "true",
|
||||
"facet.field": "country",
|
||||
"extras": {
|
||||
"ext_bbox": "-7.535093,49.208494,3.890688,57.372349" }
|
||||
}
|
||||
|
||||
.. versionchanged:: 2.0.1
|
||||
Starting from this version the spatial filter is also supported on GET
|
||||
requests:
|
||||
|
||||
http://localhost:5000/api/action/package_search?q=Pollution&ext_bbox=-7.535093,49.208494,3.890688,57.372349
|
||||
|
||||
|
||||
Setup
|
||||
-----
|
||||
|
||||
To enable the spatial query you need to add the ``spatial_query`` plugin to
|
||||
your ini file. This plugin requires the ``spatial_metadata`` plugin, eg::
|
||||
|
||||
ckan.plugins = [other plugins] spatial_metadata spatial_query
|
||||
|
||||
To define which backend to use for the spatial search use the following
|
||||
configuration option (see `Choosing a backend for the spatial search`_)::
|
||||
|
||||
ckanext.spatial.search_backend = solr
|
||||
|
||||
|
||||
Geo-Indexing your datasets
|
||||
--------------------------
|
||||
|
||||
Regardless of the backend that you are using, in order to make a dataset
|
||||
queryable by location, a special extra must be defined, with its key named
|
||||
'spatial'. The value must be a valid GeoJSON_ geometry, for example::
|
||||
|
||||
{
|
||||
"type":"Polygon",
|
||||
"coordinates":[[[2.05827, 49.8625],[2.05827, 55.7447], [-6.41736, 55.7447], [-6.41736, 49.8625], [2.05827, 49.8625]]]
|
||||
}
|
||||
|
||||
or::
|
||||
|
||||
{
|
||||
"type": "Point",
|
||||
"coordinates": [-3.145,53.078]
|
||||
}
|
||||
|
||||
|
||||
Every time a dataset is created, updated or deleted, the extension will
|
||||
synchronize the information stored in the extra with the geometry table.
|
||||
|
||||
Choosing a backend for the spatial search
|
||||
+++++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
There are different backends supported for the spatial search. It is important
|
||||
to understand their differences and the necessary setup required when choosing
|
||||
which one to use.
|
||||
|
||||
The following table summarizes the different spatial search backends:
|
||||
|
||||
+------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+
|
||||
| Backend | Solr Versions | Supported geometries | Sorting and relevance | Performance with large number of datasets |
|
||||
+========================+===============+=====================================+===========================================================+===========================================+
|
||||
| ``solr`` | 3.1 to 4.x | Bounding Box | Yes, spatial sorting combined with other query parameters | Good |
|
||||
+------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+
|
||||
| ``solr-spatial-field`` | 4.x | Bounding Box, Point and Polygon [1] | Not implemented | Good |
|
||||
+------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+
|
||||
| ``postgis`` | 1.3 to 4.x | Bounding Box | Partial, only spatial sorting supported [2] | Poor |
|
||||
+------------------------+---------------+-------------------------------------+-----------------------------------------------------------+-------------------------------------------+
|
||||
|
||||
|
||||
[1] Requires JTS
|
||||
|
||||
[2] Needs ``ckanext.spatial.use_postgis_sorting`` set to True
|
||||
|
||||
|
||||
|
||||
We recommend using the ``solr`` backend whenever possible. Here are more
|
||||
details about the available options:
|
||||
|
||||
* ``solr`` (Recommended)
|
||||
This option uses normal Solr fields to index the relevant bits of
|
||||
information about the geometry and uses an algorithm function to sort
|
||||
results by relevance, keeping any other non-spatial filtering. It only
|
||||
supports bounding boxes both for the geometries to be indexed and the
|
||||
input query shape. It requires `EDisMax`_ query parser, so it will only
|
||||
work on versions of Solr greater than 3.1 (We recommend using Solr 4.x).
|
||||
|
||||
You will need to add the following fields to your Solr schema file to
|
||||
enable it::
|
||||
|
||||
<fields>
|
||||
<!-- ... -->
|
||||
<field name="bbox_area" type="float" indexed="true" stored="true" />
|
||||
<field name="maxx" type="float" indexed="true" stored="true" />
|
||||
<field name="maxy" type="float" indexed="true" stored="true" />
|
||||
<field name="minx" type="float" indexed="true" stored="true" />
|
||||
<field name="miny" type="float" indexed="true" stored="true" />
|
||||
</fields>
|
||||
|
||||
|
||||
* ``solr-spatial-field``
|
||||
This option uses the `spatial field`_ introduced in Solr 4, which allows you
|
||||
to index points, rectangles and more complex geometries (complex geometries
|
||||
will require `JTS`_, check the documentation).
|
||||
Sorting has not yet been implemented, users willing to do so will need to
|
||||
modify the query using the ``before_search`` extension point.
|
||||
|
||||
You will need to add the following field type and field to your Solr
|
||||
schema file to enable it (Check the `Solr documentation`__ for more
|
||||
information on the different parameters, note that you don't need
|
||||
``spatialContextFactory`` if you are not using JTS)::
|
||||
|
||||
<types>
|
||||
<!-- ... -->
|
||||
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
|
||||
spatialContextFactory="com.spatial4j.core.context.jts.JtsSpatialContextFactory"
|
||||
distErrPct="0.025"
|
||||
maxDistErr="0.000009"
|
||||
units="degrees" />
|
||||
</types>
|
||||
<fields>
|
||||
<!-- ... -->
|
||||
<field name="spatial_geom" type="location_rpt" indexed="true" stored="true" multiValued="true" />
|
||||
</fields>
|
||||
|
||||
* ``postgis``
|
||||
This is the original implementation of the spatial search. It
|
||||
does not require any change in the Solr schema and can run on Solr 1.x,
|
||||
but it is not as efficient as the previous ones. Basically the bounding
|
||||
box based query is performed in PostGIS first, and the ids of the matched
|
||||
datasets are added as a filter to the Solr request. This, apart from being
|
||||
much less efficient, can lead to issues on Solr due to the size of the requests
|
||||
(See `Solr configuration issues on legacy PostGIS backend`_). There is
|
||||
support for a spatial ranking on this backend (setting
|
||||
``ckanext.spatial.use_postgis_sorting`` to True on the ini file), but
|
||||
it can not be combined with any other filtering.
|
||||
|
||||
|
||||
Spatial Search Widget
|
||||
---------------------
|
||||
|
||||
|
||||
.. image:: _static/spatial-search-widget.png
|
||||
|
||||
The extension provides a snippet to add a map widget to the search form, which
|
||||
allows filtering results by an area of interest.
|
||||
|
||||
To add the map widget to the sidebar of the search page, add this to the
|
||||
dataset search page template
|
||||
(``myproj/ckanext/myproj/templates/package/search.html``)::
|
||||
|
||||
{% block secondary_content %}
|
||||
|
||||
{% snippet "spatial/snippets/spatial_query.html" %}
|
||||
|
||||
{% endblock %}
|
||||
|
||||
By default the map widget will show the whole world. If you want to set up a
|
||||
different default extent, you can pass an extra ``default_extent`` to the
|
||||
snippet, either with a pair of coordinates like this::
|
||||
|
||||
{% snippet "spatial/snippets/spatial_query.html", default_extent="[[15.62,
|
||||
-139.21], [64.92, -61.87]]" %}
|
||||
|
||||
or with a GeoJSON object describing a bounding box (note the escaped quotes)::
|
||||
|
||||
{% snippet "spatial/snippets/spatial_query.html", default_extent="{ \"type\":
|
||||
\"Polygon\", \"coordinates\": [[[74.89, 29.39],[74.89, 38.45], [60.50,
|
||||
38.45], [60.50, 29.39], [74.89, 29.39]]]}" %}
|
||||
|
||||
You need to load the ``spatial_metadata`` and ``spatial_query`` plugins to use this
|
||||
snippet.
|
||||
|
||||
|
||||
|
||||
Dataset Extent Map
|
||||
------------------
|
||||
|
||||
.. image:: _static/dataset-extent-map.png
|
||||
|
||||
Using the snippets provided, if datasets contain a ``spatial`` extra like the
|
||||
one described in the previous section, a map will be shown on the dataset
|
||||
details page.
|
||||
|
||||
There are snippets already created to load the map on the left sidebar or in
|
||||
the main body of the dataset details page, but these can be easily modified to
|
||||
suit your project needs.
|
||||
|
||||
To add a map to the sidebar, add this to the dataset details page template (eg
|
||||
``myproj/ckanext/myproj/templates/package/read.html``)::
|
||||
|
||||
{% block secondary_content %}
|
||||
{{ super() }}
|
||||
|
||||
{% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %}
|
||||
{% if dataset_extent %}
|
||||
{% snippet "spatial/snippets/dataset_map_sidebar.html", extent=dataset_extent %}
|
||||
{% endif %}
|
||||
|
||||
{% endblock %}
|
||||
|
||||
For adding the map to the main body, add this::
|
||||
|
||||
{% block primary_content %}
|
||||
|
||||
<!-- ... -->
|
||||
|
||||
<article class="module prose">
|
||||
|
||||
<!-- ... -->
|
||||
|
||||
{% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %}
|
||||
{% if dataset_extent %}
|
||||
{% snippet "spatial/snippets/dataset_map.html", extent=dataset_extent %}
|
||||
{% endif %}
|
||||
|
||||
</article>
|
||||
{% endblock %}
|
||||
|
||||
|
||||
You need to load the ``spatial_metadata`` plugin to use these snippets.
|
||||
|
||||
Legacy Search
|
||||
-------------
|
||||
|
||||
Solr configuration issues on legacy PostGIS backend
|
||||
+++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
.. warning::
|
||||
|
||||
If you find any of the issues described in this section it is strongly
|
||||
recommended that you consider switching to one of the Solr based backends
|
||||
which are much more efficient. These notes are just kept for informative
|
||||
purposes.
|
||||
|
||||
|
||||
If using Spatial Query functionality then there is an additional SOLR/Lucene
|
||||
setting that should be used to set the limit on number of datasets searchable
|
||||
with a spatial value.
|
||||
|
||||
The setting is ``maxBooleanClauses`` in the solrconfig.xml and the value is the
|
||||
number of datasets spatially searchable. The default is ``1024`` and this could
|
||||
be increased to say ``16384``. For a SOLR single core this will probably be at
|
||||
`/etc/solr/conf/solrconfig.xml`. For a multiple core set-up, there will be
|
||||
several solrconfig.xml files a couple of levels below `/etc/solr`. For that
|
||||
case, *all* of the cores' `solrconfig.xml` should have this setting at the new
|
||||
value.
|
||||
|
||||
Example::
|
||||
|
||||
<maxBooleanClauses>16384</maxBooleanClauses>
|
||||
|
||||
This setting is needed because PostGIS spatial query results are fed into SOLR
|
||||
using a Boolean expression, and the parser for that has a limit. So if your
|
||||
spatial area contains more than the limit (of which the default is 1024) then
|
||||
you will get this error::
|
||||
|
||||
Dataset search error: ('SOLR returned an error running query...
|
||||
|
||||
and in the SOLR logs you see::
|
||||
|
||||
too many boolean clauses ... Caused by:
|
||||
org.apache.lucene.search.BooleanQuery$TooManyClauses: maxClauseCount is set to
|
||||
1024
|
||||
|
||||
|
||||
Legacy API
|
||||
++++++++++
|
||||
|
||||
The extension adds the following call to the CKAN search API, which returns
|
||||
datasets with an extent that intersects with the bounding box provided::
|
||||
|
||||
/api/2/search/dataset/geo?bbox={minx,miny,maxx,maxy}[&crs={srid}]
|
||||
|
||||
If the bounding box coordinates are not in the same projection as the one
|
||||
defined in the database, a CRS must be provided, in one of the following forms:
|
||||
|
||||
- `urn:ogc:def:crs:EPSG::4326`
|
||||
- EPSG:4326
|
||||
- 4326
|
||||
|
||||
.. _action API: http://docs.ckan.org/en/latest/apiv3.html
|
||||
.. _edismax: http://wiki.apache.org/solr/ExtendedDisMax
|
||||
.. _JTS: http://www.vividsolutions.com/jts/JTSHome.htm
|
||||
.. _spatial field: http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
|
||||
__ `spatial field`_
|
||||
.. _GeoJSON: http://geojson.org
|
Loading…
Reference in New Issue