Merge branch 'master' into release-v2.0
This commit is contained in:
commit
f1e27c717c
|
@ -23,12 +23,14 @@ from ckan import model
|
||||||
from ckan.lib.helpers import json
|
from ckan.lib.helpers import json
|
||||||
from ckan import logic
|
from ckan import logic
|
||||||
from ckan.lib.navl.validators import not_empty
|
from ckan.lib.navl.validators import not_empty
|
||||||
|
from ckan.lib.search.index import PackageSearchIndex
|
||||||
|
|
||||||
from ckanext.harvest.harvesters.base import HarvesterBase
|
from ckanext.harvest.harvesters.base import HarvesterBase
|
||||||
from ckanext.harvest.model import HarvestObject
|
from ckanext.harvest.model import HarvestObject
|
||||||
|
|
||||||
from ckanext.spatial.validation import Validators, all_validators
|
from ckanext.spatial.validation import Validators, all_validators
|
||||||
from ckanext.spatial.model import ISODocument
|
from ckanext.spatial.model import ISODocument
|
||||||
|
from ckanext.spatial.interfaces import ISpatialHarvester
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -109,6 +111,8 @@ class SpatialHarvester(HarvesterBase):
|
||||||
|
|
||||||
_user_name = None
|
_user_name = None
|
||||||
|
|
||||||
|
_site_user = None
|
||||||
|
|
||||||
source_config = {}
|
source_config = {}
|
||||||
|
|
||||||
force_import = False
|
force_import = False
|
||||||
|
@ -146,10 +150,6 @@ class SpatialHarvester(HarvesterBase):
|
||||||
|
|
||||||
## SpatialHarvester
|
## SpatialHarvester
|
||||||
|
|
||||||
'''
|
|
||||||
These methods can be safely overridden by classes extending
|
|
||||||
SpatialHarvester
|
|
||||||
'''
|
|
||||||
|
|
||||||
def get_package_dict(self, iso_values, harvest_object):
|
def get_package_dict(self, iso_values, harvest_object):
|
||||||
'''
|
'''
|
||||||
|
@ -157,19 +157,23 @@ class SpatialHarvester(HarvesterBase):
|
||||||
package_update. See documentation on
|
package_update. See documentation on
|
||||||
ckan.logic.action.create.package_create for more details
|
ckan.logic.action.create.package_create for more details
|
||||||
|
|
||||||
Tipically, custom harvesters would only want to add or modify the
|
Extensions wishing to modify the dict should do so by implementing the
|
||||||
extras, but the whole method can be replaced if necessary. Note that
|
ISpatialHarvester interface
|
||||||
if only minor modifications need to be made you can call the parent
|
|
||||||
method from your custom harvester and modify the output, eg:
|
|
||||||
|
|
||||||
class MyHarvester(SpatialHarvester):
|
import ckan.plugins as p
|
||||||
|
from ckanext.spatial.interfaces import ISpatialHarvester
|
||||||
|
|
||||||
def get_package_dict(self, iso_values, harvest_object):
|
class MyHarvester(p.SingletonPlugin):
|
||||||
|
|
||||||
package_dict = super(MyHarvester, self).get_package_dict(iso_values, harvest_object)
|
p.implements(ISpatialHarvester, inherit=True)
|
||||||
|
|
||||||
package_dict['extras']['my-custom-extra-1'] = 'value1'
|
def get_package_dict(self, context, data_dict):
|
||||||
package_dict['extras']['my-custom-extra-2'] = 'value2'
|
|
||||||
|
package_dict = data_dict['package_dict']
|
||||||
|
|
||||||
|
package_dict['extras'].append(
|
||||||
|
{'key': 'my-custom-extra', 'value': 'my-custom-value'}
|
||||||
|
)
|
||||||
|
|
||||||
return package_dict
|
return package_dict
|
||||||
|
|
||||||
|
@ -364,34 +368,18 @@ class SpatialHarvester(HarvesterBase):
|
||||||
|
|
||||||
def transform_to_iso(self, original_document, original_format, harvest_object):
|
def transform_to_iso(self, original_document, original_format, harvest_object):
|
||||||
'''
|
'''
|
||||||
Transforms an XML document to ISO 19139
|
DEPRECATED: Use the transform_to_iso method of the ISpatialHarvester
|
||||||
|
interface
|
||||||
This method will be only called from the import stage if the
|
|
||||||
harvest_object content is null and original_document and
|
|
||||||
original_format harvest object extras exist (eg if an FGDC document
|
|
||||||
was harvested).
|
|
||||||
|
|
||||||
In that case, this method should do the necessary to provide an
|
|
||||||
ISO 1939 like document, otherwise the import process will stop.
|
|
||||||
|
|
||||||
|
|
||||||
:param original_document: Original XML document
|
|
||||||
:type original_document: string
|
|
||||||
:param original_format: Original format (eg 'fgdc')
|
|
||||||
:type original_format: string
|
|
||||||
:param harvest_object: HarvestObject domain object (with access to
|
|
||||||
job and source objects)
|
|
||||||
:type harvest_object: HarvestObject
|
|
||||||
|
|
||||||
:returns: An ISO 19139 document or None if the transformation was not
|
|
||||||
successful
|
|
||||||
:rtype: string
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
self.__base_transform_to_iso_called = True
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def import_stage(self, harvest_object):
|
def import_stage(self, harvest_object):
|
||||||
|
context = {
|
||||||
|
'model': model,
|
||||||
|
'session': model.Session,
|
||||||
|
'user': self._get_user_name(),
|
||||||
|
}
|
||||||
|
|
||||||
log = logging.getLogger(__name__ + '.import')
|
log = logging.getLogger(__name__ + '.import')
|
||||||
log.debug('Import stage for harvest object: %s', harvest_object.id)
|
log.debug('Import stage for harvest object: %s', harvest_object.id)
|
||||||
|
@ -415,8 +403,9 @@ class SpatialHarvester(HarvesterBase):
|
||||||
|
|
||||||
if status == 'delete':
|
if status == 'delete':
|
||||||
# Delete package
|
# Delete package
|
||||||
context = {'model': model, 'session': model.Session, 'user': self._get_user_name()}
|
context.update({
|
||||||
|
'ignore_auth': True,
|
||||||
|
})
|
||||||
p.toolkit.get_action('package_delete')(context, {'id': harvest_object.package_id})
|
p.toolkit.get_action('package_delete')(context, {'id': harvest_object.package_id})
|
||||||
log.info('Deleted package {0} with guid {1}'.format(harvest_object.package_id, harvest_object.guid))
|
log.info('Deleted package {0} with guid {1}'.format(harvest_object.package_id, harvest_object.guid))
|
||||||
|
|
||||||
|
@ -426,7 +415,16 @@ class SpatialHarvester(HarvesterBase):
|
||||||
original_document = self._get_object_extra(harvest_object, 'original_document')
|
original_document = self._get_object_extra(harvest_object, 'original_document')
|
||||||
original_format = self._get_object_extra(harvest_object, 'original_format')
|
original_format = self._get_object_extra(harvest_object, 'original_format')
|
||||||
if original_document and original_format:
|
if original_document and original_format:
|
||||||
|
#DEPRECATED use the ISpatialHarvester interface method
|
||||||
|
self.__base_transform_to_iso_called = False
|
||||||
content = self.transform_to_iso(original_document, original_format, harvest_object)
|
content = self.transform_to_iso(original_document, original_format, harvest_object)
|
||||||
|
if not self.__base_transform_to_iso_called:
|
||||||
|
log.warn('Deprecation warning: calling transform_to_iso directly is deprecated. ' +
|
||||||
|
'Please use the ISpatialHarvester interface method instead.')
|
||||||
|
|
||||||
|
for harvester in p.PluginImplementations(ISpatialHarvester):
|
||||||
|
content = harvester.transform_to_iso(original_document, original_format, harvest_object)
|
||||||
|
|
||||||
if content:
|
if content:
|
||||||
harvest_object.content = content
|
harvest_object.content = content
|
||||||
else:
|
else:
|
||||||
|
@ -449,7 +447,9 @@ class SpatialHarvester(HarvesterBase):
|
||||||
|
|
||||||
# Parse ISO document
|
# Parse ISO document
|
||||||
try:
|
try:
|
||||||
iso_values = ISODocument(harvest_object.content).read_values()
|
|
||||||
|
iso_parser = ISODocument(harvest_object.content)
|
||||||
|
iso_values = iso_parser.read_values()
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self._save_object_error('Error parsing ISO document for object {0}: {1}'.format(harvest_object.id, str(e)),
|
self._save_object_error('Error parsing ISO document for object {0}: {1}'.format(harvest_object.id, str(e)),
|
||||||
harvest_object, 'Import')
|
harvest_object, 'Import')
|
||||||
|
@ -495,21 +495,27 @@ class SpatialHarvester(HarvesterBase):
|
||||||
harvest_object.metadata_modified_date = metadata_modified_date
|
harvest_object.metadata_modified_date = metadata_modified_date
|
||||||
harvest_object.add()
|
harvest_object.add()
|
||||||
|
|
||||||
|
|
||||||
# Build the package dict
|
# Build the package dict
|
||||||
package_dict = self.get_package_dict(iso_values, harvest_object)
|
package_dict = self.get_package_dict(iso_values, harvest_object)
|
||||||
|
for harvester in p.PluginImplementations(ISpatialHarvester):
|
||||||
|
package_dict = harvester.get_package_dict(context, {
|
||||||
|
'package_dict': package_dict,
|
||||||
|
'iso_values': iso_values,
|
||||||
|
'xml_tree': iso_parser.xml_tree,
|
||||||
|
'harvest_object': harvest_object,
|
||||||
|
})
|
||||||
if not package_dict:
|
if not package_dict:
|
||||||
log.error('No package dict returned, aborting import for object {0}'.format(harvest_object.id))
|
log.error('No package dict returned, aborting import for object {0}'.format(harvest_object.id))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Create / update the package
|
# Create / update the package
|
||||||
|
context.update({
|
||||||
|
'extras_as_string': True,
|
||||||
|
'api_version': '2',
|
||||||
|
'return_id_only': True})
|
||||||
|
|
||||||
context = {'model': model,
|
if self._site_user and context['user'] == self._site_user['name']:
|
||||||
'session': model.Session,
|
|
||||||
'user': self._get_user_name(),
|
|
||||||
'extras_as_string': True,
|
|
||||||
'api_version': '2',
|
|
||||||
'return_id_only': True}
|
|
||||||
if context['user'] == self._site_user['name']:
|
|
||||||
context['ignore_auth'] = True
|
context['ignore_auth'] = True
|
||||||
|
|
||||||
|
|
||||||
|
@ -550,7 +556,7 @@ class SpatialHarvester(HarvesterBase):
|
||||||
elif status == 'change':
|
elif status == 'change':
|
||||||
|
|
||||||
# Check if the modified date is more recent
|
# Check if the modified date is more recent
|
||||||
if not self.force_import and harvest_object.metadata_modified_date <= previous_object.metadata_modified_date:
|
if not self.force_import and previous_object and harvest_object.metadata_modified_date <= previous_object.metadata_modified_date:
|
||||||
|
|
||||||
# Assign the previous job id to the new object to
|
# Assign the previous job id to the new object to
|
||||||
# avoid losing history
|
# avoid losing history
|
||||||
|
@ -560,6 +566,25 @@ class SpatialHarvester(HarvesterBase):
|
||||||
# Delete the previous object to avoid cluttering the object table
|
# Delete the previous object to avoid cluttering the object table
|
||||||
previous_object.delete()
|
previous_object.delete()
|
||||||
|
|
||||||
|
# Reindex the corresponding package to update the reference to the
|
||||||
|
# harvest object
|
||||||
|
if ((config.get('ckanext.spatial.harvest.reindex_unchanged', True) != 'False'
|
||||||
|
or self.source_config.get('reindex_unchanged') != 'False')
|
||||||
|
and harvest_object.package_id):
|
||||||
|
context.update({'validate': False, 'ignore_auth': True})
|
||||||
|
try:
|
||||||
|
package_dict = logic.get_action('package_show')(context,
|
||||||
|
{'id': harvest_object.package_id})
|
||||||
|
except p.toolkit.ObjectNotFound:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
for extra in package_dict.get('extras', []):
|
||||||
|
if extra['key'] == 'harvest_object_id':
|
||||||
|
extra['value'] = harvest_object.id
|
||||||
|
if package_dict:
|
||||||
|
package_index = PackageSearchIndex()
|
||||||
|
package_index.index_package(package_dict)
|
||||||
|
|
||||||
log.info('Document with GUID %s unchanged, skipping...' % (harvest_object.guid))
|
log.info('Document with GUID %s unchanged, skipping...' % (harvest_object.guid))
|
||||||
else:
|
else:
|
||||||
package_schema = logic.schema.default_update_package_schema()
|
package_schema = logic.schema.default_update_package_schema()
|
||||||
|
@ -637,6 +662,15 @@ class SpatialHarvester(HarvesterBase):
|
||||||
else:
|
else:
|
||||||
profiles = DEFAULT_VALIDATOR_PROFILES
|
profiles = DEFAULT_VALIDATOR_PROFILES
|
||||||
self._validator = Validators(profiles=profiles)
|
self._validator = Validators(profiles=profiles)
|
||||||
|
|
||||||
|
# Add any custom validators from extensions
|
||||||
|
for plugin_with_validators in p.PluginImplementations(ISpatialHarvester):
|
||||||
|
custom_validators = plugin_with_validators.get_validators()
|
||||||
|
for custom_validator in custom_validators:
|
||||||
|
if custom_validator not in all_validators:
|
||||||
|
self._validator.add_validator(custom_validator)
|
||||||
|
|
||||||
|
|
||||||
return self._validator
|
return self._validator
|
||||||
|
|
||||||
def _get_user_name(self):
|
def _get_user_name(self):
|
||||||
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
from ckan.plugins.interfaces import Interface
|
||||||
|
|
||||||
|
|
||||||
|
class ISpatialHarvester(Interface):
|
||||||
|
|
||||||
|
def get_package_dict(self, context, data_dict):
|
||||||
|
'''
|
||||||
|
Allows extensions to modify the dataset dict that will be created or updated
|
||||||
|
|
||||||
|
This is the dict that the harvesters will pass to the `package_create`
|
||||||
|
or `package_update` actions. Extensions can modify it to suit their
|
||||||
|
needs, adding or removing fields, modifying the default ones, etc.
|
||||||
|
|
||||||
|
This method should always return a package_dict. Note that, although
|
||||||
|
unlikely in a particular instance, this method could be implemented by
|
||||||
|
more than one plugin.
|
||||||
|
|
||||||
|
If a dict is not returned by this function, the import stage will be
|
||||||
|
cancelled.
|
||||||
|
|
||||||
|
.. note:: Make sure to run ``model.Session.flush()`` if you perform
|
||||||
|
queries using the model included in the ``context`` object.
|
||||||
|
|
||||||
|
|
||||||
|
:param context: Contains a reference to the model, eg to
|
||||||
|
perform DB queries, and the user name used for
|
||||||
|
authorization.
|
||||||
|
:type context: dict
|
||||||
|
:param data_dict: Available data. Contains four keys:
|
||||||
|
|
||||||
|
* `package_dict`
|
||||||
|
The default package_dict generated by the harvester. Modify this
|
||||||
|
or create a brand new one.
|
||||||
|
* `iso_values`
|
||||||
|
The parsed ISO XML document values. These contain more fields
|
||||||
|
that are not added by default to the ``package_dict``.
|
||||||
|
* `xml_tree`
|
||||||
|
The full XML etree object. If some values not present in
|
||||||
|
``iso_values`` are needed, these can be extracted via xpath.
|
||||||
|
* `harvest_object`
|
||||||
|
A ``HarvestObject`` domain object which contains a reference
|
||||||
|
to the original metadata document (``harvest_object.content``)
|
||||||
|
and the harvest source (``harvest_object.source``).
|
||||||
|
|
||||||
|
:type data_dict: dict
|
||||||
|
|
||||||
|
:returns: A dataset dict ready to be used by ``package_create`` or
|
||||||
|
``package_update``
|
||||||
|
:rtype: dict
|
||||||
|
'''
|
||||||
|
return data_dict['package_dict']
|
||||||
|
|
||||||
|
def get_validators(self):
|
||||||
|
'''
|
||||||
|
Allows extensions to register custom Validators that can be applied to harvested
|
||||||
|
metadata documents.
|
||||||
|
|
||||||
|
Validators are classes that implement the ``is_valid`` method. Check
|
||||||
|
the `Writing custom validators`_ section in the docs to know more
|
||||||
|
about writing custom validators.
|
||||||
|
|
||||||
|
:returns: A list of Validator classes
|
||||||
|
:rtype: list
|
||||||
|
'''
|
||||||
|
return []
|
||||||
|
|
||||||
|
def transform_to_iso(self, original_document, original_format, harvest_object):
|
||||||
|
'''
|
||||||
|
Transforms an XML document to ISO 19139
|
||||||
|
|
||||||
|
This method will be only called from the import stage if the
|
||||||
|
harvest_object content is null and original_document and
|
||||||
|
original_format harvest object extras exist (eg if an FGDC document
|
||||||
|
was harvested).
|
||||||
|
|
||||||
|
In that case, this method should do whatever is necessary to provide an
|
||||||
|
ISO 19139 like document, otherwise the import process will stop.
|
||||||
|
|
||||||
|
|
||||||
|
:param original_document: Original XML document
|
||||||
|
:type original_document: string
|
||||||
|
:param original_format: Original format (eg 'fgdc')
|
||||||
|
:type original_format: string
|
||||||
|
:param harvest_object: HarvestObject domain object (with access to
|
||||||
|
job and source objects)
|
||||||
|
:type harvest_object: HarvestObject
|
||||||
|
|
||||||
|
:returns: An ISO 19139 document or None if the transformation was not
|
||||||
|
successful
|
||||||
|
:rtype: string
|
||||||
|
|
||||||
|
'''
|
||||||
|
return None
|
||||||
|
|
|
@ -31,7 +31,8 @@ separate stages:
|
||||||
content into a CKAN dataset: validates the document, parses it, converts it
|
content into a CKAN dataset: validates the document, parses it, converts it
|
||||||
to a CKAN dataset dict and saves it in the database.
|
to a CKAN dataset dict and saves it in the database.
|
||||||
|
|
||||||
The extension provides different XSD and schematron based validators. You can
|
The extension provides different XSD and schematron based validators, and you
|
||||||
|
can also write your own (see `Writing custom validators`_). You can
|
||||||
specify which validators to use for the remote documents with the following
|
specify which validators to use for the remote documents with the following
|
||||||
configuration option::
|
configuration option::
|
||||||
|
|
||||||
|
@ -51,27 +52,191 @@ hardcoded 'harvest' user::
|
||||||
|
|
||||||
ckanext.spatial.harvest.user_name = harvest
|
ckanext.spatial.harvest.user_name = harvest
|
||||||
|
|
||||||
|
When a document has not been updated remotely, the previous harvest object is
|
||||||
|
replaced by the current one rather than keeping it, to avoid cluttering the
|
||||||
|
``harvest_object`` table. This means that the ``harvest_object_id`` reference
|
||||||
|
on the linked dataset needs to be updated, by reindexing it. This will happen
|
||||||
|
by default, but if you want to turn it off (eg if you are doing separate
|
||||||
|
reindexing) it can be turned off with the following option::
|
||||||
|
|
||||||
|
ckanext.spatial.harvest.reindex_unchanged = False
|
||||||
|
|
||||||
|
|
||||||
Customizing the harvesters
|
Customizing the harvesters
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
The default harvesters provided in this extension can be overriden from
|
The default harvesters provided in this extension can be extended by
|
||||||
extensions to customize to your needs. You can either extend ``CswHarvester``,
|
extensions implementing the ``ISpatialHarvester`` interface.
|
||||||
``WAFfHarverster`` or the main ``SpatialHarvester`` class. There are some
|
|
||||||
extension points that can be safely overriden from your extension. Probably the
|
|
||||||
most useful is ``get_package_dict``, which allows to tweak the dataset fields
|
|
||||||
before creating or updating them. ``transform_to_iso`` allows to hook into
|
|
||||||
transformation mechanisms to transform other formats into ISO1939, the only one
|
|
||||||
directly supported byt he spatial harvesters. Finally, the whole
|
|
||||||
``import_stage`` can be overriden if the default logic does not suit your
|
|
||||||
needs.
|
|
||||||
|
|
||||||
Check the source code of ``ckanext/spatial/harvesters/base.py`` for more
|
Probably the most useful extension point is ``get_package_dict``, which
|
||||||
details on these functions.
|
allows you to tweak the dataset fields before creating or updating it::
|
||||||
|
|
||||||
|
import ckan.plugins as p
|
||||||
|
from ckanext.spatial.interfaces import ISpatialHarvester
|
||||||
|
|
||||||
|
class MyPlugin(p.SingletonPlugin):
|
||||||
|
|
||||||
|
p.implements(ISpatialHarvester, inherit=True)
|
||||||
|
|
||||||
|
def get_package_dict(self, context, data_dict):
|
||||||
|
|
||||||
|
# Check the reference below to see all that's included on data_dict
|
||||||
|
|
||||||
|
package_dict = data_dict['package_dict']
|
||||||
|
iso_values = data_dict['iso_values']
|
||||||
|
|
||||||
|
package_dict['extras'].append(
|
||||||
|
{'key': 'topic-category', 'value': iso_values.get('topic-category')}
|
||||||
|
)
|
||||||
|
|
||||||
|
package_dict['extras'].append(
|
||||||
|
{'key': 'my-custom-extra', 'value': 'my-custom-value'}
|
||||||
|
)
|
||||||
|
|
||||||
|
return package_dict
|
||||||
|
|
||||||
|
``get_validators`` allows you to register custom validation classes that can be
|
||||||
|
applied to the harvested documents. Check the `Writing custom validators`_
|
||||||
|
section to know more about how to write your custom validators::
|
||||||
|
|
||||||
|
import ckan.plugins as p
|
||||||
|
from ckanext.spatial.interfaces import ISpatialHarvester
|
||||||
|
from ckanext.spatial.validation.validation import BaseValidator
|
||||||
|
|
||||||
|
class MyPlugin(p.SingletonPlugin):
|
||||||
|
|
||||||
|
p.implements(ISpatialHarvester, inherit=True)
|
||||||
|
|
||||||
|
def get_validators(self):
|
||||||
|
return [MyValidator]
|
||||||
|
|
||||||
|
|
||||||
|
class MyValidator(BaseValidator):
|
||||||
|
|
||||||
|
name = 'my-validator'
|
||||||
|
|
||||||
|
title= 'My very own validator'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def is_valid(cls, xml):
|
||||||
|
|
||||||
|
return True, []
|
||||||
|
|
||||||
|
|
||||||
|
``transform_to_iso`` allows you to hook into transformation mechanisms to
|
||||||
|
transform other formats into ISO 19139, the only one directly supported by
|
||||||
|
the spatial harvesters.
|
||||||
|
|
||||||
|
Here is the full reference for the provided extension points:
|
||||||
|
|
||||||
|
.. autoclass:: ckanext.spatial.interfaces.ISpatialHarvester
|
||||||
|
:members:
|
||||||
|
|
||||||
|
If you need to further customize the default behaviour of the harvesters, you
|
||||||
|
can either extend ``CswHarvester``, ``WAFHarvester`` or the main
|
||||||
|
``SpatialHarvester`` class, for instance to override the whole
|
||||||
|
``import_stage`` if the default logic does not suit your
|
||||||
|
needs.
|
||||||
|
|
||||||
The `ckanext-geodatagov`_ extension contains live examples on how to extend
|
The `ckanext-geodatagov`_ extension contains live examples on how to extend
|
||||||
the default spatial harvesters and create new ones for other spatial services
|
the default spatial harvesters and create new ones for other spatial services
|
||||||
like ArcGIS REST APIs.
|
like ArcGIS REST APIs.
|
||||||
|
|
||||||
|
Writing custom validators
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
|
||||||
|
Validator classes extend the ``BaseValidator`` class:
|
||||||
|
|
||||||
|
.. autoclass:: ckanext.spatial.validation.validation.BaseValidator
|
||||||
|
:members:
|
||||||
|
|
||||||
|
Helper classes are provided for XSD and schematron based validation, and
|
||||||
|
completely custom logic can also be implemented. Here are some examples of
|
||||||
|
the most common types:
|
||||||
|
|
||||||
|
* XSD based validators::
|
||||||
|
|
||||||
|
class ISO19139NGDCSchema(XsdValidator):
|
||||||
|
'''
|
||||||
|
XSD based validation for ISO 19139 documents.
|
||||||
|
|
||||||
|
Uses XSD schema from the NOAA National Geophysical Data Center:
|
||||||
|
|
||||||
|
http://ngdc.noaa.gov/metadata/published/xsd/
|
||||||
|
|
||||||
|
'''
|
||||||
|
name = 'iso19139ngdc'
|
||||||
|
title = 'ISO19139 XSD Schema (NGDC)'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def is_valid(cls, xml):
|
||||||
|
xsd_path = 'xml/iso19139ngdc'
|
||||||
|
|
||||||
|
xsd_filepath = os.path.join(os.path.dirname(__file__),
|
||||||
|
xsd_path, 'schema.xsd')
|
||||||
|
return cls._is_valid(xml, xsd_filepath, 'NGDC Schema (schema.xsd)')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
* Schematron validators::
|
||||||
|
|
||||||
|
class Gemini2Schematron(SchematronValidator):
|
||||||
|
name = 'gemini2'
|
||||||
|
title = 'GEMINI 2.1 Schematron 1.2'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_schematrons(cls):
|
||||||
|
with resource_stream("ckanext.spatial",
|
||||||
|
"validation/xml/gemini2/gemini2-schematron-20110906-v1.2.sch") as schema:
|
||||||
|
return [cls.schematron(schema)]
|
||||||
|
|
||||||
|
|
||||||
|
* Custom validators::
|
||||||
|
|
||||||
|
class MinimalFGDCValidator(BaseValidator):
|
||||||
|
|
||||||
|
name = 'fgdc_minimal'
|
||||||
|
title = 'FGDC Minimal Validation'
|
||||||
|
|
||||||
|
_elements = [
|
||||||
|
('Identification Citation Title', '/metadata/idinfo/citation/citeinfo/title'),
|
||||||
|
('Identification Citation Originator', '/metadata/idinfo/citation/citeinfo/origin'),
|
||||||
|
('Identification Citation Publication Date', '/metadata/idinfo/citation/citeinfo/pubdate'),
|
||||||
|
('Identification Description Abstract', '/metadata/idinfo/descript/abstract'),
|
||||||
|
('Identification Spatial Domain West Bounding Coordinate', '/metadata/idinfo/spdom/bounding/westbc'),
|
||||||
|
('Identification Spatial Domain East Bounding Coordinate', '/metadata/idinfo/spdom/bounding/eastbc'),
|
||||||
|
('Identification Spatial Domain North Bounding Coordinate', '/metadata/idinfo/spdom/bounding/northbc'),
|
||||||
|
('Identification Spatial Domain South Bounding Coordinate', '/metadata/idinfo/spdom/bounding/southbc'),
|
||||||
|
('Metadata Reference Information Contact Address Type', '/metadata/metainfo/metc/cntinfo/cntaddr/addrtype'),
|
||||||
|
('Metadata Reference Information Contact Address State', '/metadata/metainfo/metc/cntinfo/cntaddr/state'),
|
||||||
|
]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def is_valid(cls, xml):
|
||||||
|
|
||||||
|
errors = []
|
||||||
|
|
||||||
|
for title, xpath in cls._elements:
|
||||||
|
element = xml.xpath(xpath)
|
||||||
|
if len(element) == 0 or not element[0].text:
|
||||||
|
errors.append(('Element not found: {0}'.format(title), None))
|
||||||
|
if len(errors):
|
||||||
|
return False, errors
|
||||||
|
|
||||||
|
return True, []
|
||||||
|
|
||||||
|
|
||||||
|
The `validation.py`_ file included in the ckanext-spatial extension contains
|
||||||
|
more examples of the different types.
|
||||||
|
|
||||||
|
Remember that after registering your own validators you must specify them on
|
||||||
|
the following configuration option::
|
||||||
|
|
||||||
|
ckan.spatial.validator.profiles = iso19139eden,my-validator
|
||||||
|
|
||||||
|
|
||||||
|
.. _validation.py: https://github.com/ckan/ckanext-spatial/blob/master/ckanext/spatial/validation/validation.py
|
||||||
|
|
||||||
Harvest Metadata API
|
Harvest Metadata API
|
||||||
--------------------
|
--------------------
|
||||||
|
|
Loading…
Reference in New Issue