Merge branch '2.0-validation-changes' into release-v2.0
This commit is contained in:
commit
3142794524
|
@ -111,15 +111,19 @@ class SpatialHarvester(object):
|
|||
finally:
|
||||
log.error(message)
|
||||
|
||||
def _save_object_error(self,message,obj,stage=u'Fetch'):
|
||||
err = HarvestObjectError(message=message,object=obj,stage=stage)
|
||||
def _save_object_error(self,message,obj,stage=u'Fetch',line=None):
|
||||
err = HarvestObjectError(message=message,
|
||||
object=obj,
|
||||
stage=stage,
|
||||
line=line)
|
||||
try:
|
||||
err.save()
|
||||
except InvalidRequestError,e:
|
||||
Session.rollback()
|
||||
err.save()
|
||||
finally:
|
||||
log.error(message)
|
||||
log_message = '{0}, line {1}'.format(message,line) if line else message
|
||||
log.debug(log_message)
|
||||
|
||||
def _get_content(self, url):
|
||||
url = url.replace(' ','%20')
|
||||
|
@ -199,11 +203,10 @@ class GeminiHarvester(SpatialHarvester):
|
|||
def import_gemini_object(self, gemini_string):
|
||||
log = logging.getLogger(__name__ + '.import')
|
||||
xml = etree.fromstring(gemini_string)
|
||||
|
||||
valid, messages = self._get_validator().is_valid(xml)
|
||||
valid, profile, errors = self._get_validator().is_valid(xml)
|
||||
if not valid:
|
||||
out = errors[0][0] + ':\n' + '\n'.join(e[0] for e in errors[1:])
|
||||
log.error('Errors found for object with GUID %s:' % self.obj.guid)
|
||||
out = messages[0] + ':\n' + '\n'.join(messages[1:])
|
||||
self._save_object_error(out,self.obj,'Import')
|
||||
|
||||
unicode_gemini_string = etree.tostring(xml, encoding=unicode, pretty_print=True)
|
||||
|
@ -570,9 +573,9 @@ class GeminiHarvester(SpatialHarvester):
|
|||
if gemini_xml is None:
|
||||
self._save_gather_error('Content is not a valid Gemini document',self.harvest_job)
|
||||
|
||||
valid, messages = self._get_validator().is_valid(gemini_xml)
|
||||
valid, profile, errors = self._get_validator().is_valid(gemini_xml)
|
||||
if not valid:
|
||||
out = messages[0] + ':\n' + '\n'.join(messages[1:])
|
||||
out = errors[0][0] + ':\n' + '\n'.join(e[0] for e in errors[1:])
|
||||
if url:
|
||||
self._save_gather_error('Validation error for %s - %s'% (url,out),self.harvest_job)
|
||||
else:
|
||||
|
@ -780,7 +783,6 @@ class GeminiWafHarvester(GeminiHarvester, SingletonPlugin):
|
|||
self._save_gather_error('Unable to get content for URL: %s: %r' % \
|
||||
(url, e),harvest_job)
|
||||
return None
|
||||
|
||||
ids = []
|
||||
try:
|
||||
for url in self._extract_urls(content,url):
|
||||
|
@ -816,7 +818,6 @@ class GeminiWafHarvester(GeminiHarvester, SingletonPlugin):
|
|||
self._save_gather_error(msg,harvest_job)
|
||||
return None
|
||||
|
||||
|
||||
if len(ids) > 0:
|
||||
return ids
|
||||
else:
|
||||
|
|
|
@ -161,12 +161,14 @@ class CswService(OwsService):
|
|||
md = csw._exml.find("/{http://www.isotc211.org/2005/gmd}MD_Metadata")
|
||||
mdtree = etree.ElementTree(md)
|
||||
try:
|
||||
record["xml"] = etree.tostring(mdtree, pretty_print=True, xml_declaration=True)
|
||||
record["xml"] = etree.tostring(mdtree, pretty_print=True, encoding=unicode)
|
||||
except TypeError:
|
||||
# API incompatibilities between different flavours of elementtree
|
||||
try:
|
||||
record["xml"] = etree.tostring(mdtree)
|
||||
record["xml"] = etree.tostring(mdtree, pretty_print=True, encoding=unicode)
|
||||
except AssertionError:
|
||||
record["xml"] = etree.tostring(md)
|
||||
record["xml"] = etree.tostring(md, pretty_print=True, encoding=unicode)
|
||||
|
||||
record["xml"] = '<?xml version="1.0" encoding="UTF-8"?>\n' + record["xml"]
|
||||
record["tree"] = mdtree
|
||||
return record
|
||||
|
|
|
@ -603,6 +603,13 @@ class GeminiDocument(MappedXmlDocument):
|
|||
],
|
||||
multiplicity="*",
|
||||
),
|
||||
GeminiResourceLocator(
|
||||
name="resource-locator-identification",
|
||||
search_paths=[
|
||||
"gmd:identificationInfo//gmd:CI_OnlineResource",
|
||||
],
|
||||
multiplicity="*",
|
||||
),
|
||||
GeminiElement(
|
||||
name="conformity-specification",
|
||||
search_paths=[
|
||||
|
|
|
@ -449,7 +449,6 @@ class TestHarvest(HarvestFixtureBase):
|
|||
message = job.gather_errors[0].message
|
||||
|
||||
assert_in('Validation error', message)
|
||||
assert_in('Validating against "GEMINI 2.1 Schematron 1.2" profile failed', message)
|
||||
assert_in('One email address shall be provided', message)
|
||||
assert_in('Service type shall be one of \'discovery\', \'view\', \'download\', \'transformation\', \'invoke\' or \'other\' following INSPIRE generic names', message)
|
||||
assert_in('Limitations on public access code list value shall be \'otherRestrictions\'', message)
|
||||
|
|
|
@ -9,13 +9,15 @@ from ckanext.spatial import validation
|
|||
|
||||
class TestValidation:
|
||||
|
||||
def _get_file_path(self, file_name):
|
||||
return os.path.join(os.path.dirname(__file__), 'xml', file_name)
|
||||
|
||||
def get_validation_errors(self, validator, validation_test_filename):
|
||||
validation_test_filepath = os.path.join(os.path.dirname(__file__),
|
||||
'xml',
|
||||
validation_test_filename)
|
||||
validation_test_filepath = self._get_file_path(validation_test_filename)
|
||||
xml = etree.parse(validation_test_filepath)
|
||||
is_valid, errors = validator.is_valid(xml)
|
||||
return ';'.join(errors)
|
||||
|
||||
return ';'.join([e[0] for e in errors])
|
||||
|
||||
def test_iso19139_failure(self):
|
||||
errors = self.get_validation_errors(validation.ISO19139Schema,
|
||||
|
@ -117,7 +119,7 @@ class TestValidation:
|
|||
'gemini2.1/validation/13_Dataset_Invalid_Element_srv.xml')
|
||||
assert len(errors) > 0
|
||||
assert_in('(gmx.xsd)', errors)
|
||||
assert_in('(u"Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': This element is not expected.', errors)
|
||||
assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': This element is not expected.', errors)
|
||||
|
||||
def test_schematron_error_extraction(self):
|
||||
validation_error_xml = '''
|
||||
|
@ -137,3 +139,15 @@ class TestValidation:
|
|||
assert_in("srv:serviceType/*[1] = 'discovery'", details)
|
||||
assert_in("/*[local-name()='MD_Metadata'", details)
|
||||
assert_in("Service type shall be one of 'discovery'", details)
|
||||
|
||||
|
||||
def test_error_line_numbers(self):
|
||||
file_path = self._get_file_path('iso19139/dataset-invalid.xml')
|
||||
xml = etree.parse(file_path)
|
||||
is_valid, profile, errors = validation.Validators(profiles=['iso19139']).is_valid(xml)
|
||||
assert not is_valid
|
||||
assert len(errors) == 2
|
||||
|
||||
message, line = errors[1]
|
||||
assert 'This element is not expected' in message
|
||||
assert line == 3
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import os
|
||||
from pkg_resources import resource_stream, resource_filename
|
||||
from pkg_resources import resource_stream
|
||||
from ckanext.spatial.model import GeminiDocument
|
||||
|
||||
from lxml import etree
|
||||
|
@ -15,8 +15,12 @@ class BaseValidator(object):
|
|||
def is_valid(cls, xml):
|
||||
'''
|
||||
Runs the validation on the supplied XML etree.
|
||||
Returns a tuple, the first value is a boolean indicating
|
||||
whether the validation passed or not. The second is a list of tuples,
|
||||
each containing the error message and the error line.
|
||||
|
||||
Returns tuple:
|
||||
(is_valid, error_message_list)
|
||||
(is_valid, [(error_message_string, error_line_number)])
|
||||
'''
|
||||
raise NotImplementedError
|
||||
|
||||
|
@ -26,7 +30,9 @@ class XsdValidator(BaseValidator):
|
|||
@classmethod
|
||||
def _is_valid(cls, xml, xsd_filepath, xsd_name):
|
||||
'''Returns whether or not an XML file is valid according to
|
||||
an XSD.
|
||||
an XSD. Returns a tuple, the first value is a boolean indicating
|
||||
whether the validation passed or not. The second is a list of tuples,
|
||||
each containing the error message and the error line.
|
||||
|
||||
Params:
|
||||
xml - etree of the XML to be validated
|
||||
|
@ -34,7 +40,7 @@ class XsdValidator(BaseValidator):
|
|||
xsd_name - string describing the XSD
|
||||
|
||||
Returns:
|
||||
(is_valid_boolean, list_of_error_message_strings)
|
||||
(is_valid, [(error_message_string, error_line_number)])
|
||||
'''
|
||||
xsd = etree.parse(xsd_filepath)
|
||||
schema = etree.XMLSchema(xsd)
|
||||
|
@ -44,12 +50,13 @@ class XsdValidator(BaseValidator):
|
|||
# XMLSchemaParseError: local list type: A type, derived by list or union, must have the simple ur-type definition as base type, not '{http://www.opengis.net/gml/3.2}doubleList'., line 118
|
||||
try:
|
||||
schema.assertValid(xml)
|
||||
except AssertionError, e:
|
||||
msg = '%s Schema Error: %s' % (xsd_name, e.args)
|
||||
return False, [msg]
|
||||
except etree.DocumentInvalid, e:
|
||||
msg = '%s Validation Error: %s' % (xsd_name, e.args)
|
||||
return False, [msg]
|
||||
except etree.DocumentInvalid:
|
||||
log.info('Validation errors found using schema {0}'.format(xsd_name))
|
||||
errors = []
|
||||
for error in schema.error_log:
|
||||
errors.append((error.message, error.line))
|
||||
errors.insert
|
||||
return False, errors
|
||||
return True, []
|
||||
|
||||
|
||||
|
@ -62,7 +69,11 @@ class ISO19139Schema(XsdValidator):
|
|||
xsd_path = 'xml/iso19139'
|
||||
gmx_xsd_filepath = os.path.join(os.path.dirname(__file__),
|
||||
xsd_path, 'gmx/gmx.xsd')
|
||||
is_valid, errors = cls._is_valid(xml, gmx_xsd_filepath, 'Dataset schema (gmx.xsd)')
|
||||
xsd_name = 'Dataset schema (gmx.xsd)'
|
||||
is_valid, errors = cls._is_valid(xml, gmx_xsd_filepath, xsd_name)
|
||||
if not is_valid:
|
||||
#TODO: not sure if we need this one, keeping for backwards compatibility
|
||||
errors.insert(0, ('{0} Validation Error'.format(xsd_name), None))
|
||||
return is_valid, errors
|
||||
|
||||
class ISO19139EdenSchema(XsdValidator):
|
||||
|
@ -78,15 +89,23 @@ class ISO19139EdenSchema(XsdValidator):
|
|||
if metadata_type in ('dataset', 'series'):
|
||||
gmx_xsd_filepath = os.path.join(os.path.dirname(__file__),
|
||||
xsd_path, 'gmx/gmx.xsd')
|
||||
is_valid, errors = cls._is_valid(xml, gmx_xsd_filepath, 'Dataset schema (gmx.xsd)')
|
||||
xsd_name = 'Dataset schema (gmx.xsd)'
|
||||
is_valid, errors = cls._is_valid(xml, gmx_xsd_filepath, xsd_name)
|
||||
if not is_valid:
|
||||
#TODO: not sure if we need this one, keeping for backwards compatibility
|
||||
errors.insert(0, ('{0} Validation Error'.format(xsd_name), None))
|
||||
elif metadata_type == 'service':
|
||||
gmx_and_srv_xsd_filepath = os.path.join(os.path.dirname(__file__),
|
||||
xsd_path, 'gmx_and_srv.xsd')
|
||||
is_valid, errors = cls._is_valid(xml, gmx_and_srv_xsd_filepath, 'Service schemas (gmx.xsd & srv.xsd)')
|
||||
xsd_name = 'Service schemas (gmx.xsd & srv.xsd)'
|
||||
is_valid, errors = cls._is_valid(xml, gmx_and_srv_xsd_filepath, xsd_name)
|
||||
if not is_valid:
|
||||
#TODO: not sure if we need this one, keeping for backwards compatibility
|
||||
errors.insert(0, ('{0} Validation Error'.format(xsd_name), None))
|
||||
else:
|
||||
is_valid = False
|
||||
errors = ['Metadata type not recognised "%s" - cannot choose an ISO19139 validator.' %
|
||||
metadata_type]
|
||||
errors = [('Metadata type not recognised "%s" - cannot choose an ISO19139 validator.' %
|
||||
metadata_type, None)]
|
||||
if is_valid:
|
||||
return True, []
|
||||
|
||||
|
@ -121,12 +140,7 @@ class ISO19139NGDCSchema(XsdValidator):
|
|||
|
||||
xsd_filepath = os.path.join(os.path.dirname(__file__),
|
||||
xsd_path, 'schema.xsd')
|
||||
is_valid, errors = cls._is_valid(xml, xsd_filepath, 'NGDC Schema (schema.xsd)')
|
||||
|
||||
if is_valid:
|
||||
return True, []
|
||||
|
||||
return False, errors
|
||||
return cls._is_valid(xml, xsd_filepath, 'NGDC Schema (schema.xsd)')
|
||||
|
||||
class FGDCSchema(XsdValidator):
|
||||
'''
|
||||
|
@ -147,12 +161,8 @@ class FGDCSchema(XsdValidator):
|
|||
|
||||
xsd_filepath = os.path.join(os.path.dirname(__file__),
|
||||
xsd_path, 'fgdc-std-001-1998.xsd')
|
||||
is_valid, errors = cls._is_valid(xml, xsd_filepath, 'FGDC Schema (fgdc-std-001-1998.xsd)')
|
||||
return cls._is_valid(xml, xsd_filepath, 'FGDC Schema (fgdc-std-001-1998.xsd)')
|
||||
|
||||
if is_valid:
|
||||
return True, []
|
||||
|
||||
return False, errors
|
||||
|
||||
class SchematronValidator(BaseValidator):
|
||||
'''Base class for a validator that uses Schematron.'''
|
||||
|
@ -166,6 +176,19 @@ class SchematronValidator(BaseValidator):
|
|||
|
||||
@classmethod
|
||||
def is_valid(cls, xml):
|
||||
'''Returns whether or not an XML file is valid according to
|
||||
a schematron. Returns a tuple, the first value is a boolean indicating
|
||||
whether the validation passed or not. The second is a list of tuples,
|
||||
each containing the error message and the error line (which defaults to
|
||||
None on the schematron validation case).
|
||||
|
||||
Params:
|
||||
xml - etree of the XML to be validated
|
||||
|
||||
Returns:
|
||||
(is_valid, [(error_message_string, error_line_number)])
|
||||
'''
|
||||
|
||||
if not hasattr(cls, 'schematrons'):
|
||||
log.info('Compiling schematron "%s"', cls.title)
|
||||
cls.schematrons = cls.get_schematrons()
|
||||
|
@ -180,7 +203,8 @@ class SchematronValidator(BaseValidator):
|
|||
for error in errors:
|
||||
message, details = cls.extract_error_details(error)
|
||||
if not message in messages_already_reported:
|
||||
error_details.append(details)
|
||||
#TODO: perhaps can extract the source line from the error location
|
||||
error_details.append((details,None))
|
||||
messages_already_reported.add(message)
|
||||
return False, error_details
|
||||
return True, []
|
||||
|
@ -198,7 +222,8 @@ class SchematronValidator(BaseValidator):
|
|||
location = failed_assert_element.get('location')
|
||||
message_element = failed_assert_element.find("{http://purl.oclc.org/dsdl/svrl}text")
|
||||
message = message_element.text.strip()
|
||||
failed_assert_element
|
||||
|
||||
#TODO: Do we really need such detail on the error messages?
|
||||
return message, 'Error Message: "%s" Error Location: "%s" Error Assert: "%s"' % (message, location, assert_)
|
||||
|
||||
@classmethod
|
||||
|
@ -256,26 +281,44 @@ class Validators(object):
|
|||
def __init__(self, profiles=["iso19139", "constraints", "gemini2"]):
|
||||
self.profiles = profiles
|
||||
|
||||
self.validators = {} # name: class
|
||||
for validator_class in all_validators:
|
||||
self.validators[validator_class.name] = validator_class
|
||||
|
||||
def add_validator(self, validator_class):
|
||||
self.validators[validator_class.name] = validator_class
|
||||
|
||||
def isvalid(self, xml):
|
||||
'''For backward compatibility'''
|
||||
return self.is_valid(xml)
|
||||
|
||||
def is_valid(self, xml):
|
||||
if not hasattr(self, 'validators'):
|
||||
self.validators = {} # name: class
|
||||
for validator_class in all_validators:
|
||||
self.validators[validator_class.name] = validator_class
|
||||
'''Returns whether or not an XML file is valid.
|
||||
Returns a tuple, the first value is a boolean indicating
|
||||
whether the validation passed or not. The second is the name of the profile
|
||||
that failed and the third is a list of tuples,
|
||||
each containing the error message and the error line if present.
|
||||
|
||||
Params:
|
||||
xml - etree of the XML to be validated
|
||||
|
||||
Returns:
|
||||
(is_valid, failed_profile_name, [(error_message_string, error_line_number)])
|
||||
'''
|
||||
|
||||
|
||||
log.debug('Starting validation against profile(s) %s' % ','.join(self.profiles))
|
||||
for name in self.profiles:
|
||||
validator = self.validators[name]
|
||||
is_valid, error_message_list = validator.is_valid(xml)
|
||||
if not is_valid:
|
||||
error_message_list.insert(0, 'Validating against "%s" profile failed' % validator.title)
|
||||
log.info('%r', error_message_list)
|
||||
return False, error_message_list
|
||||
log.info('Validated against "%s"', validator.title)
|
||||
#error_message_list.insert(0, 'Validating against "%s" profile failed' % validator.title)
|
||||
log.info('Validating against "%s" profile failed' % validator.title)
|
||||
log.debug('%r', error_message_list)
|
||||
return False, validator.name, error_message_list
|
||||
log.debug('Validated against "%s"', validator.title)
|
||||
log.info('Validation passed')
|
||||
return True, []
|
||||
return True, None, []
|
||||
|
||||
if __name__ == '__main__':
|
||||
from sys import argv
|
||||
|
|
Loading…
Reference in New Issue