2012-10-19 19:20:32 +02:00
|
|
|
import sys
|
|
|
|
import re
|
2012-12-05 12:42:57 +01:00
|
|
|
import os
|
2012-10-19 19:20:32 +02:00
|
|
|
from pprint import pprint
|
|
|
|
import logging
|
|
|
|
|
|
|
|
from lxml import etree
|
|
|
|
|
|
|
|
from ckan.lib.cli import CkanCommand
|
|
|
|
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
class Validation(CkanCommand):
    '''Validation commands

    Usage:
        validation report [package-name]
            Performs validation on the harvested metadata, either for all
            packages or the one specified.

        validation report-csv <filename>.csv
            Performs validation on all the harvested metadata in the db and
            writes a report in CSV format to the given filepath.

        validation file <filename>.xml
            Performs validation on the given metadata file.
    '''
    # NOTE: the class docstring above is runtime text - it is printed verbatim
    # as the usage message and its first line is used as the summary, so edit
    # it with the same care as any other user-facing string.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 3
    min_args = 0

    def command(self):
        '''Dispatch to the sub-command named by the first CLI argument.

        Prints the usage text and exits with status 1 when no argument (or a
        help flag) is given. The config is loaded before any sub-command
        runs. An unknown sub-command only prints a message.
        '''
        if not self.args or self.args[0] in ['--help', '-h', 'help']:
            print(self.usage)
            sys.exit(1)

        self._load_config()

        cmd = self.args[0]
        if cmd == 'report':
            self.report()
        elif cmd == 'report-csv':
            self.report_csv()
        elif cmd == 'file':
            self.validate_file()
        else:
            print('Command %s not recognized' % cmd)

    def report(self):
        '''Print the validation report, one "column: value" line per cell.

        If a second argument is given it is looked up as a package reference
        and the report is restricted to that package; an unknown reference
        prints an error and exits with status 1. Without it the report is
        requested with no package filter.
        '''
        # Imported here rather than at module level, as in the original code
        # (presumably because they need the config loaded by command()).
        from ckan import model
        from ckanext.harvest.model import HarvestObject
        from ckanext.spatial.lib.reports import validation_report

        if len(self.args) >= 2:
            package_ref = unicode(self.args[1])
            pkg = model.Package.get(package_ref)
            if not pkg:
                print('Package ref "%s" not recognised' % package_ref)
                sys.exit(1)
        else:
            pkg = None

        # With no package given there is no ``pkg.id`` to read (the original
        # code raised AttributeError on None here), so request the
        # unfiltered report the same way report_csv() does.
        if pkg is not None:
            report = validation_report(package_id=pkg.id)
        else:
            report = validation_report()

        for row in report.get_rows_html_formatted():
            # Blank line between rows.
            print('')
            for i, col_name in enumerate(report.column_names):
                print('  %s: %s' % (col_name, row[i]))

    def validate_file(self):
        '''Validate a single metadata XML file and print a summary.

        Expects exactly one extra argument: the path of the file. The file is
        checked with the harvester's validator and, if that passes, read
        through ISODocument; any exception raised there is reported as a
        validation error. Exits with status 1 on a wrong argument count, a
        missing file or a unicode error.
        '''
        from ckanext.spatial.harvesters import SpatialHarvester
        from ckanext.spatial.model import ISODocument

        if len(self.args) > 2:
            print('Too many parameters %i' % len(self.args))
            sys.exit(1)
        if len(self.args) < 2:
            print('Not enough parameters %i' % len(self.args))
            sys.exit(1)
        metadata_filepath = self.args[1]
        if not os.path.exists(metadata_filepath):
            print('Filepath %s not found' % metadata_filepath)
            sys.exit(1)
        with open(metadata_filepath, 'rb') as f:
            metadata_xml = f.read()

        validators = SpatialHarvester()._get_validator()
        print('Validators: %r' % validators.profiles)
        try:
            # NOTE(review): metadata_xml was read in binary mode; on Python 2
            # encoding a byte string first decodes it as ASCII, so non-ASCII
            # content ends up in the handler below - confirm this is intended.
            xml_string = metadata_xml.encode("utf-8")
        except UnicodeDecodeError as e:
            print('ERROR: Unicode Error reading file \'%s\': %s' %
                  (metadata_filepath, e))
            sys.exit(1)
        xml = etree.fromstring(xml_string)

        # XML validation
        valid, errors = validators.is_valid(xml)

        # CKAN read of values
        if valid:
            try:
                iso_document = ISODocument(xml_string)
                # Only called to surface read errors; the values are unused.
                iso_document.read_values()
            except Exception as e:
                valid = False
                errors.append(
                    'CKAN exception reading values from ISODocument: %s' % e)

        print('***************')
        print('Summary')
        print('***************')
        print('File: \'%s\'' % metadata_filepath)
        print('Valid: %s' % valid)
        if not valid:
            print('Errors:')
            # pprint() prints by itself and returns None; wrapping it in a
            # print would emit a stray "None" line after the errors.
            pprint(errors)
        print('***************')

    def report_csv(self):
        '''Write the validation report as CSV to the path given as argument.

        Requires exactly one extra argument (the output file path); otherwise
        prints an error and exits with status 1.
        '''
        from ckanext.spatial.lib.reports import validation_report
        if len(self.args) != 2:
            print('Wrong number of arguments')
            sys.exit(1)
        csv_filepath = self.args[1]
        report = validation_report()
        with open(csv_filepath, 'wb') as f:
            f.write(report.get_csv())