spatial-d4science/ckanext/spatial/util.py

206 lines
5.6 KiB
Python
Raw Normal View History

2019-12-11 13:22:28 +01:00
# -*- coding: utf-8 -*-
2019-12-11 13:23:03 +01:00
from __future__ import print_function
2019-12-11 13:22:28 +01:00
import os
import sys
2020-04-14 23:06:11 +02:00
import six
from pkg_resources import resource_stream
2019-12-11 13:22:28 +01:00
import logging
from ckan.lib.helpers import json
from lxml import etree
from pprint import pprint
from ckan import model
2019-12-11 13:22:28 +01:00
from ckanext.spatial.lib import save_package_extent
from ckanext.spatial.lib.reports import validation_report
from ckanext.spatial.harvesters import SpatialHarvester
from ckanext.spatial.model import ISODocument
from ckantoolkit import config
2019-12-11 13:22:28 +01:00
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
def report(pkg=None):
    """Print the validation report to stdout, one block per report row.

    :param pkg: optional package reference. It is converted to text and
        looked up with ``model.Package.get``. When omitted (or falsy) the
        report is requested with ``package_id=None``, i.e. without
        restricting it to one package (assumes ``validation_report``
        treats ``None`` like the argument-less call -- TODO confirm).

    Exits the process with status 1 when ``pkg`` is given but no matching
    package is found.
    """
    package_id = None
    if pkg:
        package_ref = six.text_type(pkg)
        package = model.Package.get(package_ref)
        if not package:
            print('Package ref "%s" not recognised' % package_ref)
            sys.exit(1)
        package_id = package.id

    # ``pkg.id`` used to be dereferenced unconditionally here, which raised
    # AttributeError whenever the function was called without a package.
    table = validation_report(package_id=package_id)
    for row in table.get_rows_html_formatted():
        # Blank line between rows.
        print()
        for i, col_name in enumerate(table.column_names):
            print(' %s: %s' % (col_name, row[i]))
2019-12-11 13:22:28 +01:00
def validate_file(metadata_filepath):
    """Validate a metadata XML file and print a summary to stdout.

    The file is checked with the validator obtained from
    ``SpatialHarvester()._get_validator()``; if that passes, the values are
    additionally read through ``ISODocument`` and any exception raised
    there is reported as a validation error.

    :param metadata_filepath: path of the XML file to validate.

    Exits the process with status 1 when the file does not exist or its
    content is not valid UTF-8.
    """
    if not os.path.exists(metadata_filepath):
        print('Filepath %s not found' % metadata_filepath)
        sys.exit(1)
    with open(metadata_filepath, 'rb') as f:
        metadata_xml = f.read()

    validators = SpatialHarvester()._get_validator()
    print('Validators: %r' % validators.profiles)

    # The file is read as bytes. Calling ``.encode`` on it (as was done
    # before) fails on Python 3 and triggers an implicit ASCII decode on
    # Python 2, so check the encoding by decoding instead.
    try:
        metadata_xml.decode('utf-8')
    except UnicodeDecodeError as e:
        print('ERROR: Unicode Error reading file \'%s\': %s' %
              (metadata_filepath, e))
        sys.exit(1)

    # Keep the raw bytes for parsing so that an XML encoding declaration
    # in the document is handled by the parser.
    xml_string = metadata_xml
    xml = etree.fromstring(xml_string)

    # XML validation
    valid, errors = validators.is_valid(xml)

    # CKAN read of values
    if valid:
        try:
            ISODocument(xml_string).read_values()
        except Exception as e:
            # Deliberately broad: any failure while reading the values is
            # reported in the summary rather than aborting the command.
            valid = False
            errors.append(
                'CKAN exception reading values from ISODocument: %s' % e)

    print('***************')
    print('Summary')
    print('***************')
    print('File: \'%s\'' % metadata_filepath)
    print('Valid: %s' % valid)
    if not valid:
        print('Errors:')
        # pprint() writes to stdout itself and returns None; wrapping it
        # in print() emitted a stray "None" line.
        pprint(errors)
    print('***************')
2019-12-11 13:22:28 +01:00
def report_csv(csv_filepath):
    """Dump the validation report as CSV into ``csv_filepath``.

    :param csv_filepath: destination path; the file is opened in binary
        write mode and receives the return value of the report's
        ``get_csv()``.
    """
    from ckanext.spatial.lib.reports import validation_report

    full_report = validation_report()
    with open(csv_filepath, 'wb') as csv_file:
        csv_payload = full_report.get_csv()
        csv_file.write(csv_payload)
def initdb(srid=None):
    """Run the spatial model setup and report completion on stdout.

    :param srid: optional SRID; a truthy value is converted to text before
        being handed to ``ckanext.spatial.model.setup``, a falsy one is
        passed through unchanged.
    """
    srid_arg = six.text_type(srid) if srid else srid

    from ckanext.spatial.model import setup as create_tables

    create_tables(srid_arg)
    print('DB tables created')
2019-12-11 13:22:28 +01:00
def update_extents():
    """Rebuild the stored extent of every package with a 'spatial' extra.

    For each package owning a ``spatial`` extra, the extra's value is
    parsed as JSON and passed to ``save_package_extent``. Packages whose
    value cannot be decoded are skipped and listed in the error output.
    The session is committed once at the end, and a summary is printed to
    stdout.
    """
    from ckan.model import PackageExtra, Session

    spatial_extras = Session.query(PackageExtra).filter(
        PackageExtra.key == 'spatial').all()
    packages = [extra.package for extra in spatial_extras]

    errors = []
    count = 0
    for package in packages:
        try:
            value = package.extras['spatial']
            log.debug('Received: %r', value)
            geometry = json.loads(value)
        except (ValueError, TypeError) as e:
            errors.append(u'Package %s - Error decoding JSON object: %s' %
                          (package.id, six.text_type(e)))
            # Skip saving: previously execution fell through and saved
            # either an unbound name (NameError) or the geometry left over
            # from the previous package.
            continue

        count += 1
        save_package_extent(package.id, geometry)

    Session.commit()

    if errors:
        msg = 'Errors were found:\n%s' % '\n'.join(errors)
        print(msg)

    msg = "Done. Extents generated for %i out of %i packages" % (
        count, len(packages))
    print(msg)
def get_xslt(original=False):
    """Return the configured XSLT location as ``(package, path)``.

    :param original: when true, read the
        ``ckanext.spatial.harvest.xslt_html_content_original`` option,
        otherwise ``ckanext.spatial.harvest.xslt_html_content``.
    :returns: a ``(xslt_package, xslt_path)`` tuple, or ``(None, None)``
        when the option is unset/empty or not of the form
        ``<package>:<path>`` (the latter is also logged as an error).
    """
    if original:
        config_option = 'ckanext.spatial.harvest.xslt_html_content_original'
    else:
        config_option = 'ckanext.spatial.harvest.xslt_html_content'

    xslt = config.get(config_option, None)
    if not xslt:
        return None, None

    # Split on the first colon only, so that a path which itself contains
    # a colon is kept whole instead of being silently truncated.
    xslt_package, separator, xslt_path = xslt.partition(':')
    if not separator:
        log.error(
            'XSLT should be defined in the form <package>:<path>'
            ', eg ckanext.myext:templates/my.xslt')
        return None, None

    return xslt_package, xslt_path
def get_harvest_object_original_content(id):
    """Return the 'original_document' extra value of a harvest object.

    :param id: id of the harvest object.
    :returns: the ``value`` of the first matching ``HarvestObjectExtra``
        with key ``original_document``, or ``None`` if there is none.
    """
    from ckanext.harvest.model import HarvestObject, HarvestObjectExtra

    query = model.Session.query(HarvestObjectExtra).join(HarvestObject)
    query = query.filter(HarvestObject.id == id)
    query = query.filter(HarvestObjectExtra.key == 'original_document')
    document_extra = query.first()

    return document_extra.value if document_extra else None
def get_harvest_object_content(id):
    """Return the ``content`` of a harvest object.

    :param id: id of the harvest object.
    :returns: the ``content`` attribute of the first ``HarvestObject``
        with that id, or ``None`` if no such object is found.
    """
    from ckanext.harvest.model import HarvestObject

    matching = model.Session.query(HarvestObject).filter(
        HarvestObject.id == id)
    harvest_object = matching.first()

    return harvest_object.content if harvest_object else None
def _transform_to_html(content, xslt_package=None, xslt_path=None):
    """Apply an XSLT stylesheet to ``content`` and return the serialised result.

    :param content: XML document to transform.
    :param xslt_package: package the stylesheet resource is loaded from;
        falls back to this module's name when falsy.
    :param xslt_path: resource path of the stylesheet inside that package;
        falls back to the bundled gemini2 HTML stylesheet when falsy.
    :returns: the output of ``etree.tostring(..., pretty_print=True)`` for
        the transformed document.
    """
    if not xslt_package:
        xslt_package = __name__
    if not xslt_path:
        xslt_path = (
            '../templates/ckanext/spatial/gemini2-html-stylesheet.xsl')

    # optimise -- read transform only once and compile rather
    # than at each request
    with resource_stream(xslt_package, xslt_path) as stylesheet_file:
        transform = etree.XSLT(etree.parse(stylesheet_file))

    # Falsy content is handed to StringIO as-is; anything else is coerced
    # to text first.
    source_text = content and six.text_type(content)
    document = etree.parse(six.StringIO(source_text))

    return etree.tostring(transform(document), pretty_print=True)