[#10] Improve harvested metadata API
Some improvements to the endpoints that return the contents of the harvest objects: * Nicer URLs, with redirects from the old ones * Returning the raw harvest object content is already available in the main harvest extension, so just redirect there * Support for showing the original document of a harvest object, if present * Support for defining a custom XSLT for the HTML view, via ckanext.spatial.harvest.xslt_html_content and ckanext.spatial.harvest.xslt_html_content_original
This commit is contained in:
parent
8647f90cb6
commit
cc60327d0b
24
README.rst
24
README.rst
|
@ -212,9 +212,29 @@ Enabled with the ``ckan.plugins = spatial_harvest_metadata_api`` (previous known
|
|||
|
||||
To view the harvest objects (containing the harvested metadata) in the web interface, these controller locations are added:
|
||||
|
||||
/api/2/rest/harvestobject/<id>/xml
|
||||
* raw XML document: /harvest/object/{id}
|
||||
* HTML representation: /harvest/object/{id}/html
|
||||
|
||||
/api/2/rest/harvestobject/<id>/html
|
||||
.. note::
|
||||
The old URLs are now deprecated and redirect to the new ones listed above:
|
||||
|
||||
/api/2/rest/harvestobject/<id>/xml
|
||||
/api/2/rest/harvestobject/<id>/html
|
||||
|
||||
|
||||
For those harvest objects that have an original document (which was transformed to ISO), this can be accessed via:
|
||||
|
||||
* raw XML document: /harvest/object/{id}/original
|
||||
* HTML representation: /harvest/object/{id}/html/original
|
||||
|
||||
The HTML representation is created via an XSLT transformation. The extension provides an XSLT file that should work
|
||||
on ISO 19139 based documents, but if you want to use your own in your extension, you can override it using
|
||||
the following configuration options::
|
||||
|
||||
ckanext.spatial.harvest.xslt_html_content = ckanext.myext:templates/xslt/custom.xslt
|
||||
ckanext.spatial.harvest.xslt_html_content_original = ckanext.myext:templates/xslt/custom2.xslt
|
||||
|
||||
If your project does not transform different metadata types you can ignore the second option.
|
||||
|
||||
|
||||
CSW Client
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
import logging
|
||||
|
||||
try: from cStringIO import StringIO
|
||||
except ImportError: from StringIO import StringIO
|
||||
|
||||
|
@ -10,9 +12,10 @@ from ckan.lib.base import request, config, abort
|
|||
from ckan.controllers.api import ApiController as BaseApiController
|
||||
from ckan.model import Session
|
||||
|
||||
from ckanext.harvest.model import HarvestObject
|
||||
from ckanext.harvest.model import HarvestObject, HarvestObjectExtra
|
||||
from ckanext.spatial.lib import get_srid, validate_bbox, bbox_query
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
class ApiController(BaseApiController):
|
||||
|
||||
|
@ -46,33 +49,91 @@ class ApiController(BaseApiController):
|
|||
|
||||
class HarvestMetadataApiController(BaseApiController):
    """Expose the documents stored on harvest objects.

    The ``display_*`` actions return either the original document kept in
    the ``original_document`` extra of a harvest object, or an HTML
    rendering (produced with an XSLT stylesheet) of the harvest object
    content or of that original document.
    """

    def _get_content(self, id):
        """Return the ``content`` of the harvest object ``id``.

        Returns None when no harvest object with that id is found.
        """
        obj = Session.query(HarvestObject) \
            .filter(HarvestObject.id == id).first()
        if obj:
            return obj.content
        else:
            return None

    def _get_original_content(self, id):
        """Return the value of the ``original_document`` extra of ``id``.

        Returns None when the harvest object has no such extra.
        """
        extra = Session.query(HarvestObjectExtra).join(HarvestObject) \
            .filter(HarvestObject.id == id) \
            .filter(HarvestObjectExtra.key == 'original_document').first()
        if extra:
            return extra.value
        else:
            return None

    def _transform_to_html(self, content, xslt_package=None, xslt_path=None):
        """Transform an XML document with an XSLT stylesheet.

        :param content: the XML document, as text; it is encoded to UTF-8
            before being parsed
        :param xslt_package: package that contains the stylesheet
            (defaults to ``ckanext.spatial``)
        :param xslt_path: path of the stylesheet inside ``xslt_package``
            (defaults to the bundled GEMINI2 HTML stylesheet)
        :returns: the serialized result of the transformation

        The ``Content-Type`` and ``Content-Length`` response headers are
        set as a side effect.
        """
        xslt_package = xslt_package or 'ckanext.spatial'
        xslt_path = xslt_path or 'templates/ckanext/spatial/gemini2-html-stylesheet.xsl'

        ## optimise -- read transform only once and compile rather
        ## than at each request
        with resource_stream(xslt_package, xslt_path) as style:
            style_xml = etree.parse(style)
            transformer = etree.XSLT(style_xml)

        xml = etree.parse(StringIO(content.encode('utf-8')))
        html = transformer(xml)

        # Serialize first: the Content-Length header has to describe the
        # body that is actually returned, not the XML that was transformed.
        result = etree.tostring(html, pretty_print=True)

        response.headers['Content-Type'] = 'text/html; charset=utf-8'
        # Header values are sent as strings
        response.headers['Content-Length'] = str(len(result))

        return result

    def _get_xslt(self, original=False):
        """Read the custom XSLT location from the configuration.

        :param original: if True read the option used for original
            documents (``ckanext.spatial.harvest.xslt_html_content_original``),
            otherwise ``ckanext.spatial.harvest.xslt_html_content``
        :returns: a ``(package, path)`` tuple; both items are None when the
            option is not set or is not in the ``<package>:<path>`` form
        """
        if original:
            config_option = 'ckanext.spatial.harvest.xslt_html_content_original'
        else:
            config_option = 'ckanext.spatial.harvest.xslt_html_content'

        xslt_package = None
        xslt_path = None
        xslt = config.get(config_option, None)
        if xslt:
            if ':' in xslt:
                # Split on the first colon only, so that a path which
                # itself contains a colon is kept whole
                xslt_package, xslt_path = xslt.split(':', 1)
            else:
                log.error('XSLT should be defined in the form <package>:<path>'
                          ', eg ckanext.myext:templates/my.xslt')

        return xslt_package, xslt_path

    def display_xml_original(self, id):
        """Return the original document of harvest object ``id`` as XML.

        Calls ``abort(404)`` when there is no original document.
        """
        content = self._get_original_content(id)

        if not content:
            abort(404)

        # Encode before measuring: Content-Length counts the bytes sent,
        # which differs from the text length for non-ASCII documents.
        body = content.encode('utf-8')

        response.headers['Content-Type'] = 'application/xml; charset=utf-8'
        response.headers['Content-Length'] = str(len(body))

        return body

    def display_html(self, id):
        """Return an HTML rendering of the content of harvest object ``id``.

        Calls ``abort(404)`` when the object is missing or has no content.
        """
        content = self._get_content(id)

        if not content:
            abort(404)

        xslt_package, xslt_path = self._get_xslt()
        return self._transform_to_html(content, xslt_package, xslt_path)

    def display_html_original(self, id):
        """Return an HTML rendering of the original document of ``id``.

        Calls ``abort(404)`` when there is no original document.
        """
        content = self._get_original_content(id)

        # Same emptiness check as the other actions, so that an empty
        # document gives a 404 instead of being handed to the XML parser
        if not content:
            abort(404)

        xslt_package, xslt_path = self._get_xslt(original=True)
        return self._transform_to_html(content, xslt_package, xslt_path)
|
||||
|
|
|
@ -267,10 +267,24 @@ class HarvestMetadataApi(p.SingletonPlugin):
|
|||
def before_map(self, route_map):
    """Add the harvested metadata routes and the redirects for old URLs.

    :param route_map: the route mapper to extend
    :returns: the same ``route_map``, with the routes added
    """
    api_controller = "ckanext.spatial.controllers.api:HarvestMetadataApiController"
    moved_permanently = '301 Moved Permanently'

    # Showing the harvest object content is an action of the default
    # harvest plugin, so just redirect there
    route_map.redirect(
        '/api/2/rest/harvestobject/{id:.*}/xml',
        '/harvest/object/{id}',
        _redirect_code=moved_permanently,
    )

    # Endpoints handled by this extension's controller
    for path, action in (
        ('/harvest/object/{id}/original', 'display_xml_original'),
        ('/harvest/object/{id}/html', 'display_html'),
        ('/harvest/object/{id}/html/original', 'display_html_original'),
    ):
        route_map.connect(path, controller=api_controller, action=action)

    # Redirect old URL to a nicer and unversioned one
    route_map.redirect(
        '/api/2/rest/harvestobject/:id/html',
        '/harvest/object/{id}/html',
        _redirect_code=moved_permanently,
    )

    return route_map
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import logging
|
||||
import json
|
||||
from pprint import pprint
|
||||
from nose.plugins.skip import SkipTest
|
||||
from nose.tools import assert_equal, assert_raises
|
||||
from ckan.logic.action.create import package_create
|
||||
from ckan.logic.action.delete import package_delete
|
||||
|
@ -125,3 +126,92 @@ class TestActionPackageSearch(SpatialTestBase,WsgiAppCase):
|
|||
assert_equal(result['count'], 1)
|
||||
assert_equal(result['results'][0]['name'], 'test-spatial-dataset-search-point-2')
|
||||
|
||||
|
||||
class TestHarvestedMetadataAPI(WsgiAppCase):
    """Functional tests for the endpoints serving harvest object documents."""

    @classmethod
    def setup_class(cls):
        """Store two harvest objects; only the second has an original document.

        The tests are skipped when the harvest extension cannot be imported.
        """
        try:
            from ckanext.harvest.model import HarvestObject, HarvestJob, HarvestSource, HarvestObjectExtra
        except ImportError:
            raise SkipTest('The harvester extension is needed for these tests')

        # First object: content only
        cls.content1 = '<xml>Content 1</xml>'
        first_job = HarvestJob(source=HarvestSource(url='http://', type='xx'))
        ho1 = HarvestObject(guid='test-ho-1', job=first_job, content=cls.content1)

        # Second object: content plus the original document in an extra
        cls.content2 = '<xml>Content 2</xml>'
        cls.original_content2 = '<xml>Original Content 2</xml>'
        second_job = HarvestJob(source=HarvestSource(url='http://', type='xx'))
        ho2 = HarvestObject(guid='test-ho-2', job=second_job, content=cls.content2)

        hoe = HarvestObjectExtra(key='original_document',
                                 value=cls.original_content2,
                                 object=ho2)

        for record in (ho1, ho2, hoe):
            Session.add(record)
        Session.commit()

        cls.object_id_1 = ho1.id
        cls.object_id_2 = ho2.id

    def test_api(self):
        """Check the redirects, the raw documents and the HTML renderings."""
        id_1 = self.object_id_1
        id_2 = self.object_id_2

        # Test redirects for old URLs
        old_xml_url = '/api/2/rest/harvestobject/{0}/xml'.format(id_1)
        r = self.app.get(old_xml_url)
        assert r.status == 301
        assert '/harvest/object/{0}'.format(id_1) in r.header_dict['Location']

        old_html_url = '/api/2/rest/harvestobject/{0}/html'.format(id_1)
        r = self.app.get(old_html_url)
        assert r.status == 301
        assert '/harvest/object/{0}/html'.format(id_1) in r.header_dict['Location']

        # Access object content
        r = self.app.get('/harvest/object/{0}'.format(id_1))
        assert r.status == 200
        assert r.header_dict['Content-Type'] == 'application/xml; charset=utf-8'
        assert r.body == self.content1

        # Access original content in object extra (if present)
        r = self.app.get('/harvest/object/{0}/original'.format(id_1), status=404)
        assert r.status == 404

        r = self.app.get('/harvest/object/{0}/original'.format(id_2))
        assert r.status == 200
        assert r.header_dict['Content-Type'] == 'application/xml; charset=utf-8'
        assert r.body == self.original_content2

        # Access HTML transformation
        r = self.app.get('/harvest/object/{0}/html'.format(id_1))
        assert r.status == 200
        assert r.header_dict['Content-Type'] == 'text/html; charset=utf-8'
        assert 'GEMINI record about' in r.body

        # The first object has no original document to render
        r = self.app.get('/harvest/object/{0}/html/original'.format(id_1), status=404)
        assert r.status == 404

        r = self.app.get('/harvest/object/{0}/html'.format(id_2))
        assert r.status == 200
        assert r.header_dict['Content-Type'] == 'text/html; charset=utf-8'
        assert 'GEMINI record about' in r.body

        r = self.app.get('/harvest/object/{0}/html/original'.format(id_2))
        assert r.status == 200
        assert r.header_dict['Content-Type'] == 'text/html; charset=utf-8'
        assert 'GEMINI record about' in r.body
|
||||
|
|
Loading…
Reference in New Issue