Merge pull request #135 from tomkralidis/issue-134
remove legacy CSW server implementation
This commit is contained in:
commit
005e47c635
|
@ -1,458 +0,0 @@
|
||||||
try: from cStringIO import StringIO
|
|
||||||
except ImportError: from StringIO import StringIO
|
|
||||||
import traceback
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from pylons import request, response, config
|
|
||||||
from lxml import etree
|
|
||||||
from owslib.csw import namespaces
|
|
||||||
from sqlalchemy import select,distinct,or_
|
|
||||||
|
|
||||||
from ckan.lib.base import BaseController
|
|
||||||
from ckan.lib.helpers import truncate
|
|
||||||
from ckan.model import Package
|
|
||||||
from ckan.model.meta import Session
|
|
||||||
from ckanext.harvest.model import HarvestObject, HarvestJob, HarvestSource
|
|
||||||
|
|
||||||
namespaces["xlink"] = "http://www.w3.org/1999/xlink"
|
|
||||||
|
|
||||||
log = __import__("logging").getLogger(__name__)
|
|
||||||
|
|
||||||
LOG_XML_LENGTH = config.get('cswservice.log_xml_length', 1000)
|
|
||||||
|
|
||||||
from random import random
|
|
||||||
class __rlog__(object):
|
|
||||||
"""
|
|
||||||
Random log -- log wrapper to log a defined percentage
|
|
||||||
of dialogues
|
|
||||||
"""
|
|
||||||
def __init__(self, threshold=config["cswservice.rndlog_threshold"]):
|
|
||||||
self.threshold = threshold
|
|
||||||
self.i = random()
|
|
||||||
def __getattr__(self, attr):
|
|
||||||
if self.i > self.threshold:
|
|
||||||
return self.dummy
|
|
||||||
return getattr(log, attr)
|
|
||||||
def dummy(self, *av, **kw):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def ntag(nselem):
|
|
||||||
pfx, elem = nselem.split(":")
|
|
||||||
return "{%s}%s" % (namespaces[pfx], elem)
|
|
||||||
|
|
||||||
class CatalogueServiceWebController(BaseController):
|
|
||||||
'''Basic CSW server'''
|
|
||||||
|
|
||||||
def _operations(self):
|
|
||||||
'''Returns a list of this class\'s methods.
|
|
||||||
'''
|
|
||||||
return dict((x, getattr(self, x)) for x in dir(self) if not x.startswith("_"))
|
|
||||||
|
|
||||||
def dispatch_get(self):
|
|
||||||
self.rlog = __rlog__()
|
|
||||||
self.rlog.info("request environ\n%s", request.environ)
|
|
||||||
self.rlog.info("request headers\n%s", request.headers)
|
|
||||||
ops = self._operations()
|
|
||||||
req = dict(request.GET.items())
|
|
||||||
|
|
||||||
## special cases
|
|
||||||
if "REQUEST" in req: req["request"] = req["REQUEST"]
|
|
||||||
if "SERVICE" in req: req["service"] = req["SERVICE"]
|
|
||||||
|
|
||||||
if "request" not in req:
|
|
||||||
err = self._exception(exceptionCode="MissingParameterValue", locator="request")
|
|
||||||
return self._render_xml(err)
|
|
||||||
if req["request"] not in ops:
|
|
||||||
err = self._exception(exceptionCode="OperationNotSupported",
|
|
||||||
locator=req["request"])
|
|
||||||
return self._render_xml(err)
|
|
||||||
if "service" not in req:
|
|
||||||
err = self._exception(exceptionCode="MissingParameterValue", locator="service")
|
|
||||||
return self._render_xml(err)
|
|
||||||
if req["service"] != "CSW":
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="service",
|
|
||||||
text=req["service"])
|
|
||||||
return self._render_xml(err)
|
|
||||||
## fill in some defaults
|
|
||||||
startPosition = req.get("startPosition", 1)
|
|
||||||
try:
|
|
||||||
req["startPosition"] = int(startPosition)
|
|
||||||
except:
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="startPosition",
|
|
||||||
text=unicode(startPosition))
|
|
||||||
return self._render_xml(err)
|
|
||||||
maxRecords = req.get("maxRecords", 10)
|
|
||||||
try:
|
|
||||||
req["maxRecords"] = int(maxRecords)
|
|
||||||
except:
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="maxRecords",
|
|
||||||
text=unicode(maxRecords))
|
|
||||||
return self._render_xml(err)
|
|
||||||
req["id"] = [req["id"]] if "id" in req else []
|
|
||||||
return ops[req["request"]](req)
|
|
||||||
|
|
||||||
def dispatch_post(self):
|
|
||||||
self.rlog = __rlog__()
|
|
||||||
self.rlog.info("request environ\n%s", request.environ)
|
|
||||||
self.rlog.info("request headers\n%s", request.headers)
|
|
||||||
ops = self._operations()
|
|
||||||
try:
|
|
||||||
req = etree.parse(StringIO(request.body))
|
|
||||||
self.rlog.info(u"request body\n%s", etree.tostring(req, pretty_print=True))
|
|
||||||
except:
|
|
||||||
self.rlog.info("request body\n%s", request.body)
|
|
||||||
self.rlog.error("exception parsing body\n%s", traceback.format_exc())
|
|
||||||
err = self._exception(exceptionCode="MissingParameterValue", locator="request")
|
|
||||||
return self._render_xml(err)
|
|
||||||
|
|
||||||
root = req.getroot()
|
|
||||||
_unused, op = root.tag.rsplit("}", 1)
|
|
||||||
if op not in ops:
|
|
||||||
err = self._exception(exceptionCode="OperationNotSupported", locator=op)
|
|
||||||
return self._render_xml(err)
|
|
||||||
|
|
||||||
req = self._parse_xml_request(root)
|
|
||||||
if not isinstance(req, dict):
|
|
||||||
return req
|
|
||||||
return ops[op](req)
|
|
||||||
|
|
||||||
def _render_xml(self, root):
|
|
||||||
tree = etree.ElementTree(root)
|
|
||||||
data = etree.tostring(tree, pretty_print=True)
|
|
||||||
response.headers["Content-Length"] = len(data)
|
|
||||||
response.content_type = "application/xml"
|
|
||||||
response.charset="UTF-8"
|
|
||||||
self.rlog.info("response headers:\n%s", response.headers)
|
|
||||||
self.rlog.info("response.body:\n%s", data)
|
|
||||||
return data
|
|
||||||
|
|
||||||
def _exception(self, text=None, **kw):
|
|
||||||
metaargs = {
|
|
||||||
"nsmap": namespaces,
|
|
||||||
"version": "1.0.0",
|
|
||||||
ntag("xsi:schemaLocation"): "http://schemas.opengis.net/ows/1.0.0/owsExceptionReport.xsd",
|
|
||||||
}
|
|
||||||
root = etree.Element(ntag("ows:ExceptionReport"), **metaargs)
|
|
||||||
exc = etree.SubElement(root, ntag("ows:Exception"), **kw)
|
|
||||||
if text is not None:
|
|
||||||
txt = etree.SubElement(exc, ntag("ows:ExceptionText"))
|
|
||||||
txt.text = text
|
|
||||||
return root
|
|
||||||
|
|
||||||
def _parse_xml_request(self, root):
|
|
||||||
"""
|
|
||||||
Check common parts of the request for GetRecords, GetRecordById, etc.
|
|
||||||
Return a dictionary of parameters or else an XML error message (string).
|
|
||||||
The dictionary should be compatible with CSW GET requests
|
|
||||||
"""
|
|
||||||
service = root.get("service")
|
|
||||||
if service is None:
|
|
||||||
err = self._exception(exceptionCode="MissingParameterValue", locator="service")
|
|
||||||
return self._render_xml(err)
|
|
||||||
elif service != "CSW":
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="service", text=service)
|
|
||||||
return self._render_xml(err)
|
|
||||||
outputSchema = root.get("outputSchema", namespaces["gmd"])
|
|
||||||
if outputSchema != namespaces["gmd"]:
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="outputSchema", text=outputSchema)
|
|
||||||
return self._render_xml(err)
|
|
||||||
resultType = root.get("resultType", "results")
|
|
||||||
if resultType not in ("results", "hits"):
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="resultType", text=resultType)
|
|
||||||
return self._render_xml(err)
|
|
||||||
outputFormat = root.get("outputFormat", "application/xml")
|
|
||||||
if outputFormat != "application/xml":
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="outputFormat", text=outputFormat)
|
|
||||||
return self._render_xml(err)
|
|
||||||
elementSetName = root.get("elementSetName", "full")
|
|
||||||
|
|
||||||
params = {
|
|
||||||
"outputSchema": outputSchema,
|
|
||||||
"resultType": resultType,
|
|
||||||
"outputFormat": outputFormat,
|
|
||||||
"elementSetName": elementSetName
|
|
||||||
}
|
|
||||||
|
|
||||||
startPosition = root.get("startPosition", "1")
|
|
||||||
try:
|
|
||||||
params["startPosition"] = int(startPosition)
|
|
||||||
except:
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="startPosition")
|
|
||||||
return self._render_xml(err)
|
|
||||||
maxRecords = root.get("maxRecords", "10")
|
|
||||||
try:
|
|
||||||
params["maxRecords"] = int(maxRecords)
|
|
||||||
except:
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="maxRecords")
|
|
||||||
return self._render_xml(err)
|
|
||||||
|
|
||||||
params["id"] = [x.text for x in root.findall(ntag("csw:Id"))]
|
|
||||||
|
|
||||||
query = root.find(ntag("csw:Query"))
|
|
||||||
if query is not None:
|
|
||||||
params.update(self._parse_query(query))
|
|
||||||
|
|
||||||
if params["elementSetName"] not in ("full", "brief", "summary"):
|
|
||||||
err = self._exception(exceptionCode="InvalidParameterValue", locator="elementSetName",
|
|
||||||
text=params["elementSetName"])
|
|
||||||
return self._render_xml(err)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
def _parse_query(self, query):
|
|
||||||
params = {}
|
|
||||||
params["typeNames"] = query.get("typeNames", "csw:Record")
|
|
||||||
esn = query.find(ntag("csw:ElementSetName"))
|
|
||||||
if esn is not None:
|
|
||||||
params["elementSetName"] = esn.text
|
|
||||||
return params
|
|
||||||
|
|
||||||
def GetCapabilities(self, req):
|
|
||||||
site = request.host_url + request.path
|
|
||||||
caps = etree.Element(ntag("csw:Capabilities"), nsmap=namespaces)
|
|
||||||
srvid = etree.SubElement(caps, ntag("ows:ServiceIdentification"))
|
|
||||||
title = etree.SubElement(srvid, ntag("ows:Title"))
|
|
||||||
title.text = unicode(config["cswservice.title"])
|
|
||||||
abstract = etree.SubElement(srvid, ntag("ows:Abstract"))
|
|
||||||
abstract.text = unicode(config["cswservice.abstract"])
|
|
||||||
keywords = etree.SubElement(srvid, ntag("ows:Keywords"))
|
|
||||||
for word in [w.strip() for w in config["cswservice.keywords"].split(",")]:
|
|
||||||
if word == "": continue
|
|
||||||
kw = etree.SubElement(keywords, ntag("ows:Keyword"))
|
|
||||||
kw.text = unicode(word)
|
|
||||||
kwtype = etree.SubElement(keywords, ntag("ows:Type"))
|
|
||||||
kwtype.text = unicode(config["cswservice.keyword_type"])
|
|
||||||
srvtype = etree.SubElement(srvid, ntag("ows:ServiceType"))
|
|
||||||
srvtype.text = "CSW"
|
|
||||||
srvver = etree.SubElement(srvid, ntag("ows:ServiceTypeVersion"))
|
|
||||||
srvver.text = "2.0.2"
|
|
||||||
### ows:Fees, ows:AccessConstraints
|
|
||||||
|
|
||||||
provider = etree.SubElement(caps, ntag("ows:ServiceProvider"))
|
|
||||||
provname = etree.SubElement(provider, ntag("ows:ProviderName"))
|
|
||||||
provname.text = unicode(config["cswservice.provider_name"])
|
|
||||||
attrs = {
|
|
||||||
ntag("xlink:href"): site
|
|
||||||
}
|
|
||||||
etree.SubElement(provider, ntag("ows:ProviderSite"), **attrs)
|
|
||||||
|
|
||||||
contact = etree.SubElement(provider, ntag("ows:ServiceContact"))
|
|
||||||
name = etree.SubElement(contact, ntag("ows:IndividualName"))
|
|
||||||
name.text = unicode(config["cswservice.contact_name"])
|
|
||||||
pos = etree.SubElement(contact, ntag("ows:PositionName"))
|
|
||||||
pos.text = unicode(config["cswservice.contact_position"])
|
|
||||||
cinfo = etree.SubElement(contact, ntag("ows:ContactInfo"))
|
|
||||||
phone = etree.SubElement(cinfo, ntag("ows:Phone"))
|
|
||||||
voice = etree.SubElement(phone, ntag("ows:Voice"))
|
|
||||||
voice.text = unicode(config["cswservice.contact_voice"])
|
|
||||||
fax = etree.SubElement(phone, ntag("ows:Fax"))
|
|
||||||
fax.text = unicode(config["cswservice.contact_fax"])
|
|
||||||
addr = etree.SubElement(cinfo, ntag("ows:Address"))
|
|
||||||
dpoint = etree.SubElement(addr, ntag("ows:DeliveryPoint"))
|
|
||||||
dpoint.text= unicode(config["cswservice.contact_address"])
|
|
||||||
city = etree.SubElement(addr, ntag("ows:City"))
|
|
||||||
city.text = unicode(config["cswservice.contact_city"])
|
|
||||||
region = etree.SubElement(addr, ntag("ows:AdministrativeArea"))
|
|
||||||
region.text = unicode(config["cswservice.contact_region"])
|
|
||||||
pcode = etree.SubElement(addr, ntag("ows:PostalCode"))
|
|
||||||
pcode.text = unicode(config["cswservice.contact_pcode"])
|
|
||||||
country = etree.SubElement(addr, ntag("ows:Country"))
|
|
||||||
country.text = unicode(config["cswservice.contact_country"])
|
|
||||||
email = etree.SubElement(addr, ntag("ows:ElectronicMailAddress"))
|
|
||||||
email.text = unicode(config["cswservice.contact_email"])
|
|
||||||
hours = etree.SubElement(cinfo, ntag("ows:HoursOfService"))
|
|
||||||
hours.text = unicode(config["cswservice.contact_hours"])
|
|
||||||
instructions = etree.SubElement(cinfo, ntag("ows:ContactInstructions"))
|
|
||||||
instructions.text = unicode(config["cswservice.contact_instructions"])
|
|
||||||
role = etree.SubElement(contact, ntag("ows:Role"))
|
|
||||||
role.text = unicode(config["cswservice.contact_role"])
|
|
||||||
|
|
||||||
opmeta = etree.SubElement(caps, ntag("ows:OperationsMetadata"))
|
|
||||||
|
|
||||||
op = etree.SubElement(opmeta, ntag("ows:Operation"), name="GetCapabilities")
|
|
||||||
dcp = etree.SubElement(op, ntag("ows:DCP"))
|
|
||||||
http = etree.SubElement(dcp, ntag("ows:HTTP"))
|
|
||||||
attrs = { ntag("xlink:href"): site }
|
|
||||||
etree.SubElement(http, ntag("ows:Get"), **attrs)
|
|
||||||
post = etree.SubElement(http, ntag("ows:Post"), **attrs)
|
|
||||||
pe = etree.SubElement(post, ntag("ows:Constraint"), name="PostEncoding")
|
|
||||||
val = etree.SubElement(pe, ntag("ows:Value"))
|
|
||||||
val.text = "XML"
|
|
||||||
|
|
||||||
op = etree.SubElement(opmeta, ntag("ows:Operation"), name="GetRecords")
|
|
||||||
dcp = etree.SubElement(op, ntag("ows:DCP"))
|
|
||||||
http = etree.SubElement(dcp, ntag("ows:HTTP"))
|
|
||||||
attrs = { ntag("xlink:href"): site }
|
|
||||||
etree.SubElement(http, ntag("ows:Get"), **attrs)
|
|
||||||
post = etree.SubElement(http, ntag("ows:Post"), **attrs)
|
|
||||||
pe = etree.SubElement(post, ntag("ows:Constraint"), name="PostEncoding")
|
|
||||||
val = etree.SubElement(pe, ntag("ows:Value"))
|
|
||||||
val.text = "XML"
|
|
||||||
param = etree.SubElement(op, ntag("ows:Parameter"), name="resultType")
|
|
||||||
val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
val.text = "results"
|
|
||||||
param = etree.SubElement(op, ntag("ows:Parameter"), name="outputFormat")
|
|
||||||
val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
val.text = "application/xml"
|
|
||||||
param = etree.SubElement(op, ntag("ows:Parameter"), name="outputSchema")
|
|
||||||
val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
val.text = "http://www.isotc211.org/2005/gmd"
|
|
||||||
param = etree.SubElement(op, ntag("ows:Parameter"), name="typeNames")
|
|
||||||
val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
val.text = "gmd:MD_Metadata"
|
|
||||||
|
|
||||||
op = etree.SubElement(opmeta, ntag("ows:Operation"), name="GetRecordById")
|
|
||||||
dcp = etree.SubElement(op, ntag("ows:DCP"))
|
|
||||||
http = etree.SubElement(dcp, ntag("ows:HTTP"))
|
|
||||||
attrs = { ntag("xlink:href"): site }
|
|
||||||
etree.SubElement(http, ntag("ows:Get"), **attrs)
|
|
||||||
post = etree.SubElement(http, ntag("ows:Post"), **attrs)
|
|
||||||
pe = etree.SubElement(post, ntag("ows:Constraint"), name="PostEncoding")
|
|
||||||
val = etree.SubElement(pe, ntag("ows:Value"))
|
|
||||||
val.text = "XML"
|
|
||||||
param = etree.SubElement(op, ntag("ows:Parameter"), name="resultType")
|
|
||||||
val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
val.text = "results"
|
|
||||||
param = etree.SubElement(op, ntag("ows:Parameter"), name="outputFormat")
|
|
||||||
val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
val.text = "application/xml"
|
|
||||||
param = etree.SubElement(op, ntag("ows:Parameter"), name="outputSchema")
|
|
||||||
val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
val.text = "http://www.isotc211.org/2005/gmd"
|
|
||||||
param = etree.SubElement(op, ntag("ows:Parameter"), name="typeNames")
|
|
||||||
val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
val.text = "gmd:MD_Metadata"
|
|
||||||
|
|
||||||
# op = etree.SubElement(opmeta, ntag("ows:Operation"), name="Harvest")
|
|
||||||
# dcp = etree.SubElement(op, ntag("ows:DCP"))
|
|
||||||
# http = etree.SubElement(dcp, ntag("ows:HTTP"))
|
|
||||||
# attrs = { ntag("xlink:href"): site }
|
|
||||||
# post = etree.SubElement(http, ntag("ows:Post"), **attrs)
|
|
||||||
# pe = etree.SubElement(post, ntag("ows:Constraint"), name="PostEncoding")
|
|
||||||
# val = etree.SubElement(pe, ntag("ows:Value"))
|
|
||||||
# val.text = "XML"
|
|
||||||
# param = etree.SubElement(op, ntag("ows:Parameter"), name="outputFormat")
|
|
||||||
# val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
# val.text = "application/xml"
|
|
||||||
# param = etree.SubElement(op, ntag("ows:Parameter"), name="outputSchema")
|
|
||||||
# val = etree.SubElement(param, ntag("ows:Value"))
|
|
||||||
# val.text = "http://www.isotc211.org/2005/gmd"
|
|
||||||
|
|
||||||
filcap = etree.SubElement(caps, ntag("ogc:Filter_Capabilities"))
|
|
||||||
spacap = etree.SubElement(filcap, ntag("ogc:Spatial_Capabilities"))
|
|
||||||
geomop = etree.SubElement(spacap, ntag("ogc:GeometryOperands"))
|
|
||||||
spaceop = etree.SubElement(spacap, ntag("ogc:SpatialOperators"))
|
|
||||||
|
|
||||||
scalcap = etree.SubElement(filcap, ntag("ogc:Scalar_Capabilities"))
|
|
||||||
lop = etree.SubElement(scalcap, ntag("ogc:LogicalOperators"))
|
|
||||||
cop = etree.SubElement(scalcap, ntag("ogc:ComparisonOperators"))
|
|
||||||
|
|
||||||
idcap = etree.SubElement(filcap, ntag("ogc:Id_Capabilities"))
|
|
||||||
eid = etree.SubElement(idcap, ntag("ogc:EID"))
|
|
||||||
fid = etree.SubElement(idcap, ntag("ogc:FID"))
|
|
||||||
|
|
||||||
data = self._render_xml(caps)
|
|
||||||
log.info('GetCapabilities response: %r', truncate(data, LOG_XML_LENGTH))
|
|
||||||
return data
|
|
||||||
|
|
||||||
def GetRecords(self, req):
|
|
||||||
resp = etree.Element(ntag("csw:GetRecordsResponse"), nsmap=namespaces)
|
|
||||||
etree.SubElement(resp, ntag("csw:SearchStatus"), timestamp=datetime.utcnow().isoformat())
|
|
||||||
|
|
||||||
cursor = Session.connection()
|
|
||||||
|
|
||||||
q = Session.query(distinct(HarvestObject.guid)) \
|
|
||||||
.join(Package) \
|
|
||||||
.join(HarvestSource) \
|
|
||||||
.filter(HarvestObject.current==True) \
|
|
||||||
.filter(Package.state==u'active') \
|
|
||||||
.filter(or_(HarvestSource.type=='gemini-single', \
|
|
||||||
HarvestSource.type=='gemini-waf', \
|
|
||||||
HarvestSource.type=='csw'))
|
|
||||||
|
|
||||||
### TODO Parse query instead of stupidly just returning whatever we like
|
|
||||||
startPosition = req["startPosition"] if req["startPosition"] > 0 else 1
|
|
||||||
maxRecords = req["maxRecords"] if req["maxRecords"] > 0 else 10
|
|
||||||
rset = q.offset(startPosition-1).limit(maxRecords)
|
|
||||||
|
|
||||||
total = q.count()
|
|
||||||
attrs = {
|
|
||||||
"numberOfRecordsMatched": total,
|
|
||||||
"elementSet": req["elementSetName"], # we lie here. it's always really "full"
|
|
||||||
}
|
|
||||||
if req["resultType"] == "results":
|
|
||||||
returned = rset.count()
|
|
||||||
attrs["numberOfRecordsReturned"] = returned
|
|
||||||
if (total-startPosition-1) > returned:
|
|
||||||
attrs["nextRecord"] = startPosition + returned
|
|
||||||
else:
|
|
||||||
attrs["nextRecord"] = 0
|
|
||||||
else:
|
|
||||||
attrs["numberOfRecordsReturned"] = 0
|
|
||||||
|
|
||||||
attrs = dict((k, unicode(v)) for k,v in attrs.items())
|
|
||||||
results = etree.SubElement(resp, ntag("csw:SearchResults"), **attrs)
|
|
||||||
|
|
||||||
if req["resultType"] == "results":
|
|
||||||
for guid, in Session.execute(rset):
|
|
||||||
doc = Session.query(HarvestObject) \
|
|
||||||
.join(Package) \
|
|
||||||
.filter(HarvestObject.guid==guid) \
|
|
||||||
.filter(HarvestObject.current==True) \
|
|
||||||
.filter(Package.state==u'active') \
|
|
||||||
.first()
|
|
||||||
try:
|
|
||||||
|
|
||||||
record = etree.parse(StringIO(doc.content.encode("utf-8")))
|
|
||||||
results.append(record.getroot())
|
|
||||||
except:
|
|
||||||
log.error("exception parsing document %s:\n%s", doc.id, traceback.format_exc())
|
|
||||||
raise
|
|
||||||
|
|
||||||
data = self._render_xml(resp)
|
|
||||||
log.info('GetRecords response: %r', truncate(data, LOG_XML_LENGTH))
|
|
||||||
return data
|
|
||||||
|
|
||||||
def GetRecordById(self, req):
|
|
||||||
resp = etree.Element(ntag("csw:GetRecordByIdResponse"), nsmap=namespaces)
|
|
||||||
seen = set()
|
|
||||||
for ident in req["id"]:
|
|
||||||
doc = Session.query(HarvestObject) \
|
|
||||||
.join(Package) \
|
|
||||||
.join(HarvestJob).join(HarvestSource) \
|
|
||||||
.filter(HarvestSource.active==True) \
|
|
||||||
.filter(HarvestObject.guid==ident) \
|
|
||||||
.filter(HarvestObject.package!=None) \
|
|
||||||
.filter(Package.state==u'active') \
|
|
||||||
.order_by(HarvestObject.gathered.desc()) \
|
|
||||||
.limit(1).first()
|
|
||||||
|
|
||||||
if doc is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if 'MD_Metadata' in doc.content:
|
|
||||||
try:
|
|
||||||
record = etree.parse(StringIO(doc.content.encode("utf-8")))
|
|
||||||
resp.append(record.getroot())
|
|
||||||
except:
|
|
||||||
log.error("exception parsing document %s:\n%s", doc.id, traceback.format_exc())
|
|
||||||
raise
|
|
||||||
|
|
||||||
data = self._render_xml(resp)
|
|
||||||
log.info('GetRecordById response: %r', truncate(data, LOG_XML_LENGTH))
|
|
||||||
return data
|
|
||||||
|
|
||||||
### <ns0:GetRecords xmlns:ns0="http://www.opengis.net/cat/csw/2.0.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" outputSchema="http://www.isotc211.org/2005/gmd" outputFormat="application/xml" version="2.0.2" resultType="results" service="CSW" startPosition="1" maxRecords="5" xsi:schemaLocation="http://www.opengis.net/cat/csw/2.0.2 http://schemas.opengis.net/csw/2.0.2/CSW-discovery.xsd">
|
|
||||||
### <ns0:Query typeNames="csw:Record">
|
|
||||||
### <ns0:ElementSetName>full</ns0:ElementSetName>
|
|
||||||
### <ns0:Constraint version="1.1.0">
|
|
||||||
### <ns0:Filter xmlns:ns0="http://www.opengis.net/ogc">
|
|
||||||
### <ns0:PropertyIsEqualTo>
|
|
||||||
### <ns0:PropertyName>dc:type</ns0:PropertyName>
|
|
||||||
### <ns0:Literal>dataset</ns0:Literal>
|
|
||||||
### </ns0:PropertyIsEqualTo>
|
|
||||||
### </ns0:Filter>
|
|
||||||
### </ns0:Constraint>
|
|
||||||
### </ns0:Query>
|
|
||||||
### </ns0:GetRecords>
|
|
||||||
|
|
|
@ -383,45 +383,6 @@ class SpatialQuery(p.SingletonPlugin):
|
||||||
search_results['results'] = pkgs
|
search_results['results'] = pkgs
|
||||||
return search_results
|
return search_results
|
||||||
|
|
||||||
|
|
||||||
class CatalogueServiceWeb(p.SingletonPlugin):
|
|
||||||
p.implements(p.IConfigurable)
|
|
||||||
p.implements(p.IRoutes)
|
|
||||||
|
|
||||||
def configure(self, config):
|
|
||||||
config.setdefault("cswservice.title", "Untitled Service - set cswservice.title in config")
|
|
||||||
config.setdefault("cswservice.abstract", "Unspecified service description - set cswservice.abstract in config")
|
|
||||||
config.setdefault("cswservice.keywords", "")
|
|
||||||
config.setdefault("cswservice.keyword_type", "theme")
|
|
||||||
config.setdefault("cswservice.provider_name", "Unnamed provider - set cswservice.provider_name in config")
|
|
||||||
config.setdefault("cswservice.contact_name", "No contact - set cswservice.contact_name in config")
|
|
||||||
config.setdefault("cswservice.contact_position", "")
|
|
||||||
config.setdefault("cswservice.contact_voice", "")
|
|
||||||
config.setdefault("cswservice.contact_fax", "")
|
|
||||||
config.setdefault("cswservice.contact_address", "")
|
|
||||||
config.setdefault("cswservice.contact_city", "")
|
|
||||||
config.setdefault("cswservice.contact_region", "")
|
|
||||||
config.setdefault("cswservice.contact_pcode", "")
|
|
||||||
config.setdefault("cswservice.contact_country", "")
|
|
||||||
config.setdefault("cswservice.contact_email", "")
|
|
||||||
config.setdefault("cswservice.contact_hours", "")
|
|
||||||
config.setdefault("cswservice.contact_instructions", "")
|
|
||||||
config.setdefault("cswservice.contact_role", "")
|
|
||||||
|
|
||||||
config["cswservice.rndlog_threshold"] = float(config.get("cswservice.rndlog_threshold", "0.01"))
|
|
||||||
|
|
||||||
def before_map(self, route_map):
|
|
||||||
c = "ckanext.spatial.controllers.csw:CatalogueServiceWebController"
|
|
||||||
route_map.connect("/csw", controller=c, action="dispatch_get",
|
|
||||||
conditions={"method": ["GET"]})
|
|
||||||
route_map.connect("/csw", controller=c, action="dispatch_post",
|
|
||||||
conditions={"method": ["POST"]})
|
|
||||||
|
|
||||||
return route_map
|
|
||||||
|
|
||||||
def after_map(self, route_map):
|
|
||||||
return route_map
|
|
||||||
|
|
||||||
class HarvestMetadataApi(p.SingletonPlugin):
|
class HarvestMetadataApi(p.SingletonPlugin):
|
||||||
'''
|
'''
|
||||||
Harvest Metadata API
|
Harvest Metadata API
|
||||||
|
|
|
@ -1,27 +1,12 @@
|
||||||
import time
|
import time
|
||||||
import urllib
|
|
||||||
from urllib2 import urlopen
|
from urllib2 import urlopen
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from owslib.csw import CatalogueServiceWeb
|
|
||||||
from owslib.fes import PropertyIsEqualTo
|
|
||||||
from owslib.iso import MD_Metadata
|
|
||||||
from pylons import config
|
from pylons import config
|
||||||
from nose.plugins.skip import SkipTest
|
from nose.plugins.skip import SkipTest
|
||||||
|
|
||||||
from ckan.model import engine_is_sqlite
|
from ckan.model import engine_is_sqlite
|
||||||
|
|
||||||
service = "http://ogcdev.bgs.ac.uk/geonetwork/srv/en/csw"
|
|
||||||
service = "http://ec2-46-51-149-132.eu-west-1.compute.amazonaws.com:8080/geonetwork/srv/csw"
|
|
||||||
service = "http://localhost:5000/csw"
|
|
||||||
#service = "http://localhost:8080/geonetwork/srv/csw"
|
|
||||||
|
|
||||||
GMD = "http://www.isotc211.org/2005/gmd"
|
|
||||||
|
|
||||||
### make sure GetRecords is called first so that we can use identifiers
|
|
||||||
### we know exist later on in GetRecordById
|
|
||||||
identifiers = []
|
|
||||||
|
|
||||||
# copied from ckan/tests/__init__ to save importing it and therefore
|
# copied from ckan/tests/__init__ to save importing it and therefore
|
||||||
# setting up Pylons.
|
# setting up Pylons.
|
||||||
class CkanServerCase:
|
class CkanServerCase:
|
||||||
|
@ -79,103 +64,3 @@ class CkanProcess(CkanServerCase):
|
||||||
def teardown_class(cls):
|
def teardown_class(cls):
|
||||||
cls._stop_ckan_server(cls.pid)
|
cls._stop_ckan_server(cls.pid)
|
||||||
|
|
||||||
class TestCswClient(CkanProcess):
|
|
||||||
|
|
||||||
## Invalid requests ##
|
|
||||||
|
|
||||||
def test_invalid(self):
|
|
||||||
params = urllib.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0})
|
|
||||||
f = urlopen(service, params)
|
|
||||||
response = f.read()
|
|
||||||
f.close()
|
|
||||||
assert "MissingParameterValue" in response, response
|
|
||||||
assert 'locator="request"' in response, response
|
|
||||||
|
|
||||||
def test_empty(self):
|
|
||||||
fp = urlopen(service)
|
|
||||||
response = fp.read()
|
|
||||||
fp.close()
|
|
||||||
assert "MissingParameterValue" in response, response
|
|
||||||
assert 'locator="request"' in response, response
|
|
||||||
|
|
||||||
def test_invalid_request(self):
|
|
||||||
fp = urlopen(service + "?request=foo")
|
|
||||||
response = fp.read()
|
|
||||||
fp.close()
|
|
||||||
assert "OperationNotSupported" in response, response
|
|
||||||
assert 'locator="foo"' in response, response
|
|
||||||
|
|
||||||
def test_invalid_service(self):
|
|
||||||
fp = urlopen(service + "?request=GetCapabilities&service=hello")
|
|
||||||
response = fp.read()
|
|
||||||
fp.close()
|
|
||||||
assert "InvalidParameterValue" in response, response
|
|
||||||
assert 'locator="service"' in response, response
|
|
||||||
|
|
||||||
## Test Capabilities ##
|
|
||||||
|
|
||||||
def test_good(self):
|
|
||||||
fp = urlopen(service + "?request=GetCapabilities&service=CSW")
|
|
||||||
caps = fp.read()
|
|
||||||
fp.close()
|
|
||||||
assert "GetCapabilities" in caps
|
|
||||||
assert "GetRecords" in caps
|
|
||||||
assert "GetRecordById" in caps
|
|
||||||
|
|
||||||
def test_good_post(self):
|
|
||||||
csw = CatalogueServiceWeb(service)
|
|
||||||
assert csw.identification.title, csw.identification.title
|
|
||||||
ops = dict((x.name, x.methods) for x in csw.operations)
|
|
||||||
assert "GetCapabilities" in ops
|
|
||||||
assert "GetRecords" in ops
|
|
||||||
assert "GetRecordById" in ops
|
|
||||||
|
|
||||||
## Get 01 Records ##
|
|
||||||
|
|
||||||
|
|
||||||
def test_GetRecords(self):
|
|
||||||
# NB: This test fails because no records have been setup...
|
|
||||||
raise SkipTest() # therefore skip
|
|
||||||
csw = CatalogueServiceWeb(service)
|
|
||||||
csw.getrecords2(outputschema=GMD, startposition=1, maxrecords=5)
|
|
||||||
nrecords = len(csw.records)
|
|
||||||
#print csw.response[:1024]
|
|
||||||
assert nrecords == 5, nrecords
|
|
||||||
for ident in csw.records:
|
|
||||||
identifiers.append(ident)
|
|
||||||
assert isinstance(csw.records[ident], MD_Metadata), (ident, csw.records[ident])
|
|
||||||
|
|
||||||
def test_GetRecords_dataset(self):
|
|
||||||
csw = CatalogueServiceWeb(service)
|
|
||||||
constraints = [PropertyIsEqualTo("dc:type", "dataset")]
|
|
||||||
csw.getrecords2(constraints=constraints, outputschema=GMD, startposition=1, maxrecords=5)
|
|
||||||
nrecords = len(csw.records)
|
|
||||||
# TODO
|
|
||||||
|
|
||||||
def test_GetRecords_brief(self):
|
|
||||||
csw = CatalogueServiceWeb(service)
|
|
||||||
csw.getrecords2(outputschema=GMD, startposition=1, maxrecords=5, esn="brief")
|
|
||||||
nrecords = len(csw.records)
|
|
||||||
# TODO
|
|
||||||
|
|
||||||
def test_GetRecords_summary(self):
|
|
||||||
csw = CatalogueServiceWeb(service)
|
|
||||||
csw.getrecords2(outputschema=GMD, startposition=1, maxrecords=5, esn="summary")
|
|
||||||
nrecords = len(csw.records)
|
|
||||||
# TODO
|
|
||||||
|
|
||||||
## Get 02 RecordById ##
|
|
||||||
|
|
||||||
def test_GetRecordById(self):
|
|
||||||
csw = CatalogueServiceWeb(service)
|
|
||||||
tofetch = identifiers[:2]
|
|
||||||
csw.getrecordbyid(tofetch, outputschema=GMD)
|
|
||||||
nrecords = len(csw.records)
|
|
||||||
assert nrecords == len(tofetch), nrecords
|
|
||||||
for ident in csw.records:
|
|
||||||
identifiers.append(ident)
|
|
||||||
assert isinstance(csw.records[ident], MD_Metadata), (ident, csw.records[ident])
|
|
||||||
|
|
||||||
csw.getrecordbyid(["nonexistent"], outputschema=GMD)
|
|
||||||
nrecords = len(csw.records)
|
|
||||||
assert nrecords == 0, nrecords
|
|
||||||
|
|
34
doc/csw.rst
34
doc/csw.rst
|
@ -200,38 +200,8 @@ keep CKAN and pycsw in sync, and serve pycsw with Apache + mod_wsgi like CKAN.
|
||||||
pycsw should be now accessible at http://localhost/csw
|
pycsw should be now accessible at http://localhost/csw
|
||||||
|
|
||||||
|
|
||||||
Legacy plugins and libraries
|
Legacy libraries
|
||||||
----------------------------
|
----------------
|
||||||
|
|
||||||
|
|
||||||
Old CSW Server
|
|
||||||
++++++++++++++
|
|
||||||
|
|
||||||
.. warning:: **Deprecated:** The old csw plugin has been deprecated, please see `ckan-pycsw`_
|
|
||||||
for details on how to integrate with pycsw.
|
|
||||||
|
|
||||||
To activate it, add the ``csw_server`` plugin to your ini file.
|
|
||||||
|
|
||||||
Only harvested datasets are served by this CSW Server. This is because
|
|
||||||
the harvested document is the one that is served, not something derived
|
|
||||||
from the CKAN Dataset object. Datasets that are created in CKAN by methods
|
|
||||||
other than harvesting are not served.
|
|
||||||
|
|
||||||
The currently supported methods with this CSW Server are:
|
|
||||||
* GetCapabilities
|
|
||||||
* GetRecords
|
|
||||||
* GetRecordById
|
|
||||||
|
|
||||||
For example you can ask the capabilities of the CSW server installed into CKAN
|
|
||||||
running on 127.0.0.1:5000 like this::
|
|
||||||
|
|
||||||
curl 'http://127.0.0.1:5000/csw?request=GetCapabilities&service=CSW&version=2.0.2'
|
|
||||||
|
|
||||||
And get a list of the records like this::
|
|
||||||
|
|
||||||
curl 'http://127.0.0.1:5000/csw?request=GetRecords&service=CSW&resultType=results&elementSetName=full&version=2.0.2'
|
|
||||||
|
|
||||||
The standard CSW response is in XML format.
|
|
||||||
|
|
||||||
cswinfo
|
cswinfo
|
||||||
+++++++
|
+++++++
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -30,7 +30,6 @@ setup(
|
||||||
[ckan.plugins]
|
[ckan.plugins]
|
||||||
spatial_metadata=ckanext.spatial.plugin:SpatialMetadata
|
spatial_metadata=ckanext.spatial.plugin:SpatialMetadata
|
||||||
spatial_query=ckanext.spatial.plugin:SpatialQuery
|
spatial_query=ckanext.spatial.plugin:SpatialQuery
|
||||||
cswserver=ckanext.spatial.plugin:CatalogueServiceWeb
|
|
||||||
spatial_harvest_metadata_api=ckanext.spatial.plugin:HarvestMetadataApi
|
spatial_harvest_metadata_api=ckanext.spatial.plugin:HarvestMetadataApi
|
||||||
|
|
||||||
csw_harvester=ckanext.spatial.harvesters:CSWHarvester
|
csw_harvester=ckanext.spatial.harvesters:CSWHarvester
|
||||||
|
|
2
test.ini
2
test.ini
|
@ -14,7 +14,7 @@ port = 5000
|
||||||
[app:main]
|
[app:main]
|
||||||
use = config:../ckan/test-core.ini
|
use = config:../ckan/test-core.ini
|
||||||
ckan.legacy_templates = false
|
ckan.legacy_templates = false
|
||||||
ckan.plugins = test_spatial_plugin harvest spatial_metadata spatial_query spatial_harvest_metadata_api gemini_csw_harvester gemini_doc_harvester gemini_waf_harvester cswserver
|
ckan.plugins = test_spatial_plugin harvest spatial_metadata spatial_query spatial_harvest_metadata_api gemini_csw_harvester gemini_doc_harvester gemini_waf_harvester
|
||||||
ckan.spatial.srid = 4326
|
ckan.spatial.srid = 4326
|
||||||
ckan.spatial.default_map_extent=-6.88,49.74,0.50,59.2
|
ckan.spatial.default_map_extent=-6.88,49.74,0.50,59.2
|
||||||
ckan.spatial.testing = true
|
ckan.spatial.testing = true
|
||||||
|
|
Loading…
Reference in New Issue