Extract group info from GeoNetwork categories. Closes #2.
Collect values from old gml: namespace. Closes #3. Parse TimeInstant extents.
This commit is contained in:
parent
f51ca2d117
commit
66fa23232d
|
@ -1 +1,9 @@
|
|||
from geonetwork import GeoNetworkHarvester
|
||||
try:
|
||||
import pkg_resources
|
||||
pkg_resources.declare_namespace(__name__)
|
||||
except ImportError:
|
||||
import pkgutil
|
||||
__path__ = pkgutil.extend_path(__path__, __name__)
|
||||
|
||||
from ckanext.geonetwork.harvesters.geonetwork import GeoNetworkHarvester
|
||||
from ckanext.geonetwork.harvesters.utils import GeoNetworkClient
|
||||
|
|
|
@ -1,28 +1,56 @@
|
|||
from .utils import GeoNetworkClient
|
||||
from .utils import GEONETWORK_V210, GEONETWORK_V26
|
||||
|
||||
import re
|
||||
import urllib
|
||||
import urlparse
|
||||
|
||||
import logging
|
||||
|
||||
from ckan import model
|
||||
from ckan.model import Session
|
||||
|
||||
from ckan.plugins.core import SingletonPlugin, implements
|
||||
|
||||
from ckanext.harvest.interfaces import IHarvester
|
||||
from ckanext.harvest.model import HarvestObject
|
||||
from ckanext.harvest.model import HarvestObjectExtra as HOExtra
|
||||
from ckan.plugins.core import SingletonPlugin
|
||||
|
||||
from ckanext.spatial.lib.csw_client import CswService
|
||||
from ckanext.spatial.harvesters.base import SpatialHarvester, text_traceback
|
||||
from ckanext.spatial.harvesters.csw import CSWHarvester
|
||||
|
||||
from ckanext.spatial.model import ISODocument
|
||||
from ckanext.spatial.model import ISOElement
|
||||
|
||||
from ckan.lib.helpers import json
|
||||
import math
|
||||
from ckan.logic import ValidationError, NotFound, get_action
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Extend the ISODocument definitions with an extra element, so that
# gml:TimeInstant positions found in the temporal extent are collected
# under the "temporal-extent-instant" name.
log.info('GeoNetwork harvester: extending ISODocument with TimeInstant')
ISODocument.elements.append(ISOElement(
    name="temporal-extent-instant",
    search_paths=[
        "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimeInstant/gml:timePosition/text()",
    ],
    multiplicity="*",
))

# Some old GeoNetwork instances still use the old GML namespace URI.
# Register that URI under the "oldgml" prefix and give every search path
# that mentions gml: a twin that uses the old prefix instead.
log.info('GeoNetwork harvester: adding old GML URI')
ISOElement.namespaces['oldgml'] = "http://www.opengis.net/gml"

for element in ISODocument.elements:
    # Collect the twins before touching search_paths: appending while
    # iterating the same list would make the loop revisit the new entries.
    legacy_paths = [known_path.replace('gml:', 'oldgml:')
                    for known_path in element.search_paths
                    if "gml:" in known_path]

    for legacy_path in legacy_paths:
        element.search_paths.append(legacy_path)
        log.info("Added old URI for gml to %s", element.name)
|
||||
|
||||
|
||||
class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
||||
|
||||
def info(self):
|
||||
|
@ -38,23 +66,23 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
|||
package_dict = super(GeoNetworkHarvester, self).get_package_dict(iso_values, harvest_object)
|
||||
|
||||
# Add default_tags from config
|
||||
default_tags = self.source_config.get('default_tags',[])
|
||||
default_tags = self.source_config.get('default_tags', [])
|
||||
if default_tags:
|
||||
for tag in default_tags:
|
||||
package_dict['tags'].append({'name': tag})
|
||||
|
||||
# Add default_extras from config
|
||||
default_extras = self.source_config.get('default_extras',{})
|
||||
default_extras = self.source_config.get('default_extras', {})
|
||||
if default_extras:
|
||||
override_extras = self.source_config.get('override_extras',False)
|
||||
override_extras = self.source_config.get('override_extras', False)
|
||||
|
||||
existing_keys = [entry.get('key') for entry in package_dict['extras'] ]
|
||||
existing_keys = [entry.get('key') for entry in package_dict['extras']]
|
||||
|
||||
for key,value in default_extras.iteritems():
|
||||
for key, value in default_extras.iteritems():
|
||||
log.debug('Processing extra %s', key)
|
||||
if not key in existing_keys or override_extras:
|
||||
# Look for replacement strings
|
||||
if isinstance(value,basestring):
|
||||
if isinstance(value, basestring):
|
||||
value = value.format(
|
||||
harvest_source_id=str(harvest_object.job.source.id),
|
||||
harvest_source_url=str(harvest_object.job.source.url).strip('/'),
|
||||
|
@ -62,24 +90,77 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
|||
harvest_job_id=str(harvest_object.job.id),
|
||||
harvest_object_id=str(harvest_object.id),
|
||||
guid=str(harvest_object.guid))
|
||||
package_dict['extras'].append( {'key': key, 'value': value })
|
||||
package_dict['extras'].append({'key': key, 'value': value})
|
||||
else:
|
||||
log.debug('Skipping existing extra %s', key)
|
||||
|
||||
|
||||
# Add GeoNetowrk specific extras
|
||||
# Add GeoNetwork specific extras
|
||||
gn_localized_url = harvest_object.job.source.url.strip('/')
|
||||
|
||||
if gn_localized_url[-3:] == 'csw' :
|
||||
if gn_localized_url[-3:] == 'csw':
|
||||
gn_localized_url = gn_localized_url[:-3]
|
||||
|
||||
log.debug('GN localized URL %s', gn_localized_url)
|
||||
#log.debug('Package dict is %r ', package_dict['extras'])
|
||||
|
||||
package_dict['extras'].append( {'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid })
|
||||
package_dict['extras'].append( {'key': 'gn_localized_url', 'value': gn_localized_url })
|
||||
package_dict['extras'].append({'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid})
|
||||
package_dict['extras'].append({'key': 'gn_localized_url', 'value': gn_localized_url})
|
||||
|
||||
# Add other elements from ISO metadata
|
||||
time_extents = self.infer_timeinstants(iso_values)
|
||||
if time_extents:
|
||||
log.info("Adding Time Instants...")
|
||||
package_dict['extras'].append({'key': 'temporal-extent-instant', 'value': time_extents})
|
||||
|
||||
# Handle groups mapping using GeoNetwork categories
|
||||
group_mapping = self.source_config.get('group_mapping', {})
|
||||
|
||||
if group_mapping:
|
||||
try:
|
||||
context = {'model': model, 'session': Session, 'user': 'harvest'}
|
||||
validated_groups = []
|
||||
|
||||
version = self.source_config.get('version')
|
||||
client = GeoNetworkClient(gn_localized_url, version)
|
||||
cats = client.retrieveMetadataCategories(harvest_object.guid)
|
||||
for cat in cats:
|
||||
groupname = group_mapping[cat]
|
||||
|
||||
printname = groupname if not None else "NONE"
|
||||
log.debug("category %s mapped into %s" % (cat, printname))
|
||||
|
||||
if groupname:
|
||||
try:
|
||||
data_dict = {'id': groupname}
|
||||
group = get_action('group_show')(context, data_dict)
|
||||
#log.info('Group %s found %s' % (groupname, group))
|
||||
#if self.api_version == 1:
|
||||
#validated_groups.append(group['name'])
|
||||
#else:
|
||||
#validated_groups.append(group['id'])
|
||||
validated_groups.append({'name': groupname})
|
||||
except NotFound, e:
|
||||
log.warning('Group %s from category %s is not available' % (groupname, cat))
|
||||
|
||||
package_dict['groups'] = validated_groups
|
||||
except e:
|
||||
log.warning('Error handling groups for metadata %s' % harvest_object.guid)
|
||||
|
||||
# End of processing, return the modified package
|
||||
return package_dict
|
||||
|
||||
def infer_timeinstants(self, values):
    """Collapse the harvested time instants into one comma-separated string.

    :param values: mapping of ISO element names to extracted values; the
        entries under ``"temporal-extent-instant"`` are read.
    :returns: the distinct instants joined with ``","`` in first-seen
        order, or ``None`` when no instant was found.
    """
    unique_instants = []

    for instant in values["temporal-extent-instant"]:
        # Drop repeats while keeping the order of first appearance.
        if instant in unique_instants:
            continue
        unique_instants.append(instant)

    log.info("%d TIME ISTANTS FOUND", len(unique_instants))

    if not unique_instants:
        return None

    return ",".join(unique_instants)
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import logging
|
||||
#import re
|
||||
import urllib
|
||||
import urllib2
|
||||
import zipfile
|
||||
from StringIO import StringIO
|
||||
from lxml import etree
|
||||
|
||||
GEONETWORK_V26 = "2.6"
GEONETWORK_V210 = "2.10"
GEONETWORK_VERSIONS = [GEONETWORK_V26, GEONETWORK_V210]

logger = logging.getLogger(__name__)


class GeoNetworkClient(object):
    """Small client for GeoNetwork services that are not exposed through CSW.

    It is used to read the ``info.xml`` document of a metadata record
    (taken from its MEF export) and the categories listed in it.
    """

    def __init__(self, base, version):
        """Create a client bound to one GeoNetwork instance.

        :param base: base URL of the GeoNetwork instance, without a
            trailing slash (service paths are appended to it).
        :param version: one of ``GEONETWORK_VERSIONS``; ``None`` selects
            ``GEONETWORK_V210``.
        :raises AssertionError: if ``version`` is not a known version.
        """
        if version is None:
            version = GEONETWORK_V210

        # Deliberately kept as an assert so the exception type seen by
        # existing callers does not change.
        assert version in GEONETWORK_VERSIONS
        self.version = version
        self.base = base

    def retrieveInfo(self, uuid):
        """Fetch and parse the ``info.xml`` entry of a record's MEF export.

        Only the 2.6 export endpoint is implemented. For any other version
        nothing is requested and ``None`` is returned.

        :param uuid: identifier of the metadata record.
        :returns: the parsed ``info.xml`` root element, or ``None`` when
            the version is not supported or the archive has no
            ``info.xml`` entry.
        """
        if self.version != GEONETWORK_V26:
            logger.warning(
                'Retrieving info for %s is not supported on GeoNetwork %s',
                uuid, self.version)
            return None

        url = "%s/srv/en/mef.export" % self.base
        query = urllib.urlencode({
            "uuid": uuid
        })

        logger.info('Loading MEF for %s', uuid)
        request = urllib2.Request(url, query)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(), urllib2.HTTPRedirectHandler())

        response = opener.open(request)  # will get a ZIP file
        try:
            content = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        xml = None

        zfile = zipfile.ZipFile(StringIO(content))
        try:
            if 'info.xml' in zfile.namelist():
                xml = etree.fromstring(zfile.read('info.xml'))
        finally:
            zfile.close()

        return xml

    def retrieveMetadataCategories(self, uuid):
        """Return the category names assigned to a metadata record.

        :param uuid: identifier of the metadata record.
        :returns: list with the ``name`` attribute of every
            ``categories/category`` element in the record's ``info.xml``;
            an empty list when no info document could be retrieved.
        """
        xml = self.retrieveInfo(uuid)

        # retrieveInfo yields None for unsupported versions or archives
        # without info.xml; treat that as "no categories", not a crash.
        if xml is None:
            return []

        return [cat.get('name') for cat in xml.findall('categories/category')]
|
||||
|
4
setup.py
4
setup.py
|
@ -1,7 +1,7 @@
|
|||
from setuptools import setup, find_packages
|
||||
import sys, os
|
||||
|
||||
version = '1.0'
|
||||
version = '1.1'
|
||||
|
||||
setup(
|
||||
name='ckanext-geonetwork',
|
||||
|
@ -22,7 +22,7 @@ setup(
|
|||
install_requires=[
|
||||
# -*- Extra requirements: -*-
|
||||
],
|
||||
entry_points=\
|
||||
entry_points=
|
||||
"""
|
||||
[ckan.plugins]
|
||||
# Add plugins here, eg
|
||||
|
|
Loading…
Reference in New Issue