Extract group info from GeoNetwork categories. Closes #2.
Collect values from the old gml: namespace. Closes #3. Parse TimeInstant extents.
This commit is contained in:
parent
f51ca2d117
commit
66fa23232d
|
@ -1 +1,9 @@
|
||||||
from geonetwork import GeoNetworkHarvester
|
try:
|
||||||
|
import pkg_resources
|
||||||
|
pkg_resources.declare_namespace(__name__)
|
||||||
|
except ImportError:
|
||||||
|
import pkgutil
|
||||||
|
__path__ = pkgutil.extend_path(__path__, __name__)
|
||||||
|
|
||||||
|
from ckanext.geonetwork.harvesters.geonetwork import GeoNetworkHarvester
|
||||||
|
from ckanext.geonetwork.harvesters.utils import GeoNetworkClient
|
||||||
|
|
|
@ -1,28 +1,56 @@
|
||||||
|
from .utils import GeoNetworkClient
|
||||||
|
from .utils import GEONETWORK_V210, GEONETWORK_V26
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import urllib
|
|
||||||
import urlparse
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from ckan import model
|
from ckan import model
|
||||||
|
from ckan.model import Session
|
||||||
|
|
||||||
from ckan.plugins.core import SingletonPlugin, implements
|
from ckan.plugins.core import SingletonPlugin
|
||||||
|
|
||||||
from ckanext.harvest.interfaces import IHarvester
|
|
||||||
from ckanext.harvest.model import HarvestObject
|
|
||||||
from ckanext.harvest.model import HarvestObjectExtra as HOExtra
|
|
||||||
|
|
||||||
from ckanext.spatial.lib.csw_client import CswService
|
from ckanext.spatial.lib.csw_client import CswService
|
||||||
from ckanext.spatial.harvesters.base import SpatialHarvester, text_traceback
|
|
||||||
from ckanext.spatial.harvesters.csw import CSWHarvester
|
from ckanext.spatial.harvesters.csw import CSWHarvester
|
||||||
|
|
||||||
|
from ckanext.spatial.model import ISODocument
|
||||||
|
from ckanext.spatial.model import ISOElement
|
||||||
|
|
||||||
from ckan.lib.helpers import json
|
from ckan.logic import ValidationError, NotFound, get_action
|
||||||
import math
|
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Extend the ISODocument definitions by adding some more useful elements
|
||||||
|
|
||||||
|
log.info('GeoNetwork harvester: extending ISODocument with TimeInstant')
|
||||||
|
ISODocument.elements.append(
|
||||||
|
ISOElement(
|
||||||
|
name="temporal-extent-instant",
|
||||||
|
search_paths=[
|
||||||
|
"gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimeInstant/gml:timePosition/text()",
|
||||||
|
],
|
||||||
|
multiplicity="*",
|
||||||
|
))
|
||||||
|
|
||||||
|
# Some old GN instances still use the old GML namespace URI.
|
||||||
|
# We add extra XPaths bound to that old URI to address this issue.
|
||||||
|
log.info('GeoNetwork harvester: adding old GML URI')
|
||||||
|
ISOElement.namespaces['oldgml'] = "http://www.opengis.net/gml"
|
||||||
|
|
||||||
|
for element in ISODocument.elements:
|
||||||
|
newpaths = []
|
||||||
|
|
||||||
|
for path in element.search_paths:
|
||||||
|
if "gml:" in path:
|
||||||
|
newpath = path.replace('gml:', 'oldgml:')
|
||||||
|
newpaths.append(newpath)
|
||||||
|
|
||||||
|
for newpath in newpaths:
|
||||||
|
element.search_paths.append(newpath)
|
||||||
|
log.info("Added old URI for gml to %s", element.name)
|
||||||
|
|
||||||
|
|
||||||
class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
||||||
|
|
||||||
def info(self):
|
def info(self):
|
||||||
|
@ -66,8 +94,7 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
||||||
else:
|
else:
|
||||||
log.debug('Skipping existing extra %s', key)
|
log.debug('Skipping existing extra %s', key)
|
||||||
|
|
||||||
|
# Add GeoNetwork specific extras
|
||||||
# Add GeoNetowrk specific extras
|
|
||||||
gn_localized_url = harvest_object.job.source.url.strip('/')
|
gn_localized_url = harvest_object.job.source.url.strip('/')
|
||||||
|
|
||||||
if gn_localized_url[-3:] == 'csw':
|
if gn_localized_url[-3:] == 'csw':
|
||||||
|
@ -79,7 +106,61 @@ class GeoNetworkHarvester(CSWHarvester, SingletonPlugin):
|
||||||
package_dict['extras'].append({'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid})
|
package_dict['extras'].append({'key': 'gn_view_metadata_url', 'value': gn_localized_url + '/metadata.show?uuid=' + harvest_object.guid})
|
||||||
package_dict['extras'].append({'key': 'gn_localized_url', 'value': gn_localized_url})
|
package_dict['extras'].append({'key': 'gn_localized_url', 'value': gn_localized_url})
|
||||||
|
|
||||||
|
# Add other elements from ISO metadata
|
||||||
|
time_extents = self.infer_timeinstants(iso_values)
|
||||||
|
if time_extents:
|
||||||
|
log.info("Adding Time Instants...")
|
||||||
|
package_dict['extras'].append({'key': 'temporal-extent-instant', 'value': time_extents})
|
||||||
|
|
||||||
|
# Handle groups mapping using GeoNetwork categories
|
||||||
|
group_mapping = self.source_config.get('group_mapping', {})
|
||||||
|
|
||||||
|
if group_mapping:
|
||||||
|
try:
|
||||||
|
context = {'model': model, 'session': Session, 'user': 'harvest'}
|
||||||
|
validated_groups = []
|
||||||
|
|
||||||
|
version = self.source_config.get('version')
|
||||||
|
client = GeoNetworkClient(gn_localized_url, version)
|
||||||
|
cats = client.retrieveMetadataCategories(harvest_object.guid)
|
||||||
|
for cat in cats:
|
||||||
|
groupname = group_mapping[cat]
|
||||||
|
|
||||||
|
printname = groupname if not None else "NONE"
|
||||||
|
log.debug("category %s mapped into %s" % (cat, printname))
|
||||||
|
|
||||||
|
if groupname:
|
||||||
|
try:
|
||||||
|
data_dict = {'id': groupname}
|
||||||
|
group = get_action('group_show')(context, data_dict)
|
||||||
|
#log.info('Group %s found %s' % (groupname, group))
|
||||||
|
#if self.api_version == 1:
|
||||||
|
#validated_groups.append(group['name'])
|
||||||
|
#else:
|
||||||
|
#validated_groups.append(group['id'])
|
||||||
|
validated_groups.append({'name': groupname})
|
||||||
|
except NotFound, e:
|
||||||
|
log.warning('Group %s from category %s is not available' % (groupname, cat))
|
||||||
|
|
||||||
|
package_dict['groups'] = validated_groups
|
||||||
|
except e:
|
||||||
|
log.warning('Error handling groups for metadata %s' % harvest_object.guid)
|
||||||
|
|
||||||
# End of processing, return the modified package
|
# End of processing, return the modified package
|
||||||
return package_dict
|
return package_dict
|
||||||
|
|
||||||
|
def infer_timeinstants(self, values):
    """Collapse the harvested time instants into one comma-separated string.

    :param values: mapping of parsed ISO values; its
        "temporal-extent-instant" entry is iterated for the instants.
    :returns: the distinct instants, in first-seen order, joined by ",",
        or None when there are none.
    """
    unique_instants = []

    # Membership is checked on the list itself so that the first-seen
    # order of the instants is preserved.
    for instant in values["temporal-extent-instant"]:
        if instant in unique_instants:
            continue
        unique_instants.append(instant)

    log.info("%d TIME ISTANTS FOUND", len(unique_instants))

    if not unique_instants:
        return None

    return ",".join(unique_instants)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,72 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import logging
|
||||||
|
#import re
|
||||||
|
import urllib
|
||||||
|
import urllib2
|
||||||
|
import zipfile
|
||||||
|
from StringIO import StringIO
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
GEONETWORK_V26 = "2.6"
|
||||||
|
GEONETWORK_V210 = "2.10"
|
||||||
|
GEONETWORK_VERSIONS = [GEONETWORK_V26, GEONETWORK_V210]
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class GeoNetworkClient(object):
    """Small HTTP client for the GeoNetwork services used by the harvester.

    Only the MEF export of GeoNetwork 2.6 is implemented here. For any
    other accepted version no request is made and no info is returned.
    """

    def __init__(self, base, version):
        """Remember the service base URL and the GeoNetwork version.

        :param base: base URL of the GeoNetwork instance; request URLs are
            built by appending the service path to it.
        :param version: one of GEONETWORK_VERSIONS, or None to fall back
            to GEONETWORK_V210.
        :raises AssertionError: if version is not in GEONETWORK_VERSIONS.
        """
        if version is None:
            version = GEONETWORK_V210

        assert version in GEONETWORK_VERSIONS, \
            'Unsupported GeoNetwork version: %r' % (version,)
        self.version = version
        self.base = base

    def retrieveInfo(self, uuid):
        """Fetch the MEF export of a metadata record and parse its info.xml.

        :param uuid: identifier of the metadata record to export.
        :returns: the parsed info.xml root element, or None when the
            configured version has no implementation here or the archive
            holds no info.xml entry.
        """
        if self.version != GEONETWORK_V26:
            # Only the 2.6 MEF export is implemented; return explicitly
            # instead of falling through with unbound locals.
            logger.warning('Retrieving info is not implemented for GeoNetwork %s',
                           self.version)
            return None

        url = "%s/srv/en/mef.export" % self.base
        query = urllib.urlencode({"uuid": uuid})

        logger.info('Loading MEF for %s', uuid)
        # Passing a data payload makes urllib2 issue a POST request.
        request = urllib2.Request(url, query)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(),
                                      urllib2.HTTPRedirectHandler())

        response = opener.open(request)  # will get a ZIP file
        try:
            content = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        zfile = zipfile.ZipFile(StringIO(content))
        try:
            if 'info.xml' not in zfile.namelist():
                return None
            return etree.fromstring(zfile.read('info.xml'))
        finally:
            zfile.close()

    def retrieveMetadataCategories(self, uuid):
        """Return the category names listed in the record's info.xml.

        :param uuid: identifier of the metadata record.
        :returns: list with the "name" attribute of every
            categories/category element; empty when no info.xml could be
            retrieved.
        """
        xml = self.retrieveInfo(uuid)
        if xml is None:
            # No info document available: report no categories rather
            # than failing on a None element.
            return []

        return [cat.get('name') for cat in xml.findall('categories/category')]
|
||||||
|
|
4
setup.py
4
setup.py
|
@ -1,7 +1,7 @@
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
import sys, os
|
import sys, os
|
||||||
|
|
||||||
version = '1.0'
|
version = '1.1'
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='ckanext-geonetwork',
|
name='ckanext-geonetwork',
|
||||||
|
@ -22,7 +22,7 @@ setup(
|
||||||
install_requires=[
|
install_requires=[
|
||||||
# -*- Extra requirements: -*-
|
# -*- Extra requirements: -*-
|
||||||
],
|
],
|
||||||
entry_points=\
|
entry_points=
|
||||||
"""
|
"""
|
||||||
[ckan.plugins]
|
[ckan.plugins]
|
||||||
# Add plugins here, eg
|
# Add plugins here, eg
|
||||||
|
|
Loading…
Reference in New Issue