ckanext-d4science_theme/ckanext/d4science_theme/d4sdiscovery/icproxycontroller.py

import logging
import urllib2
from lxml import etree

import xmltodict
import collections

from d4s_namespaces import D4S_Namespaces

# XPath used to locate the <namespaces> element inside the XML resource
# returned by the IC proxy.
XPATH_NAMESPACES = "/Resource/Profile/Body/namespaces"
# Name of the query-string parameter that carries the gcube token.
gcubeTokenParam = "gcube-token"
# Key under which the id of a namespace is looked up in each parsed
# namespace entry.
NAMESPACE_ID_LABEL = '@id'

# Module-level logger.
log = logging.getLogger(__name__)


# Created by Francesco Mangiacrapa
# francesco.mangiacrapa@isti.cnr.it
# ISTI-CNR Pisa (ITALY)

def getResponseBody(uri, timeout=20):
    """Perform a request on *uri* and return the raw response body.

    :param uri: the URI to contact
    :param timeout: timeout in seconds passed to ``urllib2.urlopen``
        (defaults to 20, the value previously hard-coded)
    :return: the response body as returned by ``read()``, or ``None`` when
        opening the URI fails with ``urllib2.HTTPError`` or
        ``urllib2.URLError`` (both are logged)

    Errors raised while reading the body are not caught here and propagate
    to the caller.
    """
    req = urllib2.Request(uri)
    try:
        resp = urllib2.urlopen(req, timeout=timeout)
    except urllib2.HTTPError as e:
        log.error("Error on contacting URI: %s" % uri)
        log.error("HTTPError: %d" % e.code)
        return None
    except urllib2.URLError as e:
        # Not an HTTP-specific error (e.g. connection refused)
        log.error("URLError - Input URI: %s " % uri + " is not valid!!")
        # Keep the underlying cause in the log, it was previously discarded
        log.error("URLError reason: %s" % (e.reason,))
        return None

    # 200
    try:
        return resp.read()
    finally:
        # Always release the underlying connection, even if read() fails
        resp.close()


# D4S_IS_DiscoveryCatalogueNamespaces is used to discover the namespaces for Catalogue Categories.
# @param: urlICProxy is the URI of the IC proxy RESTful service provided by the IS
# @param: resourceID is the resource ID of the Generic Resource: "Namespaces Catalogue Categories"
# @param: gcubeToken is the gcube token used to contact the IC proxy
class D4S_IS_DiscoveryCatalogueNamespaces():
    """Discover the Catalogue Categories namespaces through the IC proxy.

    The resource identified by ``resourceID`` is fetched from the IC proxy,
    its ``/Resource/Profile/Body/namespaces`` element is converted to a
    dictionary with ``xmltodict`` and the contained ``namespace`` entries
    are exposed as a list.

    :param urlICProxy: base URI of the IC proxy service
    :param resourceID: ID of the resource holding the namespaces
    :param gcubeToken: gcube token sent as the ``gcube-token`` query parameter
    """

    def __init__(self, urlICProxy, resourceID, gcubeToken):
        self.urlICProxy = urlICProxy
        self.resourceID = resourceID
        self.gcubeToken = gcubeToken

    def getNamespacesDictFromResource(self):
        """Fetch the resource and return its ``namespace`` entries.

        :return: a list with one parsed entry per ``<namespace>`` element;
            an empty list when the resource cannot be fetched or parsed, or
            when it contains no namespace. This method logs failures instead
            of raising them.
        """
        doc = {}
        namespace_list = []

        try:
            resource_uri = self.urlICProxy + "/" + self.resourceID
            uri = resource_uri + "?" + gcubeTokenParam + "=" + self.gcubeToken
            # Log the URI without its query string so that the gcube token
            # is not written to the log by this method.
            log.info("Contacting URL: %s" % resource_uri)
            theResource = getResponseBody(uri)
            log.debug("Resource returned %s " % theResource)

            if theResource is None:
                # getResponseBody already logged the reason of the failure
                log.error("Error on getting catalogue namespaces: no response from %s" % resource_uri)
                log.info("Returning empty list of namespaces")
                return namespace_list

            theResourceXML = etree.XML(theResource)
            theNamespaces = theResourceXML.xpath(XPATH_NAMESPACES)

            # xpath() returns a (possibly empty) list: test it before indexing
            if theNamespaces:
                bodyToString = etree.tostring(theNamespaces[0])
                log.debug("The body %s" % bodyToString)
                doc = xmltodict.parse(bodyToString)
            else:
                log.warning("No Namespace for Catalogue Categories found, returning empty list")
        except Exception as inst:
            log.error("Error on getting catalogue namespaces: " + str(inst))
            log.info("Returning empty list of namespaces")
            return namespace_list

        log.debug("IS namespaces resource to dict is: %s" % doc)

        # An empty <namespaces/> element is parsed to None, so make sure a
        # mapping is available before looking for its 'namespace' entries.
        namespaces = doc.get('namespaces') if doc else None
        if isinstance(namespaces, dict) and 'namespace' in namespaces:
            found = namespaces['namespace']
            if isinstance(found, list):
                namespace_list = found
            elif found is not None:
                # xmltodict yields a single item instead of a list when only
                # one <namespace> child is present: always return a list.
                namespace_list = [found]

        log.info("Loaded %d namespaces from IS resource" % len(namespace_list))
        return namespace_list

    @staticmethod
    def to_namespaces_dict_index_for_id(namespace_list):
        """Index the given namespace entries by their ``@id``.

        :param namespace_list: the entries returned by
            ``getNamespacesDictFromResource``; a single mapping is accepted
            too and treated as a one-element list
        :return: a ``collections.OrderedDict`` mapping each namespace id to
            a ``D4S_Namespaces`` built from the ``@id``, ``name``, ``title``
            and ``description`` values of the entry. Entries that are not
            mappings, have no ``@id`` or fail the conversion are skipped
            (conversion errors are logged).
        """
        namespace_dict = collections.OrderedDict()
        log.debug("namespaces to dict: %s" % (namespace_list,))
        try:
            if not namespace_list:
                return namespace_dict

            if isinstance(namespace_list, dict):
                # A single namespace entry was passed instead of a list
                namespace_list = [namespace_list]

            for namespace in namespace_list:
                try:
                    if isinstance(namespace, dict) and NAMESPACE_ID_LABEL in namespace:
                        namespace_dict[namespace[NAMESPACE_ID_LABEL]] = D4S_Namespaces(
                            namespace[NAMESPACE_ID_LABEL],
                            namespace['name'],
                            namespace['title'],
                            namespace['description'])
                except Exception as inst:
                    # A broken entry must not prevent loading the other ones
                    log.error("Error on converting catalogue namespaces: " + str(inst))
        except Exception as inst:
            log.error("Error on checking namespace_list: " + str(inst))
        return namespace_dict