# Reconstructed from a whitespace-collapsed git patch:
#   From b6c9e8399ffad9215f5ee37ac9b6606134186e57
#   From: Alessio Fabrizio    Date: Fri, 11 Oct 2024 10:56:02 +0200
#   Subject: [PATCH] add controllers
#
# Files added by the patch (9 files changed, 799 insertions):
#   ckanext/d4science/controllers/home.py                         |  71
#   ckanext/d4science/controllers/organization.py                 | 135
#   ckanext/d4science/controllers/systemtype.py                   |  88
#   ckanext/d4science/d4sdiscovery/d4s_cache_controller.py        | 106
#   ckanext/d4science/d4sdiscovery/d4s_extras.py                  |  31
#   ckanext/d4science/d4sdiscovery/d4s_namespaces.py              |  39
#   ckanext/d4science/d4sdiscovery/d4s_namespaces_controller.py   | 130
#   ckanext/d4science/d4sdiscovery/d4s_namespaces_extras_util.py  |  89
#   ckanext/d4science/d4sdiscovery/icproxycontroller.py           | 110
#
# Each module is introduced below by a "File:" banner; the modules are
# independent files and are only shown back to back here.

# ---------------------------------------------------------------------------
# File: ckanext/d4science/controllers/home.py  (new file, mode 100644)
# ---------------------------------------------------------------------------
import logging
from ckan.controllers.home import HomeController
import ckan.plugins as p
from ckan.common import OrderedDict, _, g, c
import ckan.lib.search as search
import ckan.model as model
import ckan.logic as logic
import ckan.lib.maintain as maintain
import ckan.lib.base as base
import ckan.lib.helpers as h


class d4SHomeController():
    """Home page controller that lists the most recently viewed public datasets."""

    # Overriding controllers.HomeController.index method
    def index(self):
        """Render ``home/index.html``.

        Runs a ``package_search`` for the 4 public datasets with the most
        recent views and publishes the facets, count and results on ``c``.
        When the search backend raises ``SearchError`` only
        ``c.package_count`` is set (to 0). A logged-in user without an email
        address gets a flash notice linking to the profile edit page.
        """
        try:
            # package search
            context = {'model': model, 'session': model.Session,
                       'user': c.user, 'auth_user_obj': c.userobj}

            default_facet_titles = {
                'organization': _('Organizations'),
                'groups': _('Groups'),
                'tags': _('Tags'),
                'res_format': _('Formats'),
                'license_id': _('Licenses'),
            }

            # Known facets get a translated title, unknown ones keep their key.
            facets = OrderedDict()
            for facet in g.facets:
                facets[facet] = default_facet_titles.get(facet, facet)

            # Facet titles: let every IFacets plugin adjust them.
            for plugin in p.PluginImplementations(p.IFacets):
                facets = plugin.dataset_facets(facets, 'dataset')

            c.facet_titles = facets

            data_dict = {
                'q': '*:*',
                'facet.field': list(facets.keys()),
                'rows': 4,
                'start': 0,
                'sort': 'views_recent desc',
                'fq': 'capacity:"public"'
            }
            query = logic.get_action('package_search')(context, data_dict)
            c.search_facets = query['search_facets']
            c.package_count = query['count']
            c.datasets = query['results']
        except search.SearchError:
            c.package_count = 0

        if c.userobj and not c.userobj.email:
            url = h.url_for(controller='user', action='edit')
            # The first format string needs a %s placeholder: without one,
            # `% url` raises "TypeError: not all arguments converted". The
            # notice is flashed with allow_html=True, so the link is rendered.
            msg = _('Please <a href="%s">update your profile</a>'
                    ' and add your email address. ') % url + \
                _('%s uses your email address'
                  ' if you need to reset your password.') \
                % g.site_title
            h.flash_notice(msg, allow_html=True)

        return base.render('home/index.html', cache_force=True)


# ---------------------------------------------------------------------------
# File: ckanext/d4science/controllers/organization.py  (new file, mode 100644)
# ---------------------------------------------------------------------------
# encoding: utf-8

import re

import ckan.controllers.group as group
import ckan.plugins as plugins
import logging
import datetime
from urllib.parse import urlencode

from pylons.i18n import get_lang

import ckan.lib.base as base
import ckan.lib.helpers as h
import ckan.lib.maintain as maintain
import ckan.lib.navl.dictization_functions as dict_fns
import ckan.logic as logic
import ckan.lib.search as search
import ckan.model as model
import ckan.authz as authz
import ckan.lib.plugins
import ckan.plugins as plugins
from ckan.common import OrderedDict, c, g, request, _

# The controller below uses these three names; they were never bound in this
# module, so every error path ended in a NameError instead of a 403/404.
NotFound = logic.NotFound
NotAuthorized = logic.NotAuthorized
abort = base.abort


# Created by Francesco Mangiacrapa, see: #8964
class OrganizationVREController(group.GroupController):
    ''' The organization controller is for Organizations, which are implemented
    as Groups with is_organization=True and group_type='organization'. It works
    the same as the group controller apart from:
    * templates and logic action/auth functions are sometimes customized
      (switched using _replace_group_org)
    * 'bulk_process' action only works for organizations

    Nearly all the code for both is in the GroupController (for historical
    reasons).
    '''

    group_types = ['organization']

    def _guess_group_type(self, expecting_name=False):
        """Always report the ``'organization'`` group type."""
        return 'organization'

    def _replace_group_org(self, string):
        ''' substitute organization for group if this is an org'''
        return re.sub('^group', 'organization', string)

    def _update_facet_titles(self, facets, group_type):
        """Let every IFacets plugin adjust the organization facets.

        Returns the resulting facets so that a plugin returning a new object
        (rather than mutating in place) is not silently ignored; callers that
        disregard the return value behave as before.
        """
        for plugin in plugins.PluginImplementations(plugins.IFacets):
            facets = plugin.organization_facets(
                facets, group_type, None)
        return facets

    def index(self):
        """Render the paginated organization list (21 items per page).

        Aborts with 403 when the user may not ``site_read`` / ``group_list``.
        """
        group_type = self._guess_group_type()

        page = h.get_page_number(request.params) or 1
        items_per_page = 21

        context = {'model': model, 'session': model.Session,
                   'user': c.user, 'for_view': True,
                   'with_private': False}

        q = c.q = request.params.get('q', '')
        sort_by = c.sort_by_selected = request.params.get('sort')
        try:
            self._check_access('site_read', context)
            self._check_access('group_list', context)
        except NotAuthorized:
            abort(403, _('Not authorized to see this page'))

        # pass user info to context as needed to view private datasets of
        # orgs correctly
        if c.userobj:
            context['user_id'] = c.userobj.id
            context['user_is_admin'] = c.userobj.sysadmin

        # First query: names only, used to size the pager.
        data_dict_global_results = {
            'all_fields': False,
            'q': q,
            'sort': sort_by,
            'type': group_type or 'group',
        }
        global_results = self._action('group_list')(context,
                                                    data_dict_global_results)

        # Second query: full records, restricted to the current page.
        data_dict_page_results = {
            'all_fields': True,
            'q': q,
            'sort': sort_by,
            'type': group_type or 'group',
            'limit': items_per_page,
            'offset': items_per_page * (page - 1),
        }
        page_results = self._action('group_list')(context,
                                                  data_dict_page_results)

        c.page = h.Page(
            collection=global_results,
            page=page,
            url=h.pager_url,
            items_per_page=items_per_page,
        )

        c.page.items = page_results
        return base.render('organization_vre/index.html',
                           extra_vars={'group_type': group_type})

    def read(self, id, limit=20):
        """Render a single organization page.

        ``id`` may carry an ``@...`` suffix; only the part before the first
        ``@`` is used to check the controller/group-type match. Aborts with
        404 when the group is missing or not visible to the user.
        """
        group_type = self._ensure_controller_matches_group_type(
            id.split('@')[0])

        context = {'model': model, 'session': model.Session,
                   'user': c.user,
                   'schema': self._db_to_form_schema(group_type=group_type),
                   'for_view': True}
        data_dict = {'id': id, 'type': group_type}

        # unicode format (decoded from utf8)
        c.q = request.params.get('q', '')

        try:
            # Do not query for the group datasets when dictizing, as they will
            # be ignored and get requested on the controller anyway
            data_dict['include_datasets'] = False
            c.group_dict = self._action('group_show')(context, data_dict)
            c.group = context['group']
        except (NotFound, NotAuthorized):
            abort(404, _('Group not found'))

        self._read(id, limit, group_type)
        return base.render('organization_vre/read.html',
                           extra_vars={'group_type': group_type})


# ---------------------------------------------------------------------------
# File: ckanext/d4science/controllers/systemtype.py  (new file, mode 100644)
# ---------------------------------------------------------------------------
import logging
import ckan.plugins as p
from ckan.common import OrderedDict, _, g, c
import ckan.lib.search as search
import ckan.model as model
import ckan.logic as logic
import ckan.lib.maintain as maintain
import ckan.lib.base as base
import ckan.lib.helpers as h

from urllib.parse import urlencode

#from pylons.i18n import get_lang
from flask import Blueprint, render_template, g, request

import ckan.lib.base as base
import ckan.lib.navl.dictization_functions as dict_fns
import ckan.authz as authz


class d4STypeController(base.BaseController):
    """Flask-era variant of the home controller, rendering ``type/index.html``."""

    # Overriding controllers.HomeController.index method
    def index(self):
        """Render ``type/index.html``.

        Same search as the home page (4 public datasets, most recent views
        first); results are published on ``g``. On ``SearchError`` only
        ``g.package_count`` is set (to 0).
        """
        try:
            # package search
            context = {'model': model, 'session': model.Session,
                       'user': g.user, 'auth_user_obj': g.userobj}

            default_facet_titles = {
                'organization': _('Organizations'),
                'groups': _('Groups'),
                'tags': _('Tags'),
                'res_format': _('Formats'),
                'license_id': _('Licenses'),
            }

            facets = OrderedDict()
            for facet in g.facets:
                facets[facet] = default_facet_titles.get(facet, facet)

            # Facet titles
            for plugin in p.PluginImplementations(p.IFacets):
                facets = plugin.dataset_facets(facets, 'dataset')

            g.facet_titles = facets

            data_dict = {
                'q': '*:*',
                'facet.field': list(facets.keys()),
                'rows': 4,
                'start': 0,
                'sort': 'views_recent desc',
                'fq': 'capacity:"public"'
            }
            query = logic.get_action('package_search')(context, data_dict)
            g.search_facets = query['search_facets']
            g.package_count = query['count']
            g.datasets = query['results']
        except search.SearchError:
            g.package_count = 0

        if g.userobj and not g.userobj.email:
            # pylons form was: h.url_for(controller='user', action='edit')
            url = h.url_for('user.edit')
            # See the %s placeholder: without it `% url` raises TypeError.
            msg = _('Please <a href="%s">update your profile</a>'
                    ' and add your email address. ') % url + \
                _('%s uses your email address'
                  ' if you need to reset your password.') \
                % g.site_title
            h.flash_notice(msg, allow_html=True)

        # pylons form was: base.render('type/index.html', cache_force=True)
        return render_template('type/index.html', cache_force=True)


d4s_type_blueprint = Blueprint('d4s_type', __name__)


@d4s_type_blueprint.route('/')
def index():
    """Blueprint view for ``/``: delegate to ``d4STypeController.index``."""
    controller = d4STypeController()
    return controller.index()


# ---------------------------------------------------------------------------
# File: ckanext/d4science/d4sdiscovery/d4s_cache_controller.py
#       (new file, mode 100644)
# ---------------------------------------------------------------------------
import datetime
import logging
import os
import tempfile
import csv

from .icproxycontroller import NAMESPACE_ID_LABEL

log = logging.getLogger(__name__)

CATALINA_HOME = 'CATALINA_HOME'
temp_dir = None
namespaces_dir = None
NAMESPACES_DIR_NAME = "namespaces_for_catalogue"
NAMESPACES_CACHE_FILENAME = "Namespaces_Catalogue_Categories.csv"

# Created by Francesco Mangiacrapa
# francesco.mangiacrapa@isti.cnr.it
# ISTI-CNR Pisa (ITALY)


# D4S_Cache_Controller
class D4S_Cache_Controller():
    """CSV-file cache of the catalogue namespaces.

    The file lives in ``<temp dir>/namespaces_for_catalogue/`` where the temp
    dir is ``$CATALINA_HOME/temp`` when that variable is set, otherwise the
    system temp dir.
    """
    namespaces_cache_path = None
    __scheduler = None

    def __init__(self):
        """ Virtually private constructor. """
        log.debug("__init__ D4S_Cache_Controller")
        self._check_cache()

    def _check_cache(self):
        """Resolve the cache path and create an empty cache file if missing."""
        if self.namespaces_cache_path is None:
            self.init_temp_dir()
            self.namespaces_cache_path = os.path.join(namespaces_dir, NAMESPACES_CACHE_FILENAME)
            log.info("The namespaces cache is located at: %s" % self.namespaces_cache_path)

        if not os.path.exists(self.namespaces_cache_path):
            log.info("File does not exists creating it")
            try:
                # newline='' is what the csv module requires of its files.
                with open(self.namespaces_cache_path, mode='w', newline='') as namespaces_file:
                    csv.writer(namespaces_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                log.info("Cache created at %s" % self.namespaces_cache_path)
            except Exception:
                # Best effort, as before, but report through the logger
                # (with traceback) instead of print().
                log.exception("Unable to create the namespaces cache at %s" % self.namespaces_cache_path)

    def write_namespaces(self, namespace_list_of_dict):
        ''' Write the list of dictionary with namespaces'''
        # Insert Data: the file is rewritten from scratch on every call.
        with open(self.namespaces_cache_path, 'w', newline='') as namespaces_file:
            writer = csv.writer(namespaces_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            writer.writerow([NAMESPACE_ID_LABEL, 'name', 'title', 'description'])
            for namespace_dict in namespace_list_of_dict:
                writer.writerow([namespace_dict[NAMESPACE_ID_LABEL], namespace_dict['name'],
                                 namespace_dict['title'], namespace_dict['description']])

        log.info("Inserted %d namespaces in the Cache" % len(namespace_list_of_dict))

    def read_namespaces(self):
        '''Returns the list of dictionary with namespaces'''
        # Read Data
        namespace_list_of_dict = []
        try:
            with open(self.namespaces_cache_path, 'r', newline='') as namespaces_file:
                for row in csv.DictReader(namespaces_file):
                    namespace_list_of_dict.append(dict(row))

            log.debug("from Cache returning namespace_list_of_dict %s: " % namespace_list_of_dict)
            log.info("from Cache read namespace_list_of_dict with %d item/s " % len(namespace_list_of_dict))
            return namespace_list_of_dict
        except Exception:
            log.exception("Unable to read the namespaces cache at %s" % self.namespaces_cache_path)

        # Reached only after a read error; rows parsed before the error are
        # still returned, as in the original implementation.
        log.info("no namespace in the Cache returning empty list of dict")
        return namespace_list_of_dict

    @property
    def get_namespaces_cache_path(self):
        """Path of the CSV cache file (a property despite the ``get_`` name)."""
        return self.namespaces_cache_path

    @classmethod
    def init_temp_dir(cls):
        """Set the module-level ``temp_dir`` / ``namespaces_dir`` and create the directory."""
        global temp_dir
        global namespaces_dir

        catalina_home = os.environ.get(CATALINA_HOME)
        if catalina_home is not None:
            temp_dir = os.path.join(str(catalina_home), "temp")
        else:
            log.error("No environment variable for: %s" % CATALINA_HOME)

        if temp_dir is None:
            temp_dir = tempfile.gettempdir()  # using system tmp dir

        log.debug("Temp dir is: %s" % temp_dir)

        namespaces_dir = os.path.join(temp_dir, NAMESPACES_DIR_NAME)

        # exist_ok avoids the check-then-create race between processes.
        os.makedirs(namespaces_dir, exist_ok=True)


# ---------------------------------------------------------------------------
# File: ckanext/d4science/d4sdiscovery/d4s_extras.py  (new file, mode 100644)
# ---------------------------------------------------------------------------
# Created by Francesco Mangiacrapa
# francesco.mangiacrapa@isti.cnr.it
# ISTI-CNR Pisa (ITALY)

## this file also works on Python 3 ##
import logging
log = logging.getLogger(__name__)


class D4S_Extras():
    """A category together with the list of extra fields filed under it."""

    def __init__(self, category_dict=None, extras=None):
        """Store the category and the extras list.

        ``None`` (the default) means "a fresh empty dict / list". The former
        ``{}`` / ``[]`` defaults were created once and shared by every
        instance built without arguments, so appending to one instance's
        extras leaked into all the others.
        """
        self._category = {} if category_dict is None else category_dict
        self._extras = [] if extras is None else extras

    def append_extra(self, k, v):
        """Append ``{k: v}`` to the extras; a ``None`` key is ignored."""
        if k is not None:
            self._extras.append({k: v})

    @property
    def category(self):
        return self._category

    @property
    def extras(self):
        return self._extras

    def __repr__(self):
        # One-element tuples keep '%s' safe whatever the stored value is.
        return 'category: %s' % (self._category,) + ' extras: %s' % (self._extras,)

# --- the patch continues with the next file ---
# ---------------------------------------------------------------------------
# File: ckanext/d4science/d4sdiscovery/d4s_namespaces.py
#       (new file, mode 100644)
# ---------------------------------------------------------------------------
# Created by Francesco Mangiacrapa
# francesco.mangiacrapa@isti.cnr.it
# ISTI-CNR Pisa (ITALY)

# Sample (truncated) of a namespaces dictionary, kept from the original file:
#OrderedDict([(u'@id', u'extra_information'), (u'name', u'Extra Information'),
#   (u'title', u'Extras'), (u'description', u'This section is about Extra(s)')]),
# u'contact': OrderedDict([(u'@id', u'contact'), (u'name', u'Contact'),
#   (u'title', u'Contact Title'), (u'description', u'This section is about Contact(s)')]),
# u'developer_information': OrderedDict([(u'@id', u'developer_information'),
#   (u'name', u'Developer'), (u'title', u'Developer Information'),
#   (u'description', u'This section is about Developer(s)')])}

import logging
log = logging.getLogger(__name__)


class D4S_Namespaces():
    """Read-only record describing one namespace: id, name, title, description."""

    def __init__(self, id=None, name=None, title=None, description=None):
        self._id = id
        self._name = name
        self._title = title
        self._description = description

    @property
    def id(self):
        return self._id

    @property
    def name(self):
        return self._name

    @property
    def title(self):
        return self._title

    @property
    def description(self):
        return self._description

    def __repr__(self):
        # A single tuple argument keeps the formatting safe when a field
        # itself is a tuple (a bare `'%s' % value` would unpack it and raise
        # TypeError or print only its element). Output is otherwise unchanged:
        # {id: ..., name: ..., title: ..., description: ...}
        return '{id: %s, name: %s, title: %s, description: %s}' % (
            self.id, self.name, self.title, self.description)


# ---------------------------------------------------------------------------
# File: ckanext/d4science/d4sdiscovery/d4s_namespaces_controller.py
#       (new file, mode 100644) -- imports and constants
# ---------------------------------------------------------------------------
import logging
import time

from .d4s_cache_controller import D4S_Cache_Controller
from .icproxycontroller import D4S_IS_DiscoveryCatalogueNamespaces
from threading import Event, Thread

CATEGORY = 'category'
NOCATEOGORY = 'nocategory'

log = logging.getLogger(__name__)

# Callable that stops the periodic refresh; None until the refresher starts.
cancel_future_calls = None

# Refreshing time for namespaces cache in secs.
NAMESPACES_CACHE_REFRESHING_TIME = 60 * 60


# Function to call repeatedly another function
def call_repeatedly(interval, func, *args):
    """Call ``func(*args)`` every ``interval`` seconds on a daemon thread.

    The first call happens after ``interval`` seconds. Returns a callable
    that stops the loop when invoked.
    """
    log.info("call_repeatedly called on func '{}' with interval {} sec".format(func.__name__, interval))
    stopped = Event()

    def loop():
        while not stopped.wait(interval):  # the first call is in `interval` secs
            try:
                func(*args)
            except Exception:
                # One failing run must not end the periodic refresh.
                log.exception("Periodic call of '{}' failed".format(func.__name__))

    # daemon=True replaces the deprecated Thread.setDaemon(True).
    th = Thread(name='daemon_caching_namespaces', target=loop, daemon=True)
    th.start()
    return stopped.set


def reload_namespaces_from_IS(urlICProxy, resourceID, gcubeToken):
    """Fetch the namespaces from the IS and, when any are found, rewrite the cache.

    Never raises: failures are logged and the cache is left untouched.
    """
    log.info("_reload_namespaces_from_IS called")
    try:
        discovery_ctg_namespaces = D4S_IS_DiscoveryCatalogueNamespaces(urlICProxy, resourceID, gcubeToken)
        namespaces_list_of_dict = discovery_ctg_namespaces.getNamespacesDictFromResource()

        if namespaces_list_of_dict:
            log.debug("namespaces read from IS are: %s" % namespaces_list_of_dict)
            D4S_Cache_Controller().write_namespaces(namespaces_list_of_dict)
        else:
            log.info("namespaces list read from IS is empty. Skipping caching update")

    except Exception:
        # Reported through the logger (with traceback) rather than print().
        log.exception("Error occurred on reading namespaces from IS and refilling the cache!")


# Created by Francesco Mangiacrapa
# francesco.mangiacrapa@isti.cnr.it
# ISTI-CNR Pisa (ITALY)


# D4S_Namespaces_Controller is used to discover namespaces for Catalogue Categories (implemented as a Singleton)
# @param: urlICProxy is the URI of IC proxy RESTful service provided by IS
# @param: resourceID is the resource ID of the Generic Resource: "Namespaces Catalogue Categories"
# @param: gcubeToken the gcube token used to contact the IC proxy
class D4S_Namespaces_Controller():
    """Singleton serving the catalogue namespaces from the local cache."""
    __instance = None

    @staticmethod
    def getInstance():
        """ Static access method. """
        if D4S_Namespaces_Controller.__instance is None:
            D4S_Namespaces_Controller()

        return D4S_Namespaces_Controller.__instance

    def __init__(self):
        """ Virtually private constructor. """
        log.debug("__init__ D4S_Namespaces_Controller")

        if D4S_Namespaces_Controller.__instance is not None:
            raise Exception("This class is a singleton!")
        D4S_Namespaces_Controller.__instance = self

        self._d4s_cache_controller = D4S_Cache_Controller()
        self._urlICProxy = None
        self._resourceID = None
        self._gcubeToken = None

    def load_namespaces(self, urlICProxy, resourceID, gcubeToken):
        """Remember the IS connection parameters and return the namespaces list."""
        log.debug("readNamespaces called")
        self._urlICProxy = urlICProxy
        self._resourceID = resourceID
        self._gcubeToken = gcubeToken
        return self._check_namespaces()

    def _read_namespaces(self):
        return self._d4s_cache_controller.read_namespaces()

    def _check_namespaces(self):
        """Return the cached namespaces, filling the cache from the IS when empty.

        Also starts, once per process, the background refresher.
        NOTE(review): the collapsed source does not show whether the refresher
        start was nested under the "cache is empty" branch; it is kept
        unconditional here so that a pre-filled cache is still refreshed.
        """
        global cancel_future_calls
        log.debug("_check_namespaces called")

        if self._d4s_cache_controller is None:
            self._d4s_cache_controller = D4S_Cache_Controller()

        namespace_list = self._read_namespaces()

        # when the Cache is empty
        if not namespace_list:
            # reading namespaces from IS and filling the DB
            log.info("The Cache is empty. Reading the namespace from IS and filling the Cache")
            reload_namespaces_from_IS(self._urlICProxy, self._resourceID, self._gcubeToken)
            # reloading the namespaces from the cache
            namespace_list = self._read_namespaces()

        # starting Thread daemon for refreshing the namespaces Cache.
        # The refresher binds its arguments once and is never restarted, so it
        # is only started after load_namespaces() has supplied the IC proxy
        # URL; starting it earlier would pin it to None parameters forever.
        if cancel_future_calls is None and self._urlICProxy is not None:
            cancel_future_calls = call_repeatedly(NAMESPACES_CACHE_REFRESHING_TIME, reload_namespaces_from_IS,
                                                  self._urlICProxy,
                                                  self._resourceID,
                                                  self._gcubeToken)

        return namespace_list

    def get_dict_ctg_namespaces(self):
        """Return the namespaces as an ordered dict indexed by namespace id."""
        log.debug("get_dict_ctg_namespaces called")
        namespace_list_of_dict = self._check_namespaces()
        return self.convert_namespaces_to_d4s_namespacedict(namespace_list_of_dict)

    # Private method
    @staticmethod
    def convert_namespaces_to_d4s_namespacedict(namespace_list_of_dict):
        log.debug("convert_namespaces_to_d4s_namespacedict called on %s" % namespace_list_of_dict)
        return D4S_IS_DiscoveryCatalogueNamespaces.to_namespaces_dict_index_for_id(namespace_list_of_dict)


# ---------------------------------------------------------------------------
# File: ckanext/d4science/d4sdiscovery/d4s_namespaces_extras_util.py
#       (new file, mode 100644)
# ---------------------------------------------------------------------------
import logging
import collections
from .d4s_extras import D4S_Extras

CATEGORY = 'category'
NOCATEOGORY = 'nocategory'

log = logging.getLogger(__name__)

# Created by Francesco Mangiacrapa
# francesco.mangiacrapa@isti.cnr.it
# ISTI-CNR Pisa (ITALY)


# D4S_Namespaces_Extra_Util is used to get the extra fields indexed for D4Science namespaces
# @param: namespace_dict is the namespace dict of D4Science namespaces (defined in the Generic Resource: "Namespaces Catalogue Categories")
# @param: extras is the collection of extra fields for a certain item
class D4S_Namespaces_Extra_Util():

    def get_extras_indexed_for_namespaces(self, namespace_dict, extras):
        """Group extra fields by the namespace their key is prefixed with.

        :param namespace_dict: mapping namespace id -> namespace description;
            a key belongs to a namespace when it starts with ``"<id>:"``.
        :param extras: iterable of ``(key, value)`` pairs (the loops unpack
            two items per element).
        :returns: an ``OrderedDict`` mapping each namespace id that matched at
            least one key to a ``D4S_Extras`` holding those fields with the
            prefix stripped, in ``namespace_dict`` order, followed by a
            ``'nocategory'`` entry for keys matching no namespace (if any).
        """
        # The pairs are walked once per namespace and once more for the
        # un-namespaced keys, so a one-shot iterable must be materialized.
        extras = list(extras)
        prefixes = [(namespaceid, namespaceid + ":") for namespaceid in namespace_dict]
        extras_for_categories = collections.OrderedDict()

        # ADDING ALL EXTRAS WITH NAMESPACE
        for namespaceid, nms in prefixes:
            for k, v in extras:
                if not k.startswith(nms):
                    continue
                dict_extras = extras_for_categories.get(namespaceid)
                if dict_extras is None:
                    dict_extras = D4S_Extras(namespace_dict.get(namespaceid), [])
                    extras_for_categories[namespaceid] = dict_extras
                # Strip only the leading "<id>:"; str.replace() would also
                # delete later occurrences of the prefix inside the key.
                dict_extras.append_extra(k[len(nms):], v)
                log.debug("adding d4s_extra: %s " % dict_extras + " - to namespace id: %s" % namespaceid)

        # ADDING ALL EXTRAS WITHOUT NAMESPACE
        for k, v in extras:
            if any(k.startswith(nms) for _, nms in prefixes):
                continue
            log.debug("key: %s " % k + " - have not namespace")
            dict_extras_no_cat = extras_for_categories.get(NOCATEOGORY)
            if dict_extras_no_cat is None:
                dict_extras_no_cat = D4S_Extras(NOCATEOGORY, [])
                extras_for_categories[NOCATEOGORY] = dict_extras_no_cat
            log.debug("NOCATEOGORY adding key: %s " % k + " - value: %s" % v)
            dict_extras_no_cat.append_extra(k, v)

        return extras_for_categories


# ---------------------------------------------------------------------------
# File: ckanext/d4science/d4sdiscovery/icproxycontroller.py
#       (new file, mode 100644)
# ---------------------------------------------------------------------------
import logging
import urllib.request, urllib.error, urllib.parse
from lxml import etree

import xmltodict
import collections

from .d4s_namespaces import D4S_Namespaces

XPATH_NAMESPACES = "/Resource/Profile/Body/namespaces"
gcubeTokenParam = "gcube-token"
NAMESPACE_ID_LABEL = '@id'

log = logging.getLogger(__name__)


# Created by Francesco Mangiacrapa
# francesco.mangiacrapa@isti.cnr.it
# ISTI-CNR Pisa (ITALY)

def getResponseBody(uri):
    """GET ``uri`` (20 s timeout) and return the response body as bytes.

    Returns ``None`` on ``HTTPError`` / ``URLError``; other exceptions
    propagate to the caller.
    """
    req = urllib.request.Request(uri)
    # Callers put the gcube token in the query string: keep it out of the logs.
    loggable_uri = uri.split('?', 1)[0]
    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(req, timeout=20) as resp:
            return resp.read()
    except urllib.error.HTTPError as e:
        log.error("Error on contacting URI: %s" % loggable_uri)
        log.error("HTTPError: %d" % e.code)
        return None
    except urllib.error.URLError as e:
        # Not an HTTP-specific error (e.g. connection refused)
        log.error("URLError - Input URI: %s " % loggable_uri + " is not valid!!")
        return None


# D4S_IS_DiscoveryCatalogueNamespaces is used to discover namespaces for Catalogue Categories.
# @param: urlICProxy is the URI of IC proxy RESTful service provided by IS
# @param: resourceID is the resource ID of the Generic Resource: "Namespaces Catalogue Categories"
# @param: gcubeToken the gcube token used to contact the IC proxy
class D4S_IS_DiscoveryCatalogueNamespaces():
    """Reads the catalogue namespaces from a Generic Resource via the IC proxy."""

    def __init__(self, urlICProxy, resourceID, gcubeToken):
        self.urlICProxy = urlICProxy
        self.resourceID = resourceID
        self.gcubeToken = gcubeToken

    def getNamespacesDictFromResource(self):
        """Return the namespaces declared in the resource as a list of dicts.

        The resource XML is fetched from
        ``<urlICProxy>/<resourceID>?gcube-token=<token>``, the element at
        ``XPATH_NAMESPACES`` is converted with ``xmltodict`` and its
        ``namespace`` children are returned. Never raises: every failure is
        logged and yields an empty list.
        """
        doc = {}
        namespace_list = []

        try:
            resource_url = self.urlICProxy + "/" + self.resourceID
            uri = resource_url + "?" + gcubeTokenParam + "=" + self.gcubeToken
            # Log the URL without its query string: the token is a credential.
            log.info("Contacting URL: %s" % resource_url)
            theResource = getResponseBody(uri)
            if theResource is None:
                # getResponseBody has already logged the HTTP/URL error.
                log.info("Returning empty list of namespaces")
                return namespace_list
            log.debug("Resource returned %s " % theResource)
            theResourceXML = etree.XML(theResource)
            theNamespaces = theResourceXML.xpath(XPATH_NAMESPACES)

            # xpath() returns a list that is empty when nothing matches, so
            # test it before touching element 0 (indexing first made the
            # "not found" branch unreachable).
            if theNamespaces:
                bodyToString = etree.tostring(theNamespaces[0])
                log.debug("The body %s" % bodyToString)
                doc = xmltodict.parse(bodyToString)
            else:
                log.warning("No Namespace for Catalogue Categories found, returning None")
        except Exception as inst:
            log.error("Error on getting catalogue namespaces: " + str(inst))
            log.info("Returning empty list of namespaces")
            return namespace_list

        log.debug("IS namespaces resource to dict is: %s" % doc)

        # An empty <namespaces/> element is parsed as None and a text-only one
        # as str, so only a mapping can hold 'namespace' children.
        namespaces = doc.get('namespaces') if doc else None
        if isinstance(namespaces, dict):
            found = namespaces.get('namespace')
            # xmltodict yields a list only for repeated children; a single
            # <namespace> comes back as one dict and must be wrapped.
            if isinstance(found, dict):
                found = [found]
            if isinstance(found, list):
                namespace_list = [ns for ns in found if isinstance(ns, dict)]

        log.info("Loaded %d namespaces from IS resource" % len(namespace_list))
        return namespace_list

    @staticmethod
    def to_namespaces_dict_index_for_id(namespace_list):
        """Index a list of namespace dicts by their ``@id``.

        :param namespace_list: list of mappings with the keys ``@id``,
            ``name``, ``title`` and ``description``; may be ``None`` or empty.
        :returns: ``OrderedDict`` id -> ``D4S_Namespaces``. Entries without an
            ``@id``, or whose conversion fails, are skipped (failures are
            logged).
        """
        namespace_dict = collections.OrderedDict()
        log.debug("namespaces to dict: %s" % namespace_list)
        try:
            for namespace in namespace_list or []:
                try:
                    if NAMESPACE_ID_LABEL in namespace:
                        namespace_dict[namespace[NAMESPACE_ID_LABEL]] = D4S_Namespaces(
                            namespace[NAMESPACE_ID_LABEL],
                            namespace['name'],
                            namespace['title'],
                            namespace['description'])
                except Exception as inst:
                    log.error("Error on converting catalogue namespaces: " + str(inst))
        except Exception as inst:
            # e.g. namespace_list is not iterable
            log.error("Error on checking namespace_list: " + str(inst))
        return namespace_dict