From c0354a322e1a034a444b0414ccb20e4a749410e2 Mon Sep 17 00:00:00 2001 From: Alessio Fabrizio Date: Mon, 2 Dec 2024 15:47:41 +0100 Subject: [PATCH] feat: Add privatedataset inside the plugin Add privatedataset files (backend) to d4science plugin --- .../ckanext/d4science_theme/db.py | 2 +- .../fanstatic/allowed_users.js | 56 ++++ .../ckanext/d4science_theme/helpers.py | 12 +- .../ckanext/d4science_theme/plugin.py | 307 +++++++++++++++++- .../privatedatasets/actions.py | 239 ++++++++++++++ .../d4science_theme/privatedatasets/auth.py | 130 ++++++++ .../privatedatasets/constants.py | 28 ++ .../privatedatasets/converters_validators.py | 108 ++++++ .../privatedatasets/parser/__init__.py | 0 .../privatedatasets/parser/fiware.py | 69 ++++ .../d4science_theme/privatedatasets/views.py | 53 +++ 11 files changed, 992 insertions(+), 12 deletions(-) create mode 100644 ckanext-d4science_theme/ckanext/d4science_theme/fanstatic/allowed_users.js create mode 100644 ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/actions.py create mode 100644 ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/auth.py create mode 100644 ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/constants.py create mode 100644 ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/converters_validators.py create mode 100644 ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/parser/__init__.py create mode 100644 ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/parser/fiware.py create mode 100644 ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/views.py diff --git a/ckanext-d4science_theme/ckanext/d4science_theme/db.py b/ckanext-d4science_theme/ckanext/d4science_theme/db.py index 034a3ac..88099d9 100644 --- a/ckanext-d4science_theme/ckanext/d4science_theme/db.py +++ b/ckanext-d4science_theme/ckanext/d4science_theme/db.py @@ -27,7 +27,6 @@ log = getLogger(__name__) AllowedUser = None def init_db(model): - log.debug("call 
initDB...") global AllowedUser if AllowedUser is None: @@ -38,6 +37,7 @@ def init_db(model): '''Finds all the instances required.''' query = model.Session.query(cls).autoflush(False) results = query.filter_by(**kw).all() + ### TODO: capire se questo controllo serve log.debug("results in get %s", results) if not isinstance(results, list): log.debug("Errore: il risultato di get() non è una lista. Risultato:", results) diff --git a/ckanext-d4science_theme/ckanext/d4science_theme/fanstatic/allowed_users.js b/ckanext-d4science_theme/ckanext/d4science_theme/fanstatic/allowed_users.js new file mode 100644 index 0000000..0d27556 --- /dev/null +++ b/ckanext-d4science_theme/ckanext/d4science_theme/fanstatic/allowed_users.js @@ -0,0 +1,56 @@ +/* + * (C) Copyright 2014 CoNWeT Lab., Universidad Politécnica de Madrid + * + * This file is part of CKAN Private Dataset Extension. + * + * CKAN Private Dataset Extension is free software: you can redistribute it and/or + * modify it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * CKAN Private Dataset Extension is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + * License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with CKAN Private Dataset Extension. If not, see + * <http://www.gnu.org/licenses/>. 
+ * + */ + +/* Dataset allowed_users, searchable and acquire_url toggler + * allowed_users, acquire_url and searchable can only be active when a + * user attempts to create a private dataset + */ + +this.ckan.module('allowed-users', function ($, _) { + return { + initialize: function() { + this.original_acquire_url = $('[name=acquire_url]').val(); + $('#field-private').on('change', this._onChange); + this._onChange(); //Initial + }, + _onChange: function() { + var ds_private = $('#field-private').val(); + + if (ds_private == 'True') { + $('#field-allowed_users_str').prop('disabled', false); //Enable + $('#field-acquire_url').prop('disabled', false); //Enable + $('#field-searchable').prop('disabled', false); //Enable + $('[name=acquire_url]').val(this.original_acquire_url); //Set previous acquire URL + } else { + $('#field-allowed_users_str').prop('disabled', true); //Disable + $('#field-acquire_url').prop('disabled', true); //Disable + $('#field-searchable').prop('disabled', true); //Disable + + //Remove previous values + $('#field-allowed_users_str').select2('val', ''); + this.original_acquire_url = $('[name=acquire_url]').val(); //Get previous value + $('[name=acquire_url]').val(''); //Acquire URL should be reseted + $('#field-searchable').val('True'); + } + } + }; + }); + \ No newline at end of file diff --git a/ckanext-d4science_theme/ckanext/d4science_theme/helpers.py b/ckanext-d4science_theme/ckanext/d4science_theme/helpers.py index e6ffcbd..e70b5e8 100644 --- a/ckanext-d4science_theme/ckanext/d4science_theme/helpers.py +++ b/ckanext-d4science_theme/ckanext/d4science_theme/helpers.py @@ -733,7 +733,7 @@ def get_site_statistics() -> dict[str, int]: 'organization_count': len(logic.get_action('organization_list')({}, {})) } -#private dataset +####### PRIVATE DATASETS SECTION ######## def is_dataset_acquired(pkg_dict): @@ -749,11 +749,11 @@ def is_dataset_acquired(pkg_dict): return False def is_owner(pkg_dict): - return False - #if tk.c.userobj is not None: - # 
return tk.c.userobj.id == pkg_dict['creator_user_id'] - #else: - # return False + #return False #TODO: debug this + if tk.c.userobj is not None: + return tk.c.userobj.id == pkg_dict['creator_user_id'] + else: + return False def get_allowed_users_str(users): diff --git a/ckanext-d4science_theme/ckanext/d4science_theme/plugin.py b/ckanext-d4science_theme/ckanext/d4science_theme/plugin.py index 2076b60..5cd93b1 100644 --- a/ckanext-d4science_theme/ckanext/d4science_theme/plugin.py +++ b/ckanext-d4science_theme/ckanext/d4science_theme/plugin.py @@ -15,7 +15,7 @@ from ckan.config.middleware.common_middleware import TrackingMiddleware import ckan.lib.dictization.model_save as model_save #from ckan.controllers.home import HomeController #from ckan.plugins import IRoutes -from flask import Blueprint, render_template +from flask import Blueprint, render_template, Flask, g, redirect, url_for from ckan.types import Context from typing import ( @@ -26,8 +26,15 @@ from typing import ( from ckan.common import ( g ) -from flask import Flask, g +#private datasets imports +from ckan.lib.plugins import DefaultPermissionLabels +from ckanext.d4science_theme.privatedatasets import auth, actions, constants, converters_validators as conv_val +from ckanext.d4science_theme import db +from ckanext.d4science_theme.privatedatasets.views import acquired_datasets +from ckan.lib import search + +HIDDEN_FIELDS = [constants.ALLOWED_USERS, constants.SEARCHABLE] log = getLogger(__name__) @@ -172,7 +179,7 @@ def _init_TrackingMiddleware(self, app, config): self.engine = sa.create_engine(sqlalchemy_url) -class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm): +class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm, DefaultPermissionLabels): plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IDatasetForm, inherit=True) plugins.implements(plugins.ITemplateHelpers) @@ -184,6 +191,14 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, 
toolkit.DefaultDatasetForm) plugins.implements(plugins.IValidators) plugins.implements(plugins.IPackageController, inherit=True) + #PRIVATE DATASETS SECTION + plugins.implements(plugins.IAuthFunctions) + #plugins.implements(plugins.IRoutes, inherit=True) + plugins.implements(plugins.IActions) + plugins.implements(plugins.IPermissionLabels) + plugins.implements(plugins.IResourceController) + + # IConfigurer def update_config(self, config_): # Add this plugin's templates dir to CKAN's extra_template_paths, so @@ -202,13 +217,26 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm) toolkit.add_resource('assets', 'd4science_theme') def _modify_package_schema(self): - log.debug("*** modify package ***") - # Personalizza il campo 'extras' rimuovendo il validatore extra_key_not_in_root_schema return { + #private datasets 'private': [toolkit.get_validator('ignore_missing'), toolkit.get_validator('boolean_validator')], + constants.ALLOWED_USERS_STR: [toolkit.get_validator('ignore_missing'), + conv_val.private_datasets_metadata_checker], + constants.ALLOWED_USERS: [conv_val.allowed_users_convert, + toolkit.get_validator('ignore_missing'), + conv_val.private_datasets_metadata_checker], + constants.ACQUIRE_URL: [toolkit.get_validator('ignore_missing'), + conv_val.private_datasets_metadata_checker, + conv_val.url_checker, + toolkit.get_converter('convert_to_extras')], + constants.SEARCHABLE: [toolkit.get_validator('ignore_missing'), + conv_val.private_datasets_metadata_checker, + toolkit.get_converter('convert_to_extras'), + toolkit.get_validator('boolean_validator')], + #d4science_theme 'extras': { 'id': [toolkit.get_validator('ignore')], # Ignora 'id' come prima 'key': [toolkit.get_validator('not_empty'), toolkit.get_validator('unicode_safe'), validators.ignore_duplicate_keys], # Aggiunto ignore duplicate @@ -247,6 +275,14 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm) schema = 
remove_check_replicated_custom_key(schema) #log.debug("show_package1.5 no before %s", schema) schema.update({ + #private datasets + constants.ALLOWED_USERS: [conv_val.get_allowed_users, + toolkit.get_validator('ignore_missing')], + constants.ACQUIRE_URL: [toolkit.get_converter('convert_from_extras'), + toolkit.get_validator('ignore_missing')], + constants.SEARCHABLE: [toolkit.get_converter('convert_from_extras'), + toolkit.get_validator('ignore_missing')], + #d4science_theme 'extras': { 'id': [toolkit.get_validator('ignore')], # Ignora 'id' come prima 'key': [toolkit.get_validator('not_empty'), toolkit.get_validator('unicode_safe'), validators.ignore_duplicate_keys], # Aggiunto ignore duplicate @@ -255,6 +291,19 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm) }) return schema + + #IDatasetForm + def is_fallback(self): + # Return True to register this plugin as the default handler for package types not handled by any other IDatasetForm plugin + return True + + #IDatasetForm + def package_types(self): + # This plugin doesn't handle any special package types, it just + # registers itself as the default (above). 
+ return [] + + #IDatasetForm def is_fallback(self): # Return True to register this plugin as the default handler for package types not handled by any other IDatasetForm plugin @@ -351,6 +400,7 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm) 'd4science_get_location_to_bboxes' : helpers.get_location_to_bboxes, 'd4science_get_content_moderator_system_placeholder': helpers.get_content_moderator_system_placeholder, 'd4science_get_site_statistics': helpers.get_site_statistics, + #privatedatasets section 'is_dataset_acquired': helpers.is_dataset_acquired, 'get_allowed_users_str': helpers.get_allowed_users_str, 'is_owner': helpers.is_owner, @@ -462,6 +512,7 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm) ('/organization_vre', 'organization_vre', d4sOC.index), ('/tags', 'tags', tags), ('/groups', 'groups', groups), + ('/dashboard/acquired', 'acquired_datasets', acquired_datasets) #privatedatasets rule ] for rule in rules: blueprint.add_url_rule(*rule) @@ -508,3 +559,249 @@ class D4Science_ThemePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm) return facets_dict + ###################################################################### + ######################## PRIVATEDATASET FORM ######################### + ###################################################################### + + def __init__(self, name=None): + self.indexer = search.PackageSearchIndex() + + def is_fallback(self): + # Return True to register this plugin as the default handler for package types not handled by any other IDatasetForm plugin + return True + + def package_types(self): + # This plugin doesn't handle any special package types, it just + # registers itself as the default (above). 
+ return [] + + ###################################################################### + ########################### AUTH FUNCTIONS ########################### + ###################################################################### + + def get_auth_functions(self): + auth_functions = {'package_show': auth.package_show, + 'package_update': auth.package_update, + 'resource_show': auth.resource_show, + constants.PACKAGE_ACQUIRED: auth.package_acquired, + constants.ACQUISITIONS_LIST: auth.acquisitions_list, + constants.PACKAGE_DELETED: auth.revoke_access} + + return auth_functions + + ###################################################################### + ############################## IACTIONS ############################## + ###################################################################### + + def get_actions(self): + action_functions = {constants.PACKAGE_ACQUIRED: actions.package_acquired, + constants.ACQUISITIONS_LIST: actions.acquisitions_list, + constants.PACKAGE_DELETED: actions.revoke_access} + + return action_functions + + ###################################################################### + ######################### IPACKAGECONTROLLER ######################### + ###################################################################### + + def _delete_pkg_atts(self, pkg_dict, attrs): + for attr in attrs: + if attr in pkg_dict: + del pkg_dict[attr] + + def before_dataset_index(self, pkg_dict): + + if 'extras_' + constants.SEARCHABLE in pkg_dict: + if pkg_dict['extras_searchable'] == 'False': + pkg_dict['capacity'] = 'private' + else: + pkg_dict['capacity'] = 'public' + + return pkg_dict + + def after_dataset_create(self, context, pkg_dict): + session = context['session'] + update_cache = False + + db.init_db(context['model']) + + # Get the users and the package ID + if constants.ALLOWED_USERS in pkg_dict: + + allowed_users = pkg_dict[constants.ALLOWED_USERS] + package_id = pkg_dict['id'] + + # Get current users + users = 
db.AllowedUser.get(package_id=package_id) + + # Delete users and save the list of current users + current_users = [] + for user in users: + current_users.append(user.user_name) + if user.user_name not in allowed_users: + session.delete(user) + update_cache = True + + # Add non existing users + for user_name in allowed_users: + if user_name not in current_users: + out = db.AllowedUser() + out.package_id = package_id + out.user_name = user_name + out.save() + session.add(out) + update_cache = True + + session.commit() + + # The cache should be updated. Otherwise, the system may return + # outdated information in future requests + if update_cache: + new_pkg_dict = toolkit.get_action('package_show')( + {'model': context['model'], + 'ignore_auth': True, + 'validate': False, + 'use_cache': False}, + {'id': package_id}) + + # Prevent acquired datasets jumping to the first position + revision = toolkit.get_action('revision_show')({'ignore_auth': True}, {'id': new_pkg_dict['revision_id']}) + new_pkg_dict['metadata_modified'] = revision.get('timestamp', '') + self.indexer.update_dict(new_pkg_dict) + + return pkg_dict + + def after_dataset_update(self, context, pkg_dict): + return self.after_dataset_create(context, pkg_dict) + + def after_dataset_show(self, context, pkg_dict): + + void = False + + for resource in pkg_dict['resources']: + if resource == {}: + void = True + + if void: + del pkg_dict['resources'] + del pkg_dict['num_resources'] + + user_obj = context.get('auth_user_obj') + updating_via_api = context.get(constants.CONTEXT_CALLBACK, False) + + # allowed_users and searchable fileds can be only viewed by (and only if the dataset is private): + # * the dataset creator + # * the sysadmin + # * users allowed to update the allowed_users list via the notification API + if pkg_dict.get('private') is False or not updating_via_api and (not user_obj or (pkg_dict['creator_user_id'] != user_obj.id and not user_obj.sysadmin)): + # The original list cannot be modified + attrs = 
list(HIDDEN_FIELDS) + self._delete_pkg_atts(pkg_dict, attrs) + + return pkg_dict + + def after_dataset_delete(self, context, pkg_dict): + session = context['session'] + package_id = pkg_dict['id'] + + # Get current users + db.init_db(context['model']) + users = db.AllowedUser.get(package_id=package_id) + + # Delete all the users + for user in users: + session.delete(user) + session.commit() + + return pkg_dict + + def after_dataset_search(self, search_results, search_params): + for result in search_results['results']: + # Extra fields should not be returned + # The original list cannot be modified + attrs = list(HIDDEN_FIELDS) + + # Additionally, resources should not be included if the user is not allowed + # to show the resource + context = { + 'model': model, + 'session': model.Session, + 'user': toolkit.c.user, + 'user_obj': toolkit.c.userobj + } + + try: + toolkit.check_access('package_show', context, result) + except toolkit.NotAuthorized: + # NotAuthorized exception is risen when the user is not allowed + # to read the package. 
+ attrs.append('resources') + # Delete + self._delete_pkg_atts(result, attrs) + + return search_results + + def before_dataset_view(self, pkg_dict): + + for resource in pkg_dict['resources']: + + context = { + 'model': model, + 'session': model.Session, + 'user': toolkit.c.user, + 'user_obj': toolkit.c.userobj + } + + try: + toolkit.check_access('resource_show', context, resource) + except toolkit.NotAuthorized: + pkg_dict['resources'].remove(resource) + pkg_dict = self.before_view(pkg_dict) + return pkg_dict + + def get_dataset_labels(self, dataset_obj): + labels = super(D4Science_ThemePlugin, self).get_dataset_labels( + dataset_obj) + + if getattr(dataset_obj, 'searchable', False): + labels.append('searchable') + + return labels + + def get_user_dataset_labels(self, user_obj): + labels = super(D4Science_ThemePlugin, self).get_user_dataset_labels( + user_obj) + + labels.append('searchable') + return labels + + ###################################################################### + ######################### IRESOURCECONTROLLER ######################## + ###################################################################### + + def before_resource_create(self, context, resource): + pass + + def after_resource_create(self, context, resource): + pass + + def before_resource_update(self, context, current, resource): + pass + + def before_resource_delete(self, context, resource, resources): + pass + + def before_resource_show(self, resource_dict): + + context = { + 'model': model, + 'session': model.Session, + 'user': toolkit.c.user, + 'user_obj': toolkit.c.userobj + } + + try: + toolkit.check_access('resource_show', context, resource_dict) + except toolkit.NotAuthorized: + resource_dict.clear() + return resource_dict \ No newline at end of file diff --git a/ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/actions.py b/ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/actions.py new file mode 100644 index 0000000..23b93f5 --- /dev/null +++ 
b/ckanext-d4science_theme/ckanext/d4science_theme/privatedatasets/actions.py @@ -0,0 +1,239 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2014 - 2017 CoNWeT Lab., Universidad Politécnica de Madrid +# Copyright (c) 2018 Future Internet Consulting and Development Solutions S.L. + +# This file is part of CKAN Private Dataset Extension. + +# CKAN Private Dataset Extension is free software: you can redistribute it and/or +# modify it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# CKAN Private Dataset Extension is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with CKAN Private Dataset Extension. If not, see <http://www.gnu.org/licenses/>. + +from __future__ import absolute_import + +import importlib +import logging +import os + +import ckan.plugins as plugins + +from ckanext.d4science_theme.privatedatasets import constants +from ckanext.d4science_theme import db + +log = logging.getLogger(__name__) + +PARSER_CONFIG_PROP = 'ckan.d4science_theme.privatedatasets.parser' + + +def package_acquired(context, request_data): + ''' + API action to be called every time a user acquires a dataset in an external service. + + This API should be called to add the user to the list of allowed users. + + Since each service can provide a different way of pushing the data, the received + data will be forwarded to the parser set in the preferences. This parser should + return a dict similar to the following one: + {'errors': ["...", "...", ...] + 'users_datasets': [{'user': 'user_name', 'datasets': ['ds1', 'ds2', ...]}, ...]} + 1) 'errors' contains the list of errors. 
It should be empty if no errors arised + while the notification is parsed + 2) 'users_datasets' is the lists of datasets available for each user (each element + of this list is a dictionary with two fields: user and datasets). + + :parameter request_data: Depends on the parser + :type request_data: dict + + :return: A list of warnings or None if the list of warnings is empty + :rtype: dict + + ''' + context['method'] = 'grant' + return _process_package(context, request_data) + + +def acquisitions_list(context, data_dict): + ''' + API to retrieve the list of datasets that have been acquired by a certain user + + :parameter user: The user whose acquired dataset you want to retrieve. This parameter + is optional. If you don't include this identifier, the system will use the one + of the user that is performing the request + :type user: string + + :return: The list of datarequest that has been acquired by the specified user + :rtype: list + ''' + + if data_dict is None: + data_dict = {} + + if 'user' not in data_dict and 'user' in context: + data_dict['user'] = context['user'] + + plugins.toolkit.check_access(constants.ACQUISITIONS_LIST, context.copy(), data_dict) + + # Init db + db.init_db(context['model']) + + # Init the result array + result = [] + + # Check that the user exists + try: + plugins.toolkit.get_validator('user_name_exists')(data_dict['user'], context.copy()) + except Exception: + raise plugins.toolkit.ValidationError('User %s does not exist' % data_dict['user']) + + # Get the datasets acquired by the user + query = db.AllowedUser.get(user_name=data_dict['user']) + + # Get the datasets + for dataset in query: + try: + dataset_show_func = 'package_show' + func_data_dict = {'id': dataset.package_id} + internal_context = context.copy() + + # Check that the the dataset can be accessed and get its data + # FIX: If the check_access function is not called, an exception is risen. 
+ plugins.toolkit.check_access(dataset_show_func, internal_context, func_data_dict) + dataset_dict = plugins.toolkit.get_action(dataset_show_func)(internal_context, func_data_dict) + + # Only packages with state == 'active' can be shown + if dataset_dict.get('state', None) == 'active': + result.append(dataset_dict) + except Exception: + pass + + return result + + +def revoke_access(context, request_data): + ''' + API action to be called in order to revoke access grants of an user. + + This API should be called to delete the user from the list of allowed users. + + Since each service can provide a different way of pushing the data, the received + data will be forwarded to the parser set in the preferences. This parser should + return a dict similar to the following one: + {'errors': ["...", "...", ...] + 'users_datasets': [{'user': 'user_name', 'datasets': ['ds1', 'ds2', ...]}, ...]} + 1) 'errors' contains the list of errors. It should be empty if no errors arised + while the notification is parsed + 2) 'users_datasets' is the lists of datasets available for each user (each element + of this list is a dictionary with two fields: user and datasets). 
+ + :parameter request_data: Depends on the parser + :type request_data: dict + + :return: A list of warnings or None if the list of warnings is empty + :rtype: dict + + ''' + context['method'] = 'revoke' + return _process_package(context, request_data) + + +def _process_package(context, request_data): + log.info('Notification received: %s' % request_data) + + # Check access + method = constants.PACKAGE_ACQUIRED if context.get('method') == 'grant' else constants.PACKAGE_DELETED + plugins.toolkit.check_access(method, context, request_data) + + # Get the parser from the configuration + class_path = os.environ.get(PARSER_CONFIG_PROP.upper().replace('.', '_'), plugins.toolkit.config.get(PARSER_CONFIG_PROP, '')) + + if class_path != '': + try: + cls = class_path.split(':') + class_package = cls[0] + class_name = cls[1] + parser_cls = getattr(importlib.import_module(class_package), class_name) + parser = parser_cls() + except Exception as e: + raise plugins.toolkit.ValidationError({'message': '%s: %s' % (type(e).__name__, str(e))}) + else: + raise plugins.toolkit.ValidationError({'message': '%s not configured' % PARSER_CONFIG_PROP}) + + # Parse the result using the parser set in the configuration + # Expected result: {'errors': ["...", "...", ...] + # 'users_datasets': [{'user': 'user_name', 'datasets': ['ds1', 'ds2', ...]}, ...]} + result = parser.parse_notification(request_data) + + warns = [] + + for user_info in result['users_datasets']: + for dataset_id in user_info['datasets']: + + try: + context_pkg_show = context.copy() + context_pkg_show['ignore_auth'] = True + context_pkg_show[constants.CONTEXT_CALLBACK] = True + dataset = plugins.toolkit.get_action('package_show')(context_pkg_show, {'id': dataset_id}) + + # This operation can only be performed with private datasets + # This check is redundant since the package_update function will throw an exception + # if a list of allowed users is included in a public dataset. 
However, this check + # should be performed in order to avoid strange future exceptions + if dataset.get('private', None) is True: + + # Create the array if it does not exist + if constants.ALLOWED_USERS not in dataset or dataset[constants.ALLOWED_USERS] is None: + dataset[constants.ALLOWED_USERS] = [] + + method = context['method'] == 'grant' + present = user_info['user'] in dataset[constants.ALLOWED_USERS] + # Deletes the user only if it is in the list + if (not method and present) or (method and not present): + if method: + dataset[constants.ALLOWED_USERS].append(user_info['user']) + else: + dataset[constants.ALLOWED_USERS].remove(user_info['user']) + + context_pkg_update = context.copy() + context_pkg_update['ignore_auth'] = True + + # Set creator as the user who is performing the changes + user_show = plugins.toolkit.get_action('user_show') + creator_user_id = dataset.get('creator_user_id', '') + user_show_context = {'ignore_auth': True} + user = user_show(user_show_context, {'id': creator_user_id}) + context_pkg_update['user'] = user.get('name', '') + + plugins.toolkit.get_action('package_update')(context_pkg_update, dataset) + log.info('Action %s access to dataset ended successfully' % context['method']) + else: + log.debug('Action %s access to dataset not completed. The dataset %s already %s access to the user %s' % (context['method'], dataset_id, context['method'], user_info['user'])) + else: + log.debug('Dataset %s is public. Cannot %s access to users' % (dataset_id, context['method'])) + warns.append('Unable to upload the dataset %s: It\'s a public dataset' % dataset_id) + + except plugins.toolkit.ObjectNotFound: + # If a dataset does not exist in the instance, an error message will be returned to the user. + # However the process won't stop and the process will continue with the remaining datasets. 
def _read_denied(user, package):
    """Build the auth response used when *user* may not read *package*."""
    return {'success': False,
            'msg': _('User %s not authorized to read package %s') % (user, package.id)}


@tk.auth_allow_anonymous_access
def package_show(context, data_dict):
    """Auth function deciding whether the current user may read a dataset.

    Access is granted, in this order, when:

    * the user is the creator of the dataset;
    * the dataset is active and public;
    * the dataset is active and the user has ``read`` permission in the
      owning organization;
    * the dataset is active and the user has an ``AllowedUser`` entry for it.

    When access to an active private dataset is denied and the dataset has a
    non-empty ``acquire_url`` extra, a flash notice linking to that URL is
    shown, but only for requests whose path starts with ``/dataset/``.

    :param context: CKAN auth context (``user``, ``auth_user_obj``, ``model``).
    :param data_dict: dict identifying the dataset.
    :returns: ``{'success': True}`` or ``{'success': False, 'msg': ...}``.
    """
    user = context.get('user')
    user_obj = context.get('auth_user_obj')
    package = logic_auth.get_package_object(context, data_dict)

    # A dataset can always be read by its creator.
    if package and user_obj and package.creator_user_id == user_obj.id:
        return {'success': True}

    # Datasets that are not active can only be seen by their creator.
    if package.state != 'active':
        return _read_denied(user, package)

    # Anyone can see a public dataset.
    if not package.private:
        return {'success': True}

    # Users with read rights in the owning organization.
    if package.owner_org:
        authorized = authz.has_user_permission_for_group_or_org(
            package.owner_org, user, 'read')
    else:
        authorized = False

    # Otherwise the user must be listed in the allowed users of the dataset.
    if not authorized:
        db.init_db(context['model'])
        # An empty result means the user has not been granted access.
        if db.AllowedUser.get(package_id=package.id, user_name=user):
            authorized = True

    if authorized:
        return {'success': True}

    # Tell the user where the dataset can be acquired. The notice is only
    # shown when the dataset is accessed through its own URL (/dataset/...),
    # not on other pages that call package_show (e.g. the user profile).
    acquire_url = ''
    if hasattr(package, 'extras') and 'acquire_url' in package.extras:
        acquire_url = package.extras['acquire_url']

    if acquire_url and request.path.startswith('/dataset/'):
        from html import escape

        # The message must contain a %s placeholder for the URL, otherwise the
        # interpolation raises TypeError. The URL is escaped because the
        # notice is rendered as raw HTML (allow_html=True).
        notice = _('This private dataset can be acquired. To do so, please click ' +
                   '<a target="_blank" href="%s">here</a>') % escape(acquire_url, quote=True)
        helpers.flash_notice(notice, allow_html=True)

    return _read_denied(user, package)


def package_update(context, data_dict):
    """Auth function deciding whether the current user may edit a dataset.

    The creator of the dataset may always edit it; any other user needs the
    ``update_dataset`` permission in the organization owning the dataset.

    :returns: ``{'success': True}`` or ``{'success': False, 'msg': ...}``.
    """
    user = context.get('user')
    user_obj = context.get('auth_user_obj')
    package = logic_auth.get_package_object(context, data_dict)

    # The creator can always update the dataset.
    if package and user_obj and package.creator_user_id == user_obj.id:
        return {'success': True}

    # Otherwise the user needs update rights in the owning organization.
    if package and package.owner_org:
        authorized = authz.has_user_permission_for_group_or_org(
            package.owner_org, user, 'update_dataset')
    else:
        authorized = False

    if authorized:
        return {'success': True}

    return {'success': False,
            'msg': _('User %s is not authorized to edit package %s') % (user, package.id)}


@tk.auth_allow_anonymous_access
def resource_show(context, data_dict):
    """Auth function for reading a resource.

    A resource is readable exactly when its dataset is readable according to
    this module's ``package_show``.

    :raises tk.ObjectNotFound: if the resource is not attached to a dataset.
    """
    user = context.get('user')
    resource = logic_auth.get_resource_object(context, data_dict)

    # Resources reference their dataset directly through ``package_id``; the
    # intermediate ResourceGroup model used by old CKAN releases is gone.
    package_id = getattr(resource, 'package_id', None)
    if not package_id:
        raise tk.ObjectNotFound(_('No package found for this resource, cannot check auth.'))

    authorized = package_show(context, {'id': package_id}).get('success')

    if authorized:
        return {'success': True}

    return {'success': False,
            'msg': _('User %s not authorized to read resource %s') % (user, resource.id)}


@tk.auth_allow_anonymous_access
def package_acquired(context, data_dict):
    """Auth function for the ``package_acquired`` action.

    Always grants access, including to anonymous callers.
    """
    # TODO: Improve security
    return {'success': True}


def acquisitions_list(context, data_dict):
    """Auth function for the ``acquisitions_list`` action.

    Users can only get their own acquisitions list: access is granted when
    ``data_dict['user']`` names the user found in the context.
    """
    current_user = context.get('user')
    requested_user = (data_dict or {}).get('user')
    # Two missing/empty user names must not count as a match.
    return {'success': bool(current_user) and current_user == requested_user}


# V2, taken from the old d4science repository
@tk.auth_allow_anonymous_access
def revoke_access(context, data_dict):
    """Auth function for the ``revoke_access`` action.

    Always grants access, including to anonymous callers.
    """
    # TODO: Check functionality and improve security (if needed)
    return {'success': True}
# Keys of the dataset fields handled by the private-datasets code.
ALLOWED_USERS = 'allowed_users'          # list form of the allowed users
ALLOWED_USERS_STR = 'allowed_users_str'  # comma-separated form of the same list
ACQUIRE_URL = 'acquire_url'              # where a private dataset can be acquired
SEARCHABLE = 'searchable'                # presumably a visibility flag - confirm against the plugin

# Names shared by the actions and their auth functions.
ACQUISITIONS_LIST = 'acquisitions_list'
PACKAGE_ACQUIRED = 'package_acquired'
PACKAGE_DELETED = 'revoke_access'

# Context key; presumably set while a dataset is updated from a callback
# notification - confirm against the actions module.
CONTEXT_CALLBACK = 'updating_via_cb'
# String spellings accepted as "true", matching CKAN's own boolean validator.
_TRUE_STRINGS = ('true', 'yes', 't', 'y', '1')


def private_datasets_metadata_checker(key, data, errors, context):
    """Validator: reject private-dataset metadata on datasets that are not private.

    The ``private`` flag is read from the flattened ``data`` dict. When it is
    absent there and the dataset has an id, the stored value is looked up with
    ``package_show``. If the field being validated has a value and the dataset
    is not private, an error is appended to ``errors[key]``.

    :param key: flattened key of the field being validated.
    :param data: flattened data dict of the dataset.
    :param errors: flattened errors dict; ``errors[key]`` is appended to.
    :param context: validation context (unused).
    """
    dataset_id = data.get(('id',))
    private_val = data.get(('private',))

    # Discard missing values. "if not private_val" cannot be used here because
    # False is a legitimate value.
    if not isinstance(private_val, six.string_types) and not isinstance(private_val, bool):
        private_val = None

    # If the private field is not in the data dict, check the stored value.
    if private_val is None and dataset_id:
        try:
            dataset_dict = toolkit.get_action('package_show')({'ignore_auth': True}, {'id': dataset_id})
        except toolkit.ObjectNotFound:
            # The dataset is being created with an explicit id, so there is no
            # stored value to fall back to.
            dataset_dict = {}
        private_val = dataset_dict.get('private')

    if isinstance(private_val, bool):
        private = private_val
    elif isinstance(private_val, six.string_types):
        # Accept every spelling CKAN itself treats as true, not just 'True'.
        private = private_val.strip().lower() in _TRUE_STRINGS
    else:
        private = False

    metadata_value = data[key]

    # Allowed users (and related fields) only make sense on private datasets.
    if metadata_value and not private:
        errors[key].append(_('This field is only valid when you create a private dataset'))


def allowed_users_convert(key, data, errors, context):
    """Converter: expand the allowed users into ``(key[0], index)`` entries.

    The users are taken from the ``allowed_users`` list when present, otherwise
    from the comma-separated ``allowed_users_str`` string. Each user name is
    stripped and stored under ``(key[0], n)``, numbering on from the highest
    index already present for ``key[0]``. An empty list is stored as
    ``data[('allowed_users',)] = []``. Nothing is done when neither field
    holds a usable value.
    """
    list_key = (constants.ALLOWED_USERS,)
    str_key = (constants.ALLOWED_USERS_STR,)

    # Every field is present in the data dict even when empty (its value is
    # then a "Missing" object), hence the explicit type checks.
    if isinstance(data.get(list_key), list):
        allowed_users = data[list_key]
    elif isinstance(data.get(str_key), six.string_types):
        allowed_users = [name for name in data[str_key].split(',') if name.strip() != '']
    else:
        return

    # Highest index already used for this field (-1 when there is none).
    used_indexes = [int(k[1]) for k in data.keys() if len(k) == 2 and k[0] == key[0]]
    current_index = max(used_indexes + [-1])

    if not allowed_users:
        data[list_key] = []
        return

    for num, allowed_user in enumerate(allowed_users, current_index + 1):
        data[(key[0], num)] = allowed_user.strip()


def get_allowed_users(key, data, errors, context):
    """Converter: load the allowed users of the dataset from the database.

    Each ``AllowedUser`` row found for ``data[('id',)]`` is written to
    ``data[(key[0], i)]`` as its ``user_name``.
    """
    pkg_id = data[('id',)]

    db.init_db(context['model'])

    for index, allowed in enumerate(db.AllowedUser.get(package_id=pkg_id)):
        data[(key[0], index)] = allowed.user_name
class FiWareNotificationParser(object):
    """Parser for acquisition notifications in the FIWARE format.

    A notification is a dict with a ``customer_name`` string and a
    ``resources`` list whose items are dicts carrying a ``url``.
    """

    @staticmethod
    def _with_default_port(netloc, scheme, my_host):
        """Return *netloc* in a form comparable with *my_host*.

        When this instance's host includes a port and *netloc* does not, the
        default port of *scheme* (80 for ``http``, 443 otherwise) is appended
        as ``host:port``. In every other case *netloc* is returned unchanged.
        """
        if ':' in my_host and ':' not in netloc:
            default_port = '80' if scheme == 'http' else '443'
            # The separator is required: 'example.com' + '80' would never
            # match a host such as 'example.com:80'.
            return '%s:%s' % (netloc, default_port)
        return netloc

    def parse_notification(self, request_data):
        """Extract the user and the acquired datasets from a notification.

        Resource URLs whose path is not of the form ``/dataset/<name>...`` are
        ignored.

        :param request_data: decoded body of the notification.
        :returns: ``{'users_datasets': [{'user': <name>, 'datasets': [<names>]}]}``
        :raises tk.ValidationError: if the notification is malformed or a
            dataset URL points to a host other than the current request's.
        """
        my_host = request.host

        if not isinstance(request_data, dict):
            raise tk.ValidationError({'message': 'Invalid request format'})

        for field in ('customer_name', 'resources'):
            if field not in request_data:
                raise tk.ValidationError({'message': '%s not found in the request' % field})

        # Parse the body
        resources = request_data['resources']
        user_name = request_data['customer_name']
        datasets = []

        if not isinstance(user_name, str):
            raise tk.ValidationError({'message': 'Invalid customer_name format'})

        if not isinstance(resources, list):
            raise tk.ValidationError({'message': 'Invalid resources format'})

        for resource in resources:
            if not (isinstance(resource, dict) and 'url' in resource):
                raise tk.ValidationError({'message': 'Invalid resource format'})

            parsed_url = urlparse(resource['url'])
            dataset_name = re.findall(r'^/dataset/([^/]+).*$', parsed_url.path)
            resource_url = self._with_default_port(parsed_url.netloc, parsed_url.scheme, my_host)

            if len(dataset_name) != 1:
                # Not a dataset URL: nothing to register for this resource.
                continue

            if resource_url != my_host:
                raise tk.ValidationError({'message': 'Dataset %s is associated with the CKAN instance located at %s, expected %s'
                                          % (dataset_name[0], resource_url, my_host)})

            datasets.append(dataset_name[0])

        return {'users_datasets': [{'user': user_name, 'datasets': datasets}]}
def acquired_datasets():
    """Render the dashboard page listing the datasets acquired by the current user.

    Looks up the logged-in user with ``user_show`` and their acquisitions with
    the ``acquisitions_list`` action, then renders
    ``user/dashboard_acquired.html`` with ``user_dict`` and
    ``acquired_datasets`` as template variables.

    Aborts with 404 when the user is not found and with 403 when the user is
    not authorized.
    """
    context = {
        'auth_user_obj': g.userobj,
        'for_view': True,
        'model': model,
        'session': model.Session,
        'user': g.user,
    }
    data_dict = {'user_obj': g.userobj}

    try:
        user_dict = toolkit.get_action('user_show')(context, data_dict)
        acquired = toolkit.get_action(constants.ACQUISITIONS_LIST)(context, None)
    except logic.NotFound:
        base.abort(404, _('User not found'))
    except logic.NotAuthorized:
        base.abort(403, _('Not authorized to see this page'))

    extra_vars = {
        'user_dict': user_dict,
        'acquired_datasets': acquired,
    }
    # Flask's render_template only accepts the template context as keyword
    # arguments; passing the dict positionally raises TypeError.
    return render_template('user/dashboard_acquired.html', **extra_vars)


class AcquiredDatasetsControllerUI():
    """Class-based wrapper exposing :func:`acquired_datasets` as a method."""

    def acquired_datasets(self):
        """Delegate to the module-level ``acquired_datasets`` view."""
        return acquired_datasets()