From 0609262731fd95b0039ea8ba8faf0b49d1b59cec Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 13 Dec 2012 13:59:45 +0000 Subject: [PATCH 01/14] [#2] Update README link --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 1a28e3c..9c038bb 100644 --- a/README.rst +++ b/README.rst @@ -119,7 +119,7 @@ The two available profiles right now are: To know more about the CKAN publisher auth profile, visit: - http://wiki.ckan.org/Working_with_the_publisher_auth_profile + http://oldwiki.ckan.org/Working_with_the_publisher_auth_profile The CKAN harvester From 19cd80b26497d7b933edd739d5e3f5962408c04d Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 20 Dec 2012 16:09:26 +0000 Subject: [PATCH 02/14] [#4] Fixes on the auth layer against the new core auth Thanks @locusf for the original patch --- ckanext/harvest/controllers/view.py | 5 ++- ckanext/harvest/logic/action/get.py | 34 +++++++++---------- ckanext/harvest/logic/auth/create.py | 16 +++++---- ckanext/harvest/logic/auth/delete.py | 6 ++-- ckanext/harvest/logic/auth/get.py | 19 +++++++---- .../harvest/logic/auth/publisher/create.py | 13 +++---- .../harvest/logic/auth/publisher/delete.py | 10 +++--- 7 files changed, 52 insertions(+), 51 deletions(-) diff --git a/ckanext/harvest/controllers/view.py b/ckanext/harvest/controllers/view.py index 913f398..565327f 100644 --- a/ckanext/harvest/controllers/view.py +++ b/ckanext/harvest/controllers/view.py @@ -3,7 +3,6 @@ from lxml import etree from lxml.etree import XMLSyntaxError from pylons.i18n import _ -from ckan.authz import Authorizer from ckan import model from ckan.model.group import Group @@ -33,9 +32,9 @@ class ViewController(BaseController): def _get_publishers(self): groups = None - + user = model.User.get(c.user) if c.publisher_auth: - if Authorizer().is_sysadmin(c.user): + if user.sysadmin: groups = Group.all(group_type='publisher') elif c.userobj: groups = c.userobj.get_groups('publisher') diff --git a/ckanext/harvest/logic/action/get.py b/ckanext/harvest/logic/action/get.py index 4032545..d26d436 100644 --- a/ckanext/harvest/logic/action/get.py +++ b/ckanext/harvest/logic/action/get.py @@ -1,6 +1,5 @@ import logging from sqlalchemy import or_, distinct -from ckan.authz import Authorizer from ckan.model import User import datetime @@ -284,26 +283,27 @@ def _get_sources_for_user(context,data_dict): ) # Sysadmins will get all sources - if not Authorizer().is_sysadmin(user): - # This only applies to a non sysadmin user when using the - # publisher auth profile. When using the default profile, - # normal users will never arrive at this point, but even if they - # do, they will get an empty list. + if user: user_obj = User.get(user) + if not user_obj.sysadmin: + # This only applies to a non sysadmin user when using the + # publisher auth profile. When using the default profile, + # normal users will never arrive at this point, but even if they + # do, they will get an empty list. - publisher_filters = [] - publishers_for_the_user = user_obj.get_groups(u'publisher') - for publisher_id in [g.id for g in publishers_for_the_user]: - publisher_filters.append(HarvestSource.publisher_id==publisher_id) + publisher_filters = [] + publishers_for_the_user = user_obj.get_groups(u'publisher') + for publisher_id in [g.id for g in publishers_for_the_user]: + publisher_filters.append(HarvestSource.publisher_id==publisher_id) - if len(publisher_filters): - query = query.filter(or_(*publisher_filters)) - else: - # This user does not belong to a publisher yet, no sources for him/her - return [] + if len(publisher_filters): + query = query.filter(or_(*publisher_filters)) + else: + # This user does not belong to a publisher yet, no sources for him/her + return [] - log.debug('User %s with publishers %r has Harvest Sources: %r', - user, publishers_for_the_user, [(hs.id, hs.url) for hs in query]) + log.debug('User %s with publishers %r has Harvest Sources: %r', + user, publishers_for_the_user, [(hs.id, hs.url) for hs in query]) sources = query.all() diff --git a/ckanext/harvest/logic/auth/create.py b/ckanext/harvest/logic/auth/create.py index 2173263..eed9742 100644 --- a/ckanext/harvest/logic/auth/create.py +++ b/ckanext/harvest/logic/auth/create.py @@ -1,29 +1,31 @@ from ckan.lib.base import _ -from ckan.authz import Authorizer +from ckan.model import User def harvest_source_create(context,data_dict): model = context['model'] user = context.get('user') - - if not Authorizer().is_sysadmin(user): + user = User.get(user) + if not user.sysadmin: return {'success': False, 'msg': _('User %s not authorized to create harvest sources') % str(user)} else: return {'success': True} + def harvest_job_create(context,data_dict): model = context['model'] user = context.get('user') - - if not Authorizer().is_sysadmin(user): + user = User.get(user) + if not user.sysadmin: return {'success': False, 'msg': _('User %s not authorized to create harvest jobs') % str(user)} else: return {'success': True} + def harvest_job_create_all(context,data_dict): model = context['model'] user = context.get('user') - - if not Authorizer().is_sysadmin(user): + user = User.get(user) + if not user.sysadmin: return {'success': False, 'msg': _('User %s not authorized to create harvest jobs for all sources') % str(user)} else: return {'success': True} diff --git a/ckanext/harvest/logic/auth/delete.py b/ckanext/harvest/logic/auth/delete.py index f527aea..03e7355 100644 --- a/ckanext/harvest/logic/auth/delete.py +++ b/ckanext/harvest/logic/auth/delete.py @@ -1,11 +1,11 @@ from ckan.lib.base import _ -from ckan.authz import Authorizer +from ckan.model import User def harvest_source_delete(context,data_dict): model = context['model'] user = context.get('user') - - if not Authorizer().is_sysadmin(user): + user = User.get(user) + if not user.sysadmin: return {'success': False, 'msg': _('User %s not authorized to delete harvest sources') % str(user)} else: return {'success': True} diff --git a/ckanext/harvest/logic/auth/get.py b/ckanext/harvest/logic/auth/get.py index c138b3e..0e396ac 100644 --- a/ckanext/harvest/logic/auth/get.py +++ b/ckanext/harvest/logic/auth/get.py @@ -1,11 +1,11 @@ from ckan.lib.base import _ -from ckan.authz import Authorizer def harvest_source_show(context,data_dict): model = context['model'] user = context.get('user') - if not Authorizer().is_sysadmin(user): + user_obj = model.User.get(user) + if not user_obj or not user_obj.sysadmin: return {'success': False, 'msg': _('User %s not authorized to read this harvest source') % str(user)} else: return {'success': True} @@ -14,7 +14,8 @@ def harvest_source_list(context,data_dict): model = context['model'] user = context.get('user') - if not Authorizer().is_sysadmin(user): + user_obj = model.User.get(user) + if not user_obj or not user_obj.sysadmin: return {'success': False, 'msg': _('User %s not authorized to see the harvest sources') % str(user)} else: return {'success': True} @@ -24,7 +25,8 @@ def harvest_job_show(context,data_dict): model = context['model'] user = context.get('user') - if not Authorizer().is_sysadmin(user): + user_obj = model.User.get(user) + if not user_obj or not user_obj.sysadmin: return {'success': False, 'msg': _('User %s not authorized to read this harvest job') % str(user)} else: return {'success': True} @@ -33,7 +35,8 @@ def harvest_job_list(context,data_dict): model = context['model'] user = context.get('user') - if not Authorizer().is_sysadmin(user): + user_obj = model.User.get(user) + if not user_obj or not user_obj.sysadmin: return {'success': False, 'msg': _('User %s not authorized to see the harvest jobs') % str(user)} else: return {'success': True} @@ -48,7 +51,8 @@ def harvest_object_list(context,data_dict): model = context['model'] user = context.get('user') - if not Authorizer().is_sysadmin(user): + user_obj = model.User.get(user) + if not user_obj or not user_obj.sysadmin: return {'success': False, 'msg': _('User %s not authorized to see the harvest objects') % str(user)} else: return {'success': True} @@ -57,7 +61,8 @@ def harvesters_info_show(context,data_dict): model = context['model'] user = context.get('user') - if not Authorizer().is_sysadmin(user): + user_obj = model.User.get(user) + if not user_obj or not user_obj.sysadmin: return {'success': False, 'msg': _('User %s not authorized to see the harvesters information') % str(user)} else: return {'success': True} diff --git a/ckanext/harvest/logic/auth/publisher/create.py b/ckanext/harvest/logic/auth/publisher/create.py index eac8bb8..a773a69 100644 --- a/ckanext/harvest/logic/auth/publisher/create.py +++ b/ckanext/harvest/logic/auth/publisher/create.py @@ -1,5 +1,4 @@ from ckan.lib.base import _ -from ckan.authz import Authorizer from ckan.model import User from ckanext.harvest.model import HarvestSource @@ -15,7 +14,7 @@ def harvest_source_create(context,data_dict): # Sysadmins and the rest of logged users can create sources, # as long as they belong to a publisher user_obj = User.get(user) - if not user_obj or not Authorizer().is_sysadmin(user) and len(user_obj.get_groups(u'publisher')) == 0: + if not user_obj or not user_obj.sysadmin and len(user_obj.get_groups(u'publisher')) == 0: return {'success': False, 'msg': _('User %s must belong to a publisher to create harvest sources') % str(user)} else: return {'success': True} @@ -28,11 +27,9 @@ def harvest_job_create(context,data_dict): if not user: return {'success': False, 'msg': _('Non-logged in users are not authorized to create harvest jobs')} - - if Authorizer().is_sysadmin(user): - return {'success': True} - user_obj = User.get(user) + if user_obj.sysadmin: + return {'success': True} source = HarvestSource.get(source_id) if not source: raise NotFound @@ -45,8 +42,8 @@ def harvest_job_create(context,data_dict): def harvest_job_create_all(context,data_dict): model = context['model'] user = context.get('user') - - if not Authorizer().is_sysadmin(user): + user_obj = User.get(user) + if not user_obj.sysadmin: return {'success': False, 'msg': _('Only sysadmins can create harvest jobs for all sources') % str(user)} else: return {'success': True} diff --git a/ckanext/harvest/logic/auth/publisher/delete.py b/ckanext/harvest/logic/auth/publisher/delete.py index ec96ead..3c81d7c 100644 --- a/ckanext/harvest/logic/auth/publisher/delete.py +++ b/ckanext/harvest/logic/auth/publisher/delete.py @@ -1,9 +1,9 @@ from ckan.lib.base import _ -from ckan.authz import Authorizer from ckan.model import User from ckanext.harvest.logic.auth import get_source_object + def harvest_source_delete(context,data_dict): model = context['model'] user = context.get('user','') @@ -13,13 +13,11 @@ def harvest_source_delete(context,data_dict): # Non-logged users cannot delete this source if not user: return {'success': False, 'msg': _('Non-logged in users are not authorized to delete harvest sources')} - - # Sysadmins can delete the source - if Authorizer().is_sysadmin(user): - return {'success': True} - # Check if the source publisher id exists on the user's groups user_obj = User.get(user) + # Sysadmins can delete the source + if user_obj.sysadmin: + return {'success': True} if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: return {'success': False, 'msg': _('User %s not authorized to delete harvest source %s') % (str(user),source.id)} else: From 36389e7ce03d40fcbb49eb55b6178ed72408aea7 Mon Sep 17 00:00:00 2001 From: kindly Date: Mon, 24 Dec 2012 12:21:21 +0000 Subject: [PATCH 03/14] make sure gather phase finishes job if there is a severe error --- ckanext/harvest/queue.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ckanext/harvest/queue.py b/ckanext/harvest/queue.py index 8fd959e..99be672 100644 --- a/ckanext/harvest/queue.py +++ b/ckanext/harvest/queue.py @@ -115,7 +115,13 @@ def gather_callback(channel, method, header, body): harvester_found = True # Get a list of harvest object ids from the plugin job.gather_started = datetime.datetime.now() - harvest_object_ids = harvester.gather_stage(job) + try: + harvest_object_ids = harvester.gather_stage(job) + except: + log.error('Gather stage failed unexpectedly') + job.status = 'Errored' + job.save() + continue job.gather_finished = datetime.datetime.now() job.save() log.debug('Received from plugin''s gather_stage: %r' % harvest_object_ids) From 7b6beb14704f4fd2606583cffed60bd40b406741 Mon Sep 17 00:00:00 2001 From: kindly Date: Mon, 24 Dec 2012 22:34:37 +0000 Subject: [PATCH 04/14] fix wrong authorization logic --- ckanext/harvest/logic/action/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/harvest/logic/action/get.py b/ckanext/harvest/logic/action/get.py index 72791eb..3006957 100644 --- a/ckanext/harvest/logic/action/get.py +++ b/ckanext/harvest/logic/action/get.py @@ -283,7 +283,7 @@ def _get_sources_for_user(context,data_dict): user_obj = User.get(user) # Sysadmins will get all sources - if user_obj and user_obj.sysadmin: + if user_obj and not user_obj.sysadmin: # This only applies to a non sysadmin user when using the # publisher auth profile. When using the default profile, # normal users will never arrive at this point, but even if they From a86644502350c43cd07fce9d115c2ac971a135cf Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 9 Jan 2013 17:26:48 +0000 Subject: [PATCH 05/14] [#4] Refactor authorization functions The authorization functions have been refactored to take into account both the new organizaton based authorization on CKAN core and the harvest source datasets. Basically at the source level, authorization checks are forwarded to the relevant package auth function (package_create, package_update, etc.) wich will check for organizations membership, sysadmin, etc. Also we only use functions available on the plugins toolkit whenever possible. --- ckanext/harvest/logic/auth/__init__.py | 64 ++++++------ ckanext/harvest/logic/auth/create.py | 79 +++++++++------ ckanext/harvest/logic/auth/delete.py | 34 +++++-- ckanext/harvest/logic/auth/get.py | 129 ++++++++++++++----------- ckanext/harvest/logic/auth/update.py | 64 ++++++++---- 5 files changed, 223 insertions(+), 147 deletions(-) diff --git a/ckanext/harvest/logic/auth/__init__.py b/ckanext/harvest/logic/auth/__init__.py index 2f2b306..d92d1ec 100644 --- a/ckanext/harvest/logic/auth/__init__.py +++ b/ckanext/harvest/logic/auth/__init__.py @@ -1,39 +1,39 @@ -from ckan.logic import NotFound -from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject +from ckan.plugins import toolkit as pt +from ckanext.harvest import model as harvest_model +def user_is_sysadmin(context): + ''' + Checks if the user defined in the context is a sysadmin + + rtype: boolean + ''' + model = context['model'] + user = context['user'] + user_obj = model.User.get(user) + if not user_obj: + raise pt.Objectpt.ObjectNotFound('User {0} not found').format(user) + + return user_obj.sysadmin + +def _get_object(context, data_dict, name, class_name): + ''' + return the named item if in the data_dict, or get it from + model.class_name + ''' + if not name in context: + id = data_dict.get('id', None) + obj = getattr(harvest_model, class_name).get(id) + if not obj: + raise pt.ObjectNotFound + else: + obj = context[name] + return obj def get_source_object(context, data_dict = {}): - if not 'source' in context: - model = context['model'] - id = data_dict.get('id',None) - source = HarvestSource.get(id) - if not source: - raise NotFound - else: - source = context['source'] - - return source + return _get_object(context, data_dict, 'source', 'HarvestSource') def get_job_object(context, data_dict = {}): - if not 'job' in context: - model = context['model'] - id = data_dict.get('id',None) - job = HarvestJob.get(id) - if not job: - raise NotFound - else: - job = context['job'] - - return job + return _get_object(context, data_dict, 'job', 'HarvestJob') def get_obj_object(context, data_dict = {}): - if not 'obj' in context: - model = context['model'] - id = data_dict.get('id',None) - obj = HarvestObject.get(id) - if not obj: - raise NotFound - else: - obj = context['obj'] - - return obj + return _get_object(context, data_dict, 'obj', 'HarvestObject') diff --git a/ckanext/harvest/logic/auth/create.py b/ckanext/harvest/logic/auth/create.py index eed9742..984f489 100644 --- a/ckanext/harvest/logic/auth/create.py +++ b/ckanext/harvest/logic/auth/create.py @@ -1,32 +1,55 @@ -from ckan.lib.base import _ -from ckan.model import User +from ckan.plugins import toolkit as pt +from ckanext.harvest.logic.auth import user_is_sysadmin -def harvest_source_create(context,data_dict): - model = context['model'] + +def harvest_source_create(context, data_dict): + ''' + Authorization check for harvest source creation + + It forwards the checks to package_create, which will check for + organization membership, whether if sysadmin, etc according to the + instance configuration. + ''' user = context.get('user') - user = User.get(user) - if not user.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to create harvest sources') % str(user)} + try: + pt.check_access('package_create', context, data_dict) + return {'success': True} + except pt.Not_Authorized: + return {'success': False, + 'msg': pt._('User {0} not authorized to create harvest sources').format(user)} + + +def harvest_job_create(context, data_dict): + ''' + Authorization check for harvest job creation + + It forwards the checks to package_update, ie the user can only create + new jobs if she is allowed to edit the harvest source dataset. + ''' + model = context['model'] + source_id = data_dict['source_id'] + + pkg = model.Package.get(source_id) + if not pkg: + raise pt.ObjectNotFound(pt._('Harvest source not found')) + + context['package'] = pkg + + try: + pt.check_access('package_update', context, data_dict) + return {'success': True} + except pt.Not_Authorized: + return {'success': False, + 'msg': pt._('User not authorized to create a job for source {0}').format(source_id)} + + +def harvest_job_create_all(context, data_dict): + ''' + Authorization check for creating new jobs for all sources + + Only sysadmins can do it + ''' + if not user_is_sysadmin(context): + return {'success': False, 'msg': pt._('Only sysadmins can create harvest jobs for all sources')} else: return {'success': True} - - -def harvest_job_create(context,data_dict): - model = context['model'] - user = context.get('user') - user = User.get(user) - if not user.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to create harvest jobs') % str(user)} - else: - return {'success': True} - - -def harvest_job_create_all(context,data_dict): - model = context['model'] - user = context.get('user') - user = User.get(user) - if not user.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to create harvest jobs for all sources') % str(user)} - else: - return {'success': True} - diff --git a/ckanext/harvest/logic/auth/delete.py b/ckanext/harvest/logic/auth/delete.py index 03e7355..249fad4 100644 --- a/ckanext/harvest/logic/auth/delete.py +++ b/ckanext/harvest/logic/auth/delete.py @@ -1,13 +1,27 @@ -from ckan.lib.base import _ -from ckan.model import User +from ckan.plugins import toolkit as pt -def harvest_source_delete(context,data_dict): - model = context['model'] + +def harvest_source_update(context, data_dict): + ''' + Authorization check for harvest source deletion + + It forwards the checks to package_delete, which will check for + organization membership, whether if sysadmin, etc according to the + instance configuration. + ''' + model = context.get('model') user = context.get('user') - user = User.get(user) - if not user.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to delete harvest sources') % str(user)} - else: + source_id = data_dict['id'] + + pkg = model.Package.get(source_id) + if not pkg: + raise pt.ObjectNotFound(pt._('Harvest source not found')) + + context['package'] = pkg + + try: + pt.check_access('package_delete', context, data_dict) return {'success': True} - - + except pt.Not_Authorized: + return {'success': False, + 'msg': pt._('User {0} not authorized to delete harvest source {1}').format(user, source_id)} diff --git a/ckanext/harvest/logic/auth/get.py b/ckanext/harvest/logic/auth/get.py index 0e396ac..f2119a6 100644 --- a/ckanext/harvest/logic/auth/get.py +++ b/ckanext/harvest/logic/auth/get.py @@ -1,69 +1,86 @@ -from ckan.lib.base import _ +from ckan.plugins import toolkit as pt +from ckanext.harvest.logic.auth import get_job_object -def harvest_source_show(context,data_dict): - model = context['model'] + +def harvest_source_show(context, data_dict): + ''' + Authorization check for getting the details of a harvest source + + It forwards the checks to package_show, which will check for + organization membership, whether if sysadmin, etc according to the + instance configuration. + ''' + model = context.get('model') user = context.get('user') + source_id = data_dict['id'] - user_obj = model.User.get(user) - if not user_obj or not user_obj.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to read this harvest source') % str(user)} - else: - return {'success': True} - -def harvest_source_list(context,data_dict): - model = context['model'] - user = context.get('user') - - user_obj = model.User.get(user) - if not user_obj or not user_obj.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to see the harvest sources') % str(user)} - else: + pkg = model.Package.get(source_id) + if not pkg: + raise pt.ObjectNotFound(pt._('Harvest source not found')) + + context['package'] = pkg + + try: + pt.check_access('package_show', context, data_dict) return {'success': True} + except pt.Not_Authorized: + return {'success': False, + 'msg': pt._('User {0} not authorized to read harvest source {1}').format(user, source_id)} -def harvest_job_show(context,data_dict): - model = context['model'] - user = context.get('user') - - user_obj = model.User.get(user) - if not user_obj or not user_obj.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to read this harvest job') % str(user)} - else: - return {'success': True} - -def harvest_job_list(context,data_dict): - model = context['model'] - user = context.get('user') - - user_obj = model.User.get(user) - if not user_obj or not user_obj.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to see the harvest jobs') % str(user)} - else: - return {'success': True} - -def harvest_object_show(context,data_dict): - model = context['model'] - user = context.get('user') +def harvest_source_list(context, data_dict): + ''' + Authorization check for getting a list of harveste sources + Everybody can do it + ''' return {'success': True} -def harvest_object_list(context,data_dict): - model = context['model'] - user = context.get('user') - user_obj = model.User.get(user) - if not user_obj or not user_obj.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to see the harvest objects') % str(user)} - else: - return {'success': True} +def harvest_job_show(context, data_dict): + ''' + Authorization check for getting the details of a harvest job -def harvesters_info_show(context,data_dict): - model = context['model'] - user = context.get('user') + It forwards the checks to harvest_source_show, ie if the user can get + the details for the parent source, she can get the details for the job + ''' + job = get_job_object(context, data_dict) - user_obj = model.User.get(user) - if not user_obj or not user_obj.sysadmin: - return {'success': False, 'msg': _('User %s not authorized to see the harvesters information') % str(user)} - else: - return {'success': True} + return harvest_source_show(context, {'id': job.source.id}) + +def harvest_job_list(context, data_dict): + ''' + Authorization check for getting a list of jobs for a source + + It forwards the checks to harvest_source_show, ie if the user can get + the details for the parent source, she can get the list of jobs + ''' + source_id = data_dict['source_id'] + return harvest_source_show(context, {'id': source_id}) + + +def harvest_object_show(context, data_dict): + ''' + Authorization check for getting the contents of a harvest object + + Everybody can do it + ''' + return {'success': True} + + +def harvest_object_list(context, data_dict): + ''' + TODO: remove + ''' + return {'success': True} + + +def harvesters_info_show(context, data_dict): + ''' + Authorization check for getting information about the available + harvesters + + Everybody can do it + ''' + return {'success': True} diff --git a/ckanext/harvest/logic/auth/update.py b/ckanext/harvest/logic/auth/update.py index efe84c7..162299c 100644 --- a/ckanext/harvest/logic/auth/update.py +++ b/ckanext/harvest/logic/auth/update.py @@ -1,30 +1,52 @@ -from ckan.lib.base import _ -from ckan.authz import Authorizer +from ckan.plugins import toolkit as pt +from ckanext.harvest.logic.auth import user_is_sysadmin -def harvest_source_update(context,data_dict): - model = context['model'] + +def harvest_source_update(context, data_dict): + ''' + Authorization check for harvest source update + + It forwards the checks to package_update, which will check for + organization membership, whether if sysadmin, etc according to the + instance configuration. + ''' + model = context.get('model') user = context.get('user') + source_id = data_dict['id'] - if not Authorizer().is_sysadmin(user): - return {'success': False, 'msg': _('User %s not authorized to update harvest sources') % str(user)} + pkg = model.Package.get(source_id) + if not pkg: + raise pt.ObjectNotFound(pt._('Harvest source not found')) + + context['package'] = pkg + + try: + pt.check_access('package_update', context, data_dict) + return {'success': True} + except pt.Not_Authorized: + return {'success': False, + 'msg': pt._('User {0} not authorized to update harvest source {1}').format(user, source_id)} + + +def harvest_objects_import(context, data_dict): + ''' + Authorization check reimporting all harvest objects + + Only sysadmins can do it + ''' + if not user_is_sysadmin(context): + return {'success': False, 'msg': pt._('Only sysadmins can reimport all harvest objects')} else: return {'success': True} -def harvest_objects_import(context,data_dict): - model = context['model'] - user = context.get('user') - if not Authorizer().is_sysadmin(user): - return {'success': False, 'msg': _('User %s not authorized to reimport harvest objects') % str(user)} +def harvest_jobs_run(context, data_dict): + ''' + Authorization check for running the pending harvest jobs + + Only sysadmins can do it + ''' + if not user_is_sysadmin(context): + return {'success': False, 'msg': pt._('Only sysadmins can run the pending harvest jobs')} else: return {'success': True} - -def harvest_jobs_run(context,data_dict): - model = context['model'] - user = context.get('user') - - if not Authorizer().is_sysadmin(user): - return {'success': False, 'msg': _('User %s not authorized to run the pending harvest jobs') % str(user)} - else: - return {'success': True} - From 058dcad435e7fdcab511267e6ba74397c726edff Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 9 Jan 2013 17:31:30 +0000 Subject: [PATCH 06/14] [#4] Minor change on the state field to fix a bug on harvest_source_show --- ckanext/harvest/logic/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/harvest/logic/schema.py b/ckanext/harvest/logic/schema.py index 38603ae..5e37393 100644 --- a/ckanext/harvest/logic/schema.py +++ b/ckanext/harvest/logic/schema.py @@ -30,7 +30,7 @@ def harvest_source_schema(): 'title': [if_empty_same_as("name"), unicode], 'notes': [ignore_missing, unicode], 'frequency': [ignore_missing, unicode, harvest_source_frequency_exists, convert_to_extras], - 'state': [ignore_not_package_admin, ignore_missing], + 'state': [ignore_missing], 'config': [ignore_missing, harvest_source_config_validator, convert_to_extras], 'extras': default_extras_schema(), '__extras': [ignore], From 288e1429a63fe703da719139520cb863d18bc5b2 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 9 Jan 2013 17:32:05 +0000 Subject: [PATCH 07/14] [#4] Remove the loading of different authorization profiles The different profiles will be now configured via the harvest source datasets on CKAN core, so it is no longer needed. Also simplify IActions and IAuthFunction hook calls. --- ckanext/harvest/plugin.py | 69 ++++++++++----------------------------- 1 file changed, 18 insertions(+), 51 deletions(-) diff --git a/ckanext/harvest/plugin.py b/ckanext/harvest/plugin.py index 3160793..ecd2d19 100644 --- a/ckanext/harvest/plugin.py +++ b/ckanext/harvest/plugin.py @@ -217,58 +217,26 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm): p.toolkit.add_template_directory(config, templates) p.toolkit.add_public_directory(config, 'public') - def get_actions(self): - from ckanext.harvest.logic.action.get import (harvest_source_show, - harvest_source_show_status, - harvest_source_list, - harvest_source_for_a_dataset, - harvest_job_show, - harvest_job_list, - harvest_object_show, - harvest_object_list, - harvesters_info_show,) - from ckanext.harvest.logic.action.create import (harvest_source_create, - harvest_job_create, - harvest_job_create_all,) - from ckanext.harvest.logic.action.update import (harvest_source_update, - harvest_objects_import, - harvest_jobs_run) - from ckanext.harvest.logic.action.delete import (harvest_source_delete,) + ## IActions - return { - 'harvest_source_show': harvest_source_show, - 'harvest_source_show_status': harvest_source_show_status, - 'harvest_source_list': harvest_source_list, - 'harvest_source_for_a_dataset': harvest_source_for_a_dataset, - 'harvest_job_show': harvest_job_show, - 'harvest_job_list': harvest_job_list, - 'harvest_object_show': harvest_object_show, - 'harvest_object_list': harvest_object_list, - 'harvesters_info_show': harvesters_info_show, - 'harvest_source_create': harvest_source_create, - 'harvest_job_create': harvest_job_create, - 'harvest_job_create_all': harvest_job_create_all, - 'harvest_source_update': harvest_source_update, - 'harvest_source_delete': harvest_source_delete, - 'harvest_objects_import': harvest_objects_import, - 'harvest_jobs_run':harvest_jobs_run - } + def get_actions(self): + + module_root = 'ckanext.harvest.logic.action' + action_functions = _get_logic_functions(module_root) + + return action_functions + + ## IAuthFunctions def get_auth_functions(self): module_root = 'ckanext.harvest.logic.auth' - auth_profile = config.get('ckan.harvest.auth.profile', '') - - auth_functions = _get_auth_functions(module_root) - if auth_profile: - module_root = '%s.%s' % (module_root, auth_profile) - auth_functions = _get_auth_functions(module_root,auth_functions) - - log.debug('Using auth profile at %s' % module_root) + auth_functions = _get_logic_functions(module_root) return auth_functions ## ITemplateHelpers + def get_helpers(self): from ckanext.harvest import helpers as harvest_helpers return { @@ -279,14 +247,14 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm): } -def _get_auth_functions(module_root, auth_functions = {}): +def _get_logic_functions(module_root, logic_functions = {}): - for auth_module_name in ['get', 'create', 'update','delete']: - module_path = '%s.%s' % (module_root, auth_module_name,) + for module_name in ['get', 'create', 'update','delete']: + module_path = '%s.%s' % (module_root, module_name,) try: module = __import__(module_path) - except ImportError,e: - log.debug('No auth module for action "%s"' % auth_module_name) + except ImportError: + log.debug('No auth module for action "{0}"'.format(module_name)) continue for part in module_path.split('.')[1:]: @@ -294,10 +262,9 @@ def _get_auth_functions(module_root, auth_functions = {}): for key, value in module.__dict__.items(): if not key.startswith('_'): - auth_functions[key] = value + logic_functions[key] = value - - return auth_functions + return logic_functions def _create_harvest_source_object(data_dict): ''' From e49dd94b344ee1f2c43037ab9809ce55034d5349 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 9 Jan 2013 17:35:47 +0000 Subject: [PATCH 08/14] [#4] Remove authorization functions for the publisher profile The different profiles will be now configured via the harvest source datasets on CKAN core, so they are no longer needed. --- .../harvest/logic/auth/publisher/__init__.py | 7 - .../harvest/logic/auth/publisher/create.py | 50 ------ .../harvest/logic/auth/publisher/delete.py | 25 --- ckanext/harvest/logic/auth/publisher/get.py | 163 ------------------ .../harvest/logic/auth/publisher/update.py | 83 --------- 5 files changed, 328 deletions(-) delete mode 100644 ckanext/harvest/logic/auth/publisher/__init__.py delete mode 100644 ckanext/harvest/logic/auth/publisher/create.py delete mode 100644 ckanext/harvest/logic/auth/publisher/delete.py delete mode 100644 ckanext/harvest/logic/auth/publisher/get.py delete mode 100644 ckanext/harvest/logic/auth/publisher/update.py diff --git a/ckanext/harvest/logic/auth/publisher/__init__.py b/ckanext/harvest/logic/auth/publisher/__init__.py deleted file mode 100644 index d0ed2fc..0000000 --- a/ckanext/harvest/logic/auth/publisher/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -try: - import pkg_resources - pkg_resources.declare_namespace(__name__) -except ImportError: - import pkgutil - __path__ = pkgutil.extend_path(__path__, __name__) - diff --git a/ckanext/harvest/logic/auth/publisher/create.py b/ckanext/harvest/logic/auth/publisher/create.py deleted file mode 100644 index a773a69..0000000 --- a/ckanext/harvest/logic/auth/publisher/create.py +++ /dev/null @@ -1,50 +0,0 @@ -from ckan.lib.base import _ -from ckan.model import User - -from ckanext.harvest.model import HarvestSource - -def harvest_source_create(context,data_dict): - model = context['model'] - user = context.get('user','') - - # Non-logged users can not create sources - if not user: - return {'success': False, 'msg': _('Non-logged in users are not authorized to create harvest sources')} - - # Sysadmins and the rest of logged users can create sources, - # as long as they belong to a publisher - user_obj = User.get(user) - if not user_obj or not user_obj.sysadmin and len(user_obj.get_groups(u'publisher')) == 0: - return {'success': False, 'msg': _('User %s must belong to a publisher to create harvest sources') % str(user)} - else: - return {'success': True} - -def harvest_job_create(context,data_dict): - model = context['model'] - user = context.get('user') - - source_id = data_dict['source_id'] - - if not user: - return {'success': False, 'msg': _('Non-logged in users are not authorized to create harvest jobs')} - user_obj = User.get(user) - if user_obj.sysadmin: - return {'success': True} - source = HarvestSource.get(source_id) - if not source: - raise NotFound - - if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to create a job for source %s') % (str(user),source.id)} - else: - return {'success': True} - -def harvest_job_create_all(context,data_dict): - model = context['model'] - user = context.get('user') - user_obj = User.get(user) - if not user_obj.sysadmin: - return {'success': False, 'msg': _('Only sysadmins can create harvest jobs for all sources') % str(user)} - else: - return {'success': True} - diff --git a/ckanext/harvest/logic/auth/publisher/delete.py b/ckanext/harvest/logic/auth/publisher/delete.py deleted file mode 100644 index 3c81d7c..0000000 --- a/ckanext/harvest/logic/auth/publisher/delete.py +++ /dev/null @@ -1,25 +0,0 @@ -from ckan.lib.base import _ -from ckan.model import User - -from ckanext.harvest.logic.auth import get_source_object - - -def harvest_source_delete(context,data_dict): - model = context['model'] - user = context.get('user','') - - source = get_source_object(context,data_dict) - - # Non-logged users cannot delete this source - if not user: - return {'success': False, 'msg': _('Non-logged in users are not authorized to delete harvest sources')} - # Check if the source publisher id exists on the user's groups - user_obj = User.get(user) - # Sysadmins can delete the source - if user_obj.sysadmin: - return {'success': True} - if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to delete harvest source %s') % (str(user),source.id)} - else: - return {'success': True} - diff --git a/ckanext/harvest/logic/auth/publisher/get.py b/ckanext/harvest/logic/auth/publisher/get.py deleted file mode 100644 index e8e6691..0000000 --- a/ckanext/harvest/logic/auth/publisher/get.py +++ /dev/null @@ -1,163 +0,0 @@ -from ckan.lib.base import _ -from ckan.logic import NotFound -from ckan.authz import Authorizer -from ckan.model import User - -from ckanext.harvest.model import HarvestSource -from ckanext.harvest.logic.auth import get_source_object, get_job_object, get_obj_object - -def harvest_source_show(context,data_dict): - model = context['model'] - user = context.get('user','') - - source = get_source_object(context,data_dict) - - # Non-logged users can not read the source - if not user: - return {'success': False, 'msg': _('Non-logged in users are not authorized to see harvest sources')} - - # Sysadmins can read the source - if Authorizer().is_sysadmin(user): - return {'success': True} - - # Check if the source publisher id exists on the user's groups - user_obj = User.get(user) - if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to read harvest source %s') % (str(user),source.id)} - else: - return {'success': True} - -def harvest_source_list(context,data_dict): - - model = context['model'] - user = context.get('user') - - # Here we will just check that the user is logged in. - # The logic action will return an empty list if the user does not - # have permissons on any source. - if not user: - return {'success': False, 'msg': _('Only logged users are authorized to see their sources')} - else: - user_obj = User.get(user) - assert user_obj - - # Only users belonging to a publisher can list sources, - # unless they are sysadmins - if Authorizer().is_sysadmin(user_obj): - return {'success': True} - if len(user_obj.get_groups(u'publisher')) > 0: - return {'success': True} - else: - return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest sources') % str(user)} - -def harvest_job_show(context,data_dict): - model = context['model'] - user = context.get('user') - - job = get_job_object(context,data_dict) - - if not user: - return {'success': False, 'msg': _('Non-logged in users are not authorized to see harvest jobs')} - - if Authorizer().is_sysadmin(user): - return {'success': True} - - user_obj = User.get(user) - if not user_obj or not job.source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to read harvest job %s') % (str(user),job.id)} - else: - return {'success': True} - -def harvest_job_list(context,data_dict): - model = context['model'] - user = context.get('user') - - # Check user is logged in - if not user: - return {'success': False, 'msg': _('Only logged users are authorized to see their sources')} - - user_obj = User.get(user) - - # Checks for non sysadmin users - if not Authorizer().is_sysadmin(user): - if not user_obj or len(user_obj.get_groups(u'publisher')) == 0: - return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest jobs') % str(user)} - - source_id = data_dict.get('source_id',False) - if not source_id: - return {'success': False, 'msg': _('Only sysadmins can list all harvest jobs') % str(user)} - - source = HarvestSource.get(source_id) - if not source: - raise NotFound - - if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to list jobs from source %s') % (str(user),source.id)} - - return {'success': True} - -def harvest_object_show(context,data_dict): - model = context['model'] - user = context.get('user') - - obj = get_obj_object(context,data_dict) - - if context.get('ignore_auth', False): - return {'success': True} - - if not user: - return {'success': False, 'msg': _('Non-logged in users are not authorized to see harvest objects')} - - if Authorizer().is_sysadmin(user): - return {'success': True} - - user_obj = User.get(user) - if not user_obj or not obj.source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to read harvest object %s') % (str(user),obj.id)} - else: - return {'success': True} - -def harvest_object_list(context,data_dict): - model = context['model'] - user = context.get('user') - - # Check user is logged in - if not user: - return {'success': False, 'msg': _('Only logged users are authorized to see their sources')} - - user_obj = User.get(user) - - # Checks for non sysadmin users - if not Authorizer().is_sysadmin(user): - if not user_obj or len(user_obj.get_groups(u'publisher')) == 0: - return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest objects') % str(user)} - - source_id = data_dict.get('source_id',False) - if not source_id: - return {'success': False, 'msg': _('Only sysadmins can list all harvest objects') % str(user)} - - source = HarvestSource.get(source_id) - if not source: - raise NotFound - - if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to list objects from source %s') % (str(user),source.id)} - - return {'success': True} - -def harvesters_info_show(context,data_dict): - model = context['model'] - user = context.get('user','') - - # Non-logged users can not create sources - if not user: - return {'success': False, 'msg': _('Non-logged in users can not see the harvesters info')} - - # Sysadmins and the rest of logged users can see the harvesters info, - # as long as they belong to a publisher - user_obj = User.get(user) - if not user_obj or not Authorizer().is_sysadmin(user) and len(user_obj.get_groups(u'publisher')) == 0: - return {'success': False, 'msg': _('User %s must belong to a publisher to see the harvesters info') % str(user)} - else: - return {'success': True} - diff --git a/ckanext/harvest/logic/auth/publisher/update.py b/ckanext/harvest/logic/auth/publisher/update.py deleted file mode 100644 index f4c160c..0000000 --- a/ckanext/harvest/logic/auth/publisher/update.py +++ /dev/null @@ -1,83 +0,0 @@ -from ckan.lib.base import _ -from ckan.authz import Authorizer -from ckan.model import User - -from ckanext.harvest.logic.auth import get_source_object - -def harvest_source_update(context,data_dict): - model = context['model'] - user = context.get('user','') - - source = get_source_object(context,data_dict) - - # Non-logged users can not update this source - if not user: - return {'success': False, 'msg': _('Non-logged in users are not authorized to update harvest sources')} - - # Sysadmins can update the source - if Authorizer().is_sysadmin(user): - return {'success': True} - - # Check if the source publisher id exists on the user's groups - user_obj = User.get(user) - if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to update harvest source %s') % (str(user),source.id)} - else: - return {'success': True} - -def harvest_objects_import(context,data_dict): - model = context['model'] - user = context.get('user') - - # Check user is logged in - if not user: - return {'success': False, 'msg': _('Only logged users are authorized to reimport harvest objects')} - - user_obj = User.get(user) - - # Checks for non sysadmin users - if not Authorizer().is_sysadmin(user): - if not user_obj or len(user_obj.get_groups(u'publisher')) == 0: - return {'success': False, 'msg': _('User %s must belong to a publisher to reimport harvest objects') % str(user)} - - source_id = data_dict.get('source_id',False) - if not source_id: - return {'success': False, 'msg': _('Only sysadmins can reimport all harvest objects') % str(user)} - - source = HarvestSource.get(source_id) - if not source: - raise NotFound - - if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to reimport objects from source %s') % (str(user),source.id)} - - return {'success': True} - -def harvest_jobs_run(context,data_dict): - model = context['model'] - user = context.get('user') - - # Check user is logged in - if not user: - return {'success': False, 'msg': _('Only logged users are authorized to run harvest jobs')} - - user_obj = User.get(user) - - # Checks for non sysadmin users - if not Authorizer().is_sysadmin(user): - if not user_obj or len(user_obj.get_groups(u'publisher')) == 0: - return {'success': False, 'msg': _('User %s must belong to a publisher to run harvest jobs') % str(user)} - - source_id = data_dict.get('source_id',False) - if not source_id: - return {'success': False, 'msg': _('Only sysadmins can run all harvest jobs') % str(user)} - - source = HarvestSource.get(source_id) - if not source: - raise NotFound - - if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]: - return {'success': False, 'msg': _('User %s not authorized to run jobs from source %s') % (str(user),source.id)} - - return {'success': True} - From acb17ff3b05440b28e34174116ddccc08d0b5111 Mon Sep 17 00:00:00 2001 From: kindly Date: Thu, 10 Jan 2013 10:48:48 +0000 Subject: [PATCH 09/14] capture errors more cleanly --- ckanext/harvest/queue.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ckanext/harvest/queue.py b/ckanext/harvest/queue.py index 99be672..d59406c 100644 --- a/ckanext/harvest/queue.py +++ b/ckanext/harvest/queue.py @@ -117,8 +117,8 @@ def gather_callback(channel, method, header, body): job.gather_started = datetime.datetime.now() try: harvest_object_ids = harvester.gather_stage(job) - except: - log.error('Gather stage failed unexpectedly') + except Exception, e: + log.error('Gather stage failed unexpectedly: %s' % e) job.status = 'Errored' job.save() continue @@ -166,6 +166,8 @@ def fetch_callback(channel, method, header, body): obj.save() if obj.retry_times >= 5: + obj.state = "ERROR" + obj.save() log.error('Too many consecutive retries for object {0}'.format(obj.id)) channel.basic_ack(method.delivery_tag) return False From 2bb669af21099daa360896650a9c389e1fc18ed8 Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 10 Jan 2013 12:23:01 +0000 Subject: [PATCH 10/14] [#4] Add owner_org field to schema and form This should store the owner organization id. Also added the errors box on the form. --- ckanext/harvest/logic/schema.py | 2 ++ .../templates_new/source/new_source_form.html | 26 ++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/ckanext/harvest/logic/schema.py b/ckanext/harvest/logic/schema.py index 5e37393..d071c05 100644 --- a/ckanext/harvest/logic/schema.py +++ b/ckanext/harvest/logic/schema.py @@ -1,6 +1,7 @@ from ckan.logic.schema import default_extras_schema from ckan.logic.validators import (package_id_exists, name_validator, + owner_org_validator, package_name_validator, ignore_not_package_admin, ) @@ -29,6 +30,7 @@ def harvest_source_schema(): 'source_type': [not_empty, unicode, harvest_source_type_exists, convert_to_extras], 'title': [if_empty_same_as("name"), unicode], 'notes': [ignore_missing, unicode], + 'owner_org': [owner_org_validator, unicode], 'frequency': [ignore_missing, unicode, harvest_source_frequency_exists, convert_to_extras], 'state': [ignore_missing], 'config': [ignore_missing, harvest_source_config_validator, convert_to_extras], diff --git a/ckanext/harvest/templates_new/source/new_source_form.html b/ckanext/harvest/templates_new/source/new_source_form.html index be509ef..2b4fae1 100644 --- a/ckanext/harvest/templates_new/source/new_source_form.html +++ b/ckanext/harvest/templates_new/source/new_source_form.html @@ -2,6 +2,8 @@
+ {% block errors %}{{ form.errors(error_summary) }}{% endblock %} + {% call form.input('url', id='field-url', label=_('URL'), value=data.url, error=errors.url, classes=['control-full', 'control-large']) %} {{ _('This should include the http:// part of the URL') }} @@ -32,7 +34,29 @@ {{ form.textarea('config', id='field-config', label=_('Configuration'), value=data.config, error=errors.config) }} -
TODO: state / delete
+ {# if we have a default group then this wants remembering #} + {% if data.group_id %} + + {% endif %} + {% set existing_org = data.owner_org or data.group_id %} + {% if h.check_access('sysadmin') or data.get('state', 'draft').startswith('draft') or data.get('state', 'none') == 'none' %} + {% set organizations_available = h.organizations_available('create_dataset') %} + {% if organizations_available %} +
+ +
+ +
+
+ {% endif %} + {% endif %} From 2f4cd3a4b0f616b6d65b4f4510e0c16e45dc7bfe Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 15 Jan 2013 19:29:17 +0000 Subject: [PATCH 11/14] [#4] Fix logic functions importer --- ckanext/harvest/plugin.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ckanext/harvest/plugin.py b/ckanext/harvest/plugin.py index ecd2d19..32c8d1d 100644 --- a/ckanext/harvest/plugin.py +++ b/ckanext/harvest/plugin.py @@ -1,7 +1,6 @@ +import types from logging import getLogger -from pylons import config - from ckan import logic from ckan import model import ckan.plugins as p @@ -261,7 +260,7 @@ def _get_logic_functions(module_root, logic_functions = {}): module = getattr(module, part) for key, value in module.__dict__.items(): - if not key.startswith('_'): + if not key.startswith('_') and isinstance(value, types.FunctionType): logic_functions[key] = value return logic_functions From 2ab10afcf92a16f239f6cd86f26f9cebd5bae865 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 16 Jan 2013 12:56:58 +0000 Subject: [PATCH 12/14] [#4] Fix typo in auth functions --- ckanext/harvest/logic/auth/create.py | 5 ++--- ckanext/harvest/logic/auth/delete.py | 2 +- ckanext/harvest/logic/auth/get.py | 2 +- ckanext/harvest/logic/auth/update.py | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ckanext/harvest/logic/auth/create.py b/ckanext/harvest/logic/auth/create.py index 984f489..03aa01b 100644 --- a/ckanext/harvest/logic/auth/create.py +++ b/ckanext/harvest/logic/auth/create.py @@ -14,7 +14,7 @@ def harvest_source_create(context, data_dict): try: pt.check_access('package_create', context, data_dict) return {'success': True} - except pt.Not_Authorized: + except pt.NotAuthorized: return {'success': False, 'msg': pt._('User {0} not authorized to create harvest sources').format(user)} @@ -34,11 +34,10 @@ def harvest_job_create(context, data_dict): raise pt.ObjectNotFound(pt._('Harvest source not found')) context['package'] = pkg - try: pt.check_access('package_update', context, data_dict) return {'success': True} - except pt.Not_Authorized: + except pt.NotAuthorized: return {'success': False, 'msg': pt._('User not authorized to create a job for source {0}').format(source_id)} diff --git a/ckanext/harvest/logic/auth/delete.py b/ckanext/harvest/logic/auth/delete.py index 249fad4..f364ab4 100644 --- a/ckanext/harvest/logic/auth/delete.py +++ b/ckanext/harvest/logic/auth/delete.py @@ -22,6 +22,6 @@ def harvest_source_update(context, data_dict): try: pt.check_access('package_delete', context, data_dict) return {'success': True} - except pt.Not_Authorized: + except pt.NotAuthorized: return {'success': False, 'msg': pt._('User {0} not authorized to delete harvest source {1}').format(user, source_id)} diff --git a/ckanext/harvest/logic/auth/get.py b/ckanext/harvest/logic/auth/get.py index f2119a6..d540c5b 100644 --- a/ckanext/harvest/logic/auth/get.py +++ b/ckanext/harvest/logic/auth/get.py @@ -23,7 +23,7 @@ def harvest_source_show(context, data_dict): try: pt.check_access('package_show', context, data_dict) return {'success': True} - except pt.Not_Authorized: + except pt.NotAuthorized: return {'success': False, 'msg': pt._('User {0} not authorized to read harvest source {1}').format(user, source_id)} diff --git a/ckanext/harvest/logic/auth/update.py b/ckanext/harvest/logic/auth/update.py index 162299c..a5bf31b 100644 --- a/ckanext/harvest/logic/auth/update.py +++ b/ckanext/harvest/logic/auth/update.py @@ -23,7 +23,7 @@ def harvest_source_update(context, data_dict): try: pt.check_access('package_update', context, data_dict) return {'success': True} - except pt.Not_Authorized: + except pt.NotAuthorized: return {'success': False, 'msg': pt._('User {0} not authorized to update harvest source {1}').format(user, source_id)} From bfce5185f06b8f2bec3a43ee0151507aaa80b374 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 16 Jan 2013 17:45:33 +0000 Subject: [PATCH 13/14] [#4] Add db_to_form_schema_options to harvest plugin to avoid validation on show --- ckanext/harvest/plugin.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ckanext/harvest/plugin.py b/ckanext/harvest/plugin.py index 32c8d1d..5f0747a 100644 --- a/ckanext/harvest/plugin.py +++ b/ckanext/harvest/plugin.py @@ -133,6 +133,17 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm): return harvest_source_form_to_db_schema() + def db_to_form_schema_options(self, options): + ''' + Similar to db_to_form_schema but with further options to allow + slightly different schemas, eg for creation or deletion on the API. + ''' + if options.get('type') == 'show': + return None + else: + return self.db_to_form_schema() + + def db_to_form_schema(self): ''' Returns the schema for mapping package data from the database into a From 30c9eedf5f8af20f73364d0a9f42f9620628a133 Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 17 Jan 2013 15:43:45 +0000 Subject: [PATCH 14/14] Improve harvest source status creation Use report_status field to improve speed, remove unnecessary fields. --- ckanext/harvest/logic/action/get.py | 95 +++++++++++------------------ 1 file changed, 36 insertions(+), 59 deletions(-) diff --git a/ckanext/harvest/logic/action/get.py b/ckanext/harvest/logic/action/get.py index 3006957..2212d18 100644 --- a/ckanext/harvest/logic/action/get.py +++ b/ckanext/harvest/logic/action/get.py @@ -1,5 +1,5 @@ import logging -from sqlalchemy import or_, distinct +from sqlalchemy import or_, func from ckan.model import User import datetime @@ -7,6 +7,7 @@ from ckan import logic from ckan.plugins import PluginImplementations from ckanext.harvest.interfaces import IHarvester +import ckan.plugins as p from ckan.logic import NotFound, check_access from ckanext.harvest import model as harvest_model @@ -18,6 +19,7 @@ from ckanext.harvest.logic.dictization import (harvest_source_dictize, from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema log = logging.getLogger(__name__) + def harvest_source_show(context,data_dict): ''' Returns the metadata of a harvest source @@ -41,8 +43,7 @@ def harvest_source_show(context,data_dict): return source_dict - -def harvest_source_show_status(context,data_dict): +def harvest_source_show_status(context, data_dict): ''' Returns a status report for a harvest source @@ -58,87 +59,63 @@ def harvest_source_show_status(context,data_dict): ''' model = context.get('model') - detailed = context.get('detailed',True) - source = harvest_model.HarvestSource.get(data_dict['id']) if not source: - raise logic.NotFound('Harvest source {0} does not exist'.format(data_dict['id'])) - - out = {} - - jobs = harvest_model.HarvestJob.filter(source=source).all() + raise p.toolkit.NotFound('Harvest source {0} does not exist'.format(data_dict['id'])) out = { 'job_count': 0, - 'next_harvest':'', - 'last_harvest_request':'', - 'last_harvest_statistics':{'added':0,'updated':0,'errors':0}, - 'overall_statistics':{'added':0, 'errors':0}, + 'next_harvest': p.toolkit._('Not yet scheduled'), + 'last_harvest_request': '', + 'last_harvest_statistics': {'new': 0, 'updated': 0, 'deleted': 0,'errored': 0}, + 'total_datasets': 0, } + jobs = harvest_model.HarvestJob.filter(source=source).all() + job_count = len(jobs) if job_count == 0: - out['msg'] = 'No jobs yet' return out - else: - out['job_count'] = job_count + + out['job_count'] = job_count # Get next scheduled job next_job = harvest_model.HarvestJob.filter(source=source,status=u'New').first() if next_job: - out['next_harvest'] = 'Scheduled' - else: - out['next_harvest'] = 'Not yet scheduled' + out['next_harvest'] = p.toolkit._('Scheduled') # Get the last finished job last_job = harvest_model.HarvestJob.filter(source=source,status=u'Finished') \ .order_by(harvest_model.HarvestJob.created.desc()).first() - if last_job: - out['last_job_id'] = last_job.id - out['last_harvest_request'] = str(last_job.gather_finished) + if not last_job: + out['last_harvest_request'] = p.toolkit._('Not yet harvested') + return out - #Get HarvestObjects from last job with links to packages - if detailed: - last_objects = [obj for obj in last_job.objects if obj.package is not None] + out['last_job_id'] = last_job.id + out['last_harvest_request'] = str(last_job.gather_finished) - if len(last_objects) == 0: - # No packages added or updated - out['last_harvest_statistics']['added'] = 0 - out['last_harvest_statistics']['updated'] = 0 - else: - # Check wether packages were added or updated - for last_object in last_objects: - # Check if the same package had been linked before - previous_objects = model.Session.query(harvest_model.HarvestObject) \ - .filter(harvest_model.HarvestObject.package==last_object.package) \ - .count() + last_job_report = model.Session.query( + harvest_model.HarvestObject.report_status, + func.count(harvest_model.HarvestObject.report_status)) \ + .filter(harvest_model.HarvestObject.harvest_job_id==last_job.id) \ + .group_by(harvest_model.HarvestObject.report_status) - if previous_objects == 1: - # It didn't previously exist, it has been added - out['last_harvest_statistics']['added'] += 1 - else: - # Pacakge already existed, but it has been updated - out['last_harvest_statistics']['updated'] += 1 + for row in last_job_report: + if row[0]: + out['last_harvest_statistics'][row[0]] = row[1] - # Last harvest errors - # We have the gathering errors in last_job.gather_errors, so let's also - # get also the object errors. - object_errors = model.Session.query(harvest_model.HarvestObjectError).join(harvest_model.HarvestObject) \ - .filter(harvest_model.HarvestObject.job==last_job) + # Add the gather stage errors + out['last_harvest_statistics']['errored'] += len(last_job.gather_errors) - out['last_harvest_statistics']['errors'] = len(last_job.gather_errors) \ - + object_errors.count() - # Overall statistics - packages = model.Session.query(distinct(harvest_model.HarvestObject.package_id), model.Package.name) \ - .join(model.Package).join(HarvestSource) \ - .filter(HarvestObject.source==source) \ - .filter(HarvestObject.current==True) \ - .filter(model.Package.state==u'active') + # Overall statistics + packages = model.Session.query(model.Package) \ + .join(harvest_model.HarvestObject) \ + .filter(harvest_model.HarvestObject.harvest_source_id==source.id) \ + .filter(harvest_model.HarvestObject.current==True) \ + .filter(model.Package.state==u'active') - out['overall_statistics']['added'] = packages.count() - else: - out['last_harvest_request'] = 'Not yet harvested' + out['total_datasets'] = packages.count() return out