Merge branch '4-new-auth-for-2.0' into 2.0-dataset-sources

This commit is contained in:
amercader 2013-01-22 12:03:26 +00:00
commit 0950827329
17 changed files with 328 additions and 593 deletions

View File

@ -119,7 +119,7 @@ The two available profiles right now are:
To know more about the CKAN publisher auth profile, visit:
http://wiki.ckan.org/Working_with_the_publisher_auth_profile
http://oldwiki.ckan.org/Working_with_the_publisher_auth_profile
The CKAN harvester

View File

@ -3,7 +3,6 @@ from lxml import etree
from lxml.etree import XMLSyntaxError
from pylons.i18n import _
from ckan.authz import Authorizer
from ckan import model
from ckan.model.group import Group
@ -33,9 +32,9 @@ class ViewController(BaseController):
def _get_publishers(self):
groups = None
user = model.User.get(c.user)
if c.publisher_auth:
if Authorizer().is_sysadmin(c.user):
if user.sysadmin:
groups = Group.all(group_type='publisher')
elif c.userobj:
groups = c.userobj.get_groups('publisher')

View File

@ -1,6 +1,5 @@
import logging
from sqlalchemy import or_, distinct
from ckan.authz import Authorizer
from sqlalchemy import or_, func
from ckan.model import User
import datetime
@ -8,6 +7,7 @@ from ckan import logic
from ckan.plugins import PluginImplementations
from ckanext.harvest.interfaces import IHarvester
import ckan.plugins as p
from ckan.logic import NotFound, check_access
from ckanext.harvest import model as harvest_model
@ -19,6 +19,7 @@ from ckanext.harvest.logic.dictization import (harvest_source_dictize,
from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema
log = logging.getLogger(__name__)
def harvest_source_show(context,data_dict):
'''
Returns the metadata of a harvest source
@ -42,8 +43,7 @@ def harvest_source_show(context,data_dict):
return source_dict
def harvest_source_show_status(context,data_dict):
def harvest_source_show_status(context, data_dict):
'''
Returns a status report for a harvest source
@ -59,87 +59,63 @@ def harvest_source_show_status(context,data_dict):
'''
model = context.get('model')
detailed = context.get('detailed',True)
source = harvest_model.HarvestSource.get(data_dict['id'])
if not source:
raise logic.NotFound('Harvest source {0} does not exist'.format(data_dict['id']))
out = {}
jobs = harvest_model.HarvestJob.filter(source=source).all()
raise p.toolkit.NotFound('Harvest source {0} does not exist'.format(data_dict['id']))
out = {
'job_count': 0,
'next_harvest':'',
'last_harvest_request':'',
'last_harvest_statistics':{'added':0,'updated':0,'errors':0},
'overall_statistics':{'added':0, 'errors':0},
'next_harvest': p.toolkit._('Not yet scheduled'),
'last_harvest_request': '',
'last_harvest_statistics': {'new': 0, 'updated': 0, 'deleted': 0,'errored': 0},
'total_datasets': 0,
}
jobs = harvest_model.HarvestJob.filter(source=source).all()
job_count = len(jobs)
if job_count == 0:
out['msg'] = 'No jobs yet'
return out
else:
out['job_count'] = job_count
out['job_count'] = job_count
# Get next scheduled job
next_job = harvest_model.HarvestJob.filter(source=source,status=u'New').first()
if next_job:
out['next_harvest'] = 'Scheduled'
else:
out['next_harvest'] = 'Not yet scheduled'
out['next_harvest'] = p.toolkit._('Scheduled')
# Get the last finished job
last_job = harvest_model.HarvestJob.filter(source=source,status=u'Finished') \
.order_by(harvest_model.HarvestJob.created.desc()).first()
if last_job:
out['last_job_id'] = last_job.id
out['last_harvest_request'] = str(last_job.gather_finished)
if not last_job:
out['last_harvest_request'] = p.toolkit._('Not yet harvested')
return out
#Get HarvestObjects from last job with links to packages
if detailed:
last_objects = [obj for obj in last_job.objects if obj.package is not None]
out['last_job_id'] = last_job.id
out['last_harvest_request'] = str(last_job.gather_finished)
if len(last_objects) == 0:
# No packages added or updated
out['last_harvest_statistics']['added'] = 0
out['last_harvest_statistics']['updated'] = 0
else:
# Check wether packages were added or updated
for last_object in last_objects:
# Check if the same package had been linked before
previous_objects = model.Session.query(harvest_model.HarvestObject) \
.filter(harvest_model.HarvestObject.package==last_object.package) \
.count()
last_job_report = model.Session.query(
harvest_model.HarvestObject.report_status,
func.count(harvest_model.HarvestObject.report_status)) \
.filter(harvest_model.HarvestObject.harvest_job_id==last_job.id) \
.group_by(harvest_model.HarvestObject.report_status)
if previous_objects == 1:
# It didn't previously exist, it has been added
out['last_harvest_statistics']['added'] += 1
else:
# Pacakge already existed, but it has been updated
out['last_harvest_statistics']['updated'] += 1
for row in last_job_report:
if row[0]:
out['last_harvest_statistics'][row[0]] = row[1]
# Last harvest errors
# We have the gathering errors in last_job.gather_errors, so let's also
# get also the object errors.
object_errors = model.Session.query(harvest_model.HarvestObjectError).join(harvest_model.HarvestObject) \
.filter(harvest_model.HarvestObject.job==last_job)
# Add the gather stage errors
out['last_harvest_statistics']['errored'] += len(last_job.gather_errors)
out['last_harvest_statistics']['errors'] = len(last_job.gather_errors) \
+ object_errors.count()
# Overall statistics
packages = model.Session.query(distinct(harvest_model.HarvestObject.package_id), model.Package.name) \
.join(model.Package).join(HarvestSource) \
.filter(HarvestObject.source==source) \
.filter(HarvestObject.current==True) \
.filter(model.Package.state==u'active')
# Overall statistics
packages = model.Session.query(model.Package) \
.join(harvest_model.HarvestObject) \
.filter(harvest_model.HarvestObject.harvest_source_id==source.id) \
.filter(harvest_model.HarvestObject.current==True) \
.filter(model.Package.state==u'active')
out['overall_statistics']['added'] = packages.count()
else:
out['last_harvest_request'] = 'Not yet harvested'
out['total_datasets'] = packages.count()
return out
@ -284,7 +260,7 @@ def _get_sources_for_user(context,data_dict):
user_obj = User.get(user)
# Sysadmins will get all sources
if user_obj and user_obj.sysadmin:
if user_obj and not user_obj.sysadmin:
# This only applies to a non sysadmin user when using the
# publisher auth profile. When using the default profile,
# normal users will never arrive at this point, but even if they

View File

@ -1,39 +1,39 @@
from ckan.logic import NotFound
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject
from ckan.plugins import toolkit as pt
from ckanext.harvest import model as harvest_model
def user_is_sysadmin(context):
'''
Checks if the user defined in the context is a sysadmin
rtype: boolean
'''
model = context['model']
user = context['user']
user_obj = model.User.get(user)
if not user_obj:
raise pt.Objectpt.ObjectNotFound('User {0} not found').format(user)
return user_obj.sysadmin
def _get_object(context, data_dict, name, class_name):
'''
return the named item if in the data_dict, or get it from
model.class_name
'''
if not name in context:
id = data_dict.get('id', None)
obj = getattr(harvest_model, class_name).get(id)
if not obj:
raise pt.ObjectNotFound
else:
obj = context[name]
return obj
def get_source_object(context, data_dict = {}):
if not 'source' in context:
model = context['model']
id = data_dict.get('id',None)
source = HarvestSource.get(id)
if not source:
raise NotFound
else:
source = context['source']
return source
return _get_object(context, data_dict, 'source', 'HarvestSource')
def get_job_object(context, data_dict = {}):
if not 'job' in context:
model = context['model']
id = data_dict.get('id',None)
job = HarvestJob.get(id)
if not job:
raise NotFound
else:
job = context['job']
return job
return _get_object(context, data_dict, 'job', 'HarvestJob')
def get_obj_object(context, data_dict = {}):
if not 'obj' in context:
model = context['model']
id = data_dict.get('id',None)
obj = HarvestObject.get(id)
if not obj:
raise NotFound
else:
obj = context['obj']
return obj
return _get_object(context, data_dict, 'obj', 'HarvestObject')

View File

@ -1,30 +1,54 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.plugins import toolkit as pt
from ckanext.harvest.logic.auth import user_is_sysadmin
def harvest_source_create(context,data_dict):
model = context['model']
def harvest_source_create(context, data_dict):
'''
Authorization check for harvest source creation
It forwards the checks to package_create, which will check for
organization membership, whether if sysadmin, etc according to the
instance configuration.
'''
user = context.get('user')
try:
pt.check_access('package_create', context, data_dict)
return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User {0} not authorized to create harvest sources').format(user)}
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to create harvest sources') % str(user)}
def harvest_job_create(context, data_dict):
'''
Authorization check for harvest job creation
It forwards the checks to package_update, ie the user can only create
new jobs if she is allowed to edit the harvest source dataset.
'''
model = context['model']
source_id = data_dict['source_id']
pkg = model.Package.get(source_id)
if not pkg:
raise pt.ObjectNotFound(pt._('Harvest source not found'))
context['package'] = pkg
try:
pt.check_access('package_update', context, data_dict)
return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User not authorized to create a job for source {0}').format(source_id)}
def harvest_job_create_all(context, data_dict):
'''
Authorization check for creating new jobs for all sources
Only sysadmins can do it
'''
if not user_is_sysadmin(context):
return {'success': False, 'msg': pt._('Only sysadmins can create harvest jobs for all sources')}
else:
return {'success': True}
def harvest_job_create(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to create harvest jobs') % str(user)}
else:
return {'success': True}
def harvest_job_create_all(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to create harvest jobs for all sources') % str(user)}
else:
return {'success': True}

View File

@ -1,13 +1,27 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.plugins import toolkit as pt
def harvest_source_delete(context,data_dict):
model = context['model']
def harvest_source_update(context, data_dict):
'''
Authorization check for harvest source deletion
It forwards the checks to package_delete, which will check for
organization membership, whether if sysadmin, etc according to the
instance configuration.
'''
model = context.get('model')
user = context.get('user')
source_id = data_dict['id']
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to delete harvest sources') % str(user)}
else:
pkg = model.Package.get(source_id)
if not pkg:
raise pt.ObjectNotFound(pt._('Harvest source not found'))
context['package'] = pkg
try:
pt.check_access('package_delete', context, data_dict)
return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User {0} not authorized to delete harvest source {1}').format(user, source_id)}

View File

@ -1,64 +1,86 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.plugins import toolkit as pt
from ckanext.harvest.logic.auth import get_job_object
def harvest_source_show(context,data_dict):
model = context['model']
def harvest_source_show(context, data_dict):
'''
Authorization check for getting the details of a harvest source
It forwards the checks to package_show, which will check for
organization membership, whether if sysadmin, etc according to the
instance configuration.
'''
model = context.get('model')
user = context.get('user')
source_id = data_dict['id']
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to read this harvest source') % str(user)}
else:
return {'success': True}
def harvest_source_list(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to see the harvest sources') % str(user)}
else:
pkg = model.Package.get(source_id)
if not pkg:
raise pt.ObjectNotFound(pt._('Harvest source not found'))
context['package'] = pkg
try:
pt.check_access('package_show', context, data_dict)
return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User {0} not authorized to read harvest source {1}').format(user, source_id)}
def harvest_job_show(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to read this harvest job') % str(user)}
else:
return {'success': True}
def harvest_job_list(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to see the harvest jobs') % str(user)}
else:
return {'success': True}
def harvest_object_show(context,data_dict):
model = context['model']
user = context.get('user')
def harvest_source_list(context, data_dict):
'''
Authorization check for getting a list of harveste sources
Everybody can do it
'''
return {'success': True}
def harvest_object_list(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to see the harvest objects') % str(user)}
else:
return {'success': True}
def harvest_job_show(context, data_dict):
'''
Authorization check for getting the details of a harvest job
def harvesters_info_show(context,data_dict):
model = context['model']
user = context.get('user')
It forwards the checks to harvest_source_show, ie if the user can get
the details for the parent source, she can get the details for the job
'''
job = get_job_object(context, data_dict)
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to see the harvesters information') % str(user)}
else:
return {'success': True}
return harvest_source_show(context, {'id': job.source.id})
def harvest_job_list(context, data_dict):
'''
Authorization check for getting a list of jobs for a source
It forwards the checks to harvest_source_show, ie if the user can get
the details for the parent source, she can get the list of jobs
'''
source_id = data_dict['source_id']
return harvest_source_show(context, {'id': source_id})
def harvest_object_show(context, data_dict):
'''
Authorization check for getting the contents of a harvest object
Everybody can do it
'''
return {'success': True}
def harvest_object_list(context, data_dict):
'''
TODO: remove
'''
return {'success': True}
def harvesters_info_show(context, data_dict):
'''
Authorization check for getting information about the available
harvesters
Everybody can do it
'''
return {'success': True}

View File

@ -1,7 +0,0 @@
try:
import pkg_resources
pkg_resources.declare_namespace(__name__)
except ImportError:
import pkgutil
__path__ = pkgutil.extend_path(__path__, __name__)

View File

@ -1,53 +0,0 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.model import User
from ckanext.harvest.model import HarvestSource
def harvest_source_create(context,data_dict):
model = context['model']
user = context.get('user','')
# Non-logged users can not create sources
if not user:
return {'success': False, 'msg': _('Non-logged in users are not authorized to create harvest sources')}
# Sysadmins and the rest of logged users can create sources,
# as long as they belong to a publisher
user_obj = User.get(user)
if not user_obj or not Authorizer().is_sysadmin(user) and len(user_obj.get_groups(u'publisher')) == 0:
return {'success': False, 'msg': _('User %s must belong to a publisher to create harvest sources') % str(user)}
else:
return {'success': True}
def harvest_job_create(context,data_dict):
model = context['model']
user = context.get('user')
source_id = data_dict['source_id']
if not user:
return {'success': False, 'msg': _('Non-logged in users are not authorized to create harvest jobs')}
if Authorizer().is_sysadmin(user):
return {'success': True}
user_obj = User.get(user)
source = HarvestSource.get(source_id)
if not source:
raise NotFound
if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to create a job for source %s') % (str(user),source.id)}
else:
return {'success': True}
def harvest_job_create_all(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('Only sysadmins can create harvest jobs for all sources') % str(user)}
else:
return {'success': True}

View File

@ -1,27 +0,0 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.model import User
from ckanext.harvest.logic.auth import get_source_object
def harvest_source_delete(context,data_dict):
model = context['model']
user = context.get('user','')
source = get_source_object(context,data_dict)
# Non-logged users cannot delete this source
if not user:
return {'success': False, 'msg': _('Non-logged in users are not authorized to delete harvest sources')}
# Sysadmins can delete the source
if Authorizer().is_sysadmin(user):
return {'success': True}
# Check if the source publisher id exists on the user's groups
user_obj = User.get(user)
if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to delete harvest source %s') % (str(user),source.id)}
else:
return {'success': True}

View File

@ -1,163 +0,0 @@
from ckan.lib.base import _
from ckan.logic import NotFound
from ckan.authz import Authorizer
from ckan.model import User
from ckanext.harvest.model import HarvestSource
from ckanext.harvest.logic.auth import get_source_object, get_job_object, get_obj_object
def harvest_source_show(context,data_dict):
model = context['model']
user = context.get('user','')
source = get_source_object(context,data_dict)
# Non-logged users can not read the source
if not user:
return {'success': False, 'msg': _('Non-logged in users are not authorized to see harvest sources')}
# Sysadmins can read the source
if Authorizer().is_sysadmin(user):
return {'success': True}
# Check if the source publisher id exists on the user's groups
user_obj = User.get(user)
if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to read harvest source %s') % (str(user),source.id)}
else:
return {'success': True}
def harvest_source_list(context,data_dict):
model = context['model']
user = context.get('user')
# Here we will just check that the user is logged in.
# The logic action will return an empty list if the user does not
# have permissons on any source.
if not user:
return {'success': False, 'msg': _('Only logged users are authorized to see their sources')}
else:
user_obj = User.get(user)
assert user_obj
# Only users belonging to a publisher can list sources,
# unless they are sysadmins
if Authorizer().is_sysadmin(user_obj):
return {'success': True}
if len(user_obj.get_groups(u'publisher')) > 0:
return {'success': True}
else:
return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest sources') % str(user)}
def harvest_job_show(context,data_dict):
model = context['model']
user = context.get('user')
job = get_job_object(context,data_dict)
if not user:
return {'success': False, 'msg': _('Non-logged in users are not authorized to see harvest jobs')}
if Authorizer().is_sysadmin(user):
return {'success': True}
user_obj = User.get(user)
if not user_obj or not job.source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to read harvest job %s') % (str(user),job.id)}
else:
return {'success': True}
def harvest_job_list(context,data_dict):
model = context['model']
user = context.get('user')
# Check user is logged in
if not user:
return {'success': False, 'msg': _('Only logged users are authorized to see their sources')}
user_obj = User.get(user)
# Checks for non sysadmin users
if not Authorizer().is_sysadmin(user):
if not user_obj or len(user_obj.get_groups(u'publisher')) == 0:
return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest jobs') % str(user)}
source_id = data_dict.get('source_id',False)
if not source_id:
return {'success': False, 'msg': _('Only sysadmins can list all harvest jobs') % str(user)}
source = HarvestSource.get(source_id)
if not source:
raise NotFound
if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to list jobs from source %s') % (str(user),source.id)}
return {'success': True}
def harvest_object_show(context,data_dict):
model = context['model']
user = context.get('user')
obj = get_obj_object(context,data_dict)
if context.get('ignore_auth', False):
return {'success': True}
if not user:
return {'success': False, 'msg': _('Non-logged in users are not authorized to see harvest objects')}
if Authorizer().is_sysadmin(user):
return {'success': True}
user_obj = User.get(user)
if not user_obj or not obj.source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to read harvest object %s') % (str(user),obj.id)}
else:
return {'success': True}
def harvest_object_list(context,data_dict):
model = context['model']
user = context.get('user')
# Check user is logged in
if not user:
return {'success': False, 'msg': _('Only logged users are authorized to see their sources')}
user_obj = User.get(user)
# Checks for non sysadmin users
if not Authorizer().is_sysadmin(user):
if not user_obj or len(user_obj.get_groups(u'publisher')) == 0:
return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest objects') % str(user)}
source_id = data_dict.get('source_id',False)
if not source_id:
return {'success': False, 'msg': _('Only sysadmins can list all harvest objects') % str(user)}
source = HarvestSource.get(source_id)
if not source:
raise NotFound
if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to list objects from source %s') % (str(user),source.id)}
return {'success': True}
def harvesters_info_show(context,data_dict):
model = context['model']
user = context.get('user','')
# Non-logged users can not create sources
if not user:
return {'success': False, 'msg': _('Non-logged in users can not see the harvesters info')}
# Sysadmins and the rest of logged users can see the harvesters info,
# as long as they belong to a publisher
user_obj = User.get(user)
if not user_obj or not Authorizer().is_sysadmin(user) and len(user_obj.get_groups(u'publisher')) == 0:
return {'success': False, 'msg': _('User %s must belong to a publisher to see the harvesters info') % str(user)}
else:
return {'success': True}

View File

@ -1,83 +0,0 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.model import User
from ckanext.harvest.logic.auth import get_source_object
def harvest_source_update(context,data_dict):
model = context['model']
user = context.get('user','')
source = get_source_object(context,data_dict)
# Non-logged users can not update this source
if not user:
return {'success': False, 'msg': _('Non-logged in users are not authorized to update harvest sources')}
# Sysadmins can update the source
if Authorizer().is_sysadmin(user):
return {'success': True}
# Check if the source publisher id exists on the user's groups
user_obj = User.get(user)
if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to update harvest source %s') % (str(user),source.id)}
else:
return {'success': True}
def harvest_objects_import(context,data_dict):
model = context['model']
user = context.get('user')
# Check user is logged in
if not user:
return {'success': False, 'msg': _('Only logged users are authorized to reimport harvest objects')}
user_obj = User.get(user)
# Checks for non sysadmin users
if not Authorizer().is_sysadmin(user):
if not user_obj or len(user_obj.get_groups(u'publisher')) == 0:
return {'success': False, 'msg': _('User %s must belong to a publisher to reimport harvest objects') % str(user)}
source_id = data_dict.get('source_id',False)
if not source_id:
return {'success': False, 'msg': _('Only sysadmins can reimport all harvest objects') % str(user)}
source = HarvestSource.get(source_id)
if not source:
raise NotFound
if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to reimport objects from source %s') % (str(user),source.id)}
return {'success': True}
def harvest_jobs_run(context,data_dict):
model = context['model']
user = context.get('user')
# Check user is logged in
if not user:
return {'success': False, 'msg': _('Only logged users are authorized to run harvest jobs')}
user_obj = User.get(user)
# Checks for non sysadmin users
if not Authorizer().is_sysadmin(user):
if not user_obj or len(user_obj.get_groups(u'publisher')) == 0:
return {'success': False, 'msg': _('User %s must belong to a publisher to run harvest jobs') % str(user)}
source_id = data_dict.get('source_id',False)
if not source_id:
return {'success': False, 'msg': _('Only sysadmins can run all harvest jobs') % str(user)}
source = HarvestSource.get(source_id)
if not source:
raise NotFound
if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
return {'success': False, 'msg': _('User %s not authorized to run jobs from source %s') % (str(user),source.id)}
return {'success': True}

View File

@ -1,30 +1,52 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.plugins import toolkit as pt
from ckanext.harvest.logic.auth import user_is_sysadmin
def harvest_source_update(context,data_dict):
model = context['model']
def harvest_source_update(context, data_dict):
'''
Authorization check for harvest source update
It forwards the checks to package_update, which will check for
organization membership, whether if sysadmin, etc according to the
instance configuration.
'''
model = context.get('model')
user = context.get('user')
source_id = data_dict['id']
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to update harvest sources') % str(user)}
pkg = model.Package.get(source_id)
if not pkg:
raise pt.ObjectNotFound(pt._('Harvest source not found'))
context['package'] = pkg
try:
pt.check_access('package_update', context, data_dict)
return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User {0} not authorized to update harvest source {1}').format(user, source_id)}
def harvest_objects_import(context, data_dict):
'''
Authorization check reimporting all harvest objects
Only sysadmins can do it
'''
if not user_is_sysadmin(context):
return {'success': False, 'msg': pt._('Only sysadmins can reimport all harvest objects')}
else:
return {'success': True}
def harvest_objects_import(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to reimport harvest objects') % str(user)}
def harvest_jobs_run(context, data_dict):
'''
Authorization check for running the pending harvest jobs
Only sysadmins can do it
'''
if not user_is_sysadmin(context):
return {'success': False, 'msg': pt._('Only sysadmins can run the pending harvest jobs')}
else:
return {'success': True}
def harvest_jobs_run(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to run the pending harvest jobs') % str(user)}
else:
return {'success': True}

View File

@ -1,6 +1,7 @@
from ckan.logic.schema import default_extras_schema
from ckan.logic.validators import (package_id_exists,
name_validator,
owner_org_validator,
package_name_validator,
ignore_not_package_admin,
)
@ -29,8 +30,9 @@ def harvest_source_schema():
'source_type': [not_empty, unicode, harvest_source_type_exists, convert_to_extras],
'title': [if_empty_same_as("name"), unicode],
'notes': [ignore_missing, unicode],
'owner_org': [owner_org_validator, unicode],
'frequency': [ignore_missing, unicode, harvest_source_frequency_exists, convert_to_extras],
'state': [ignore_not_package_admin, ignore_missing],
'state': [ignore_missing],
'config': [ignore_missing, harvest_source_config_validator, convert_to_extras],
'extras': default_extras_schema(),
'__extras': [ignore],

View File

@ -1,7 +1,6 @@
import types
from logging import getLogger
from pylons import config
from ckan import logic
from ckan import model
import ckan.plugins as p
@ -134,6 +133,17 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
return harvest_source_form_to_db_schema()
def db_to_form_schema_options(self, options):
'''
Similar to db_to_form_schema but with further options to allow
slightly different schemas, eg for creation or deletion on the API.
'''
if options.get('type') == 'show':
return None
else:
return self.db_to_form_schema()
def db_to_form_schema(self):
'''
Returns the schema for mapping package data from the database into a
@ -217,58 +227,26 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
p.toolkit.add_template_directory(config, templates)
p.toolkit.add_public_directory(config, 'public')
def get_actions(self):
from ckanext.harvest.logic.action.get import (harvest_source_show,
harvest_source_show_status,
harvest_source_list,
harvest_source_for_a_dataset,
harvest_job_show,
harvest_job_list,
harvest_object_show,
harvest_object_list,
harvesters_info_show,)
from ckanext.harvest.logic.action.create import (harvest_source_create,
harvest_job_create,
harvest_job_create_all,)
from ckanext.harvest.logic.action.update import (harvest_source_update,
harvest_objects_import,
harvest_jobs_run)
from ckanext.harvest.logic.action.delete import (harvest_source_delete,)
## IActions
return {
'harvest_source_show': harvest_source_show,
'harvest_source_show_status': harvest_source_show_status,
'harvest_source_list': harvest_source_list,
'harvest_source_for_a_dataset': harvest_source_for_a_dataset,
'harvest_job_show': harvest_job_show,
'harvest_job_list': harvest_job_list,
'harvest_object_show': harvest_object_show,
'harvest_object_list': harvest_object_list,
'harvesters_info_show': harvesters_info_show,
'harvest_source_create': harvest_source_create,
'harvest_job_create': harvest_job_create,
'harvest_job_create_all': harvest_job_create_all,
'harvest_source_update': harvest_source_update,
'harvest_source_delete': harvest_source_delete,
'harvest_objects_import': harvest_objects_import,
'harvest_jobs_run':harvest_jobs_run
}
def get_actions(self):
module_root = 'ckanext.harvest.logic.action'
action_functions = _get_logic_functions(module_root)
return action_functions
## IAuthFunctions
def get_auth_functions(self):
module_root = 'ckanext.harvest.logic.auth'
auth_profile = config.get('ckan.harvest.auth.profile', '')
auth_functions = _get_auth_functions(module_root)
if auth_profile:
module_root = '%s.%s' % (module_root, auth_profile)
auth_functions = _get_auth_functions(module_root,auth_functions)
log.debug('Using auth profile at %s' % module_root)
auth_functions = _get_logic_functions(module_root)
return auth_functions
## ITemplateHelpers
def get_helpers(self):
from ckanext.harvest import helpers as harvest_helpers
return {
@ -279,25 +257,24 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
}
def _get_auth_functions(module_root, auth_functions = {}):
def _get_logic_functions(module_root, logic_functions = {}):
for auth_module_name in ['get', 'create', 'update','delete']:
module_path = '%s.%s' % (module_root, auth_module_name,)
for module_name in ['get', 'create', 'update','delete']:
module_path = '%s.%s' % (module_root, module_name,)
try:
module = __import__(module_path)
except ImportError,e:
log.debug('No auth module for action "%s"' % auth_module_name)
except ImportError:
log.debug('No auth module for action "{0}"'.format(module_name))
continue
for part in module_path.split('.')[1:]:
module = getattr(module, part)
for key, value in module.__dict__.items():
if not key.startswith('_'):
auth_functions[key] = value
if not key.startswith('_') and isinstance(value, types.FunctionType):
logic_functions[key] = value
return auth_functions
return logic_functions
def _create_harvest_source_object(data_dict):
'''

View File

@ -115,7 +115,13 @@ def gather_callback(channel, method, header, body):
harvester_found = True
# Get a list of harvest object ids from the plugin
job.gather_started = datetime.datetime.now()
harvest_object_ids = harvester.gather_stage(job)
try:
harvest_object_ids = harvester.gather_stage(job)
except Exception, e:
log.error('Gather stage failed unexpectedly: %s' % e)
job.status = 'Errored'
job.save()
continue
job.gather_finished = datetime.datetime.now()
job.save()
log.debug('Received from plugin''s gather_stage: %r' % harvest_object_ids)
@ -160,6 +166,8 @@ def fetch_callback(channel, method, header, body):
obj.save()
if obj.retry_times >= 5:
obj.state = "ERROR"
obj.save()
log.error('Too many consecutive retries for object {0}'.format(obj.id))
channel.basic_ack(method.delivery_tag)
return False

View File

@ -2,6 +2,8 @@
<form id="source-new" class="form-horizontal" method="post" >
{% block errors %}{{ form.errors(error_summary) }}{% endblock %}
{% call form.input('url', id='field-url', label=_('URL'), value=data.url, error=errors.url, classes=['control-full', 'control-large']) %}
<span class="info-block icon-large icon-info-sign">
{{ _('This should include the http:// part of the URL') }}
@ -32,7 +34,29 @@
{{ form.textarea('config', id='field-config', label=_('Configuration'), value=data.config, error=errors.config) }}
<div><b>TODO: state / delete</b> </div>
{# if we have a default group then this wants remembering #}
{% if data.group_id %}
<input type="hidden" name="groups__0__id" value="{{ data.group_id }}" />
{% endif %}
{% set existing_org = data.owner_org or data.group_id %}
{% if h.check_access('sysadmin') or data.get('state', 'draft').startswith('draft') or data.get('state', 'none') == 'none' %}
{% set organizations_available = h.organizations_available('create_dataset') %}
{% if organizations_available %}
<div class="control-group">
<label for="field-organizations" class="control-label">{{ _('Organization') }}</label>
<div class="controls">
<select id="field-organizations" name="owner_org" data-module="autocomplete">
<option value="">{{ _('Select an organization...') }}</option>
{% for organization in organizations_available %}
{# get out first org from users list only if there is not an existing org #}
{% set selected_org = (existing_org and existing_org == organization.id) or (not existing_org and organization.id == organizations_available[0].id) %}
<option value="{{ organization.id }}" {% if selected_org %} selected="selected" {% endif %}>{{ organization.name }}</option>
{% endfor %}
</select>
</div>
</div>
{% endif %}
{% endif %}
<input id="save" name="save" value="Save" type="submit" class="btn"/>