Merge branch '4-new-auth-for-2.0' into 2.0-dataset-sources

This commit is contained in:
amercader 2013-01-22 12:03:26 +00:00
commit 0950827329
17 changed files with 328 additions and 593 deletions

View File

@ -119,7 +119,7 @@ The two available profiles right now are:
To know more about the CKAN publisher auth profile, visit: To know more about the CKAN publisher auth profile, visit:
http://wiki.ckan.org/Working_with_the_publisher_auth_profile http://oldwiki.ckan.org/Working_with_the_publisher_auth_profile
The CKAN harvester The CKAN harvester

View File

@ -3,7 +3,6 @@ from lxml import etree
from lxml.etree import XMLSyntaxError from lxml.etree import XMLSyntaxError
from pylons.i18n import _ from pylons.i18n import _
from ckan.authz import Authorizer
from ckan import model from ckan import model
from ckan.model.group import Group from ckan.model.group import Group
@ -33,9 +32,9 @@ class ViewController(BaseController):
def _get_publishers(self): def _get_publishers(self):
groups = None groups = None
user = model.User.get(c.user)
if c.publisher_auth: if c.publisher_auth:
if Authorizer().is_sysadmin(c.user): if user.sysadmin:
groups = Group.all(group_type='publisher') groups = Group.all(group_type='publisher')
elif c.userobj: elif c.userobj:
groups = c.userobj.get_groups('publisher') groups = c.userobj.get_groups('publisher')

View File

@ -1,6 +1,5 @@
import logging import logging
from sqlalchemy import or_, distinct from sqlalchemy import or_, func
from ckan.authz import Authorizer
from ckan.model import User from ckan.model import User
import datetime import datetime
@ -8,6 +7,7 @@ from ckan import logic
from ckan.plugins import PluginImplementations from ckan.plugins import PluginImplementations
from ckanext.harvest.interfaces import IHarvester from ckanext.harvest.interfaces import IHarvester
import ckan.plugins as p
from ckan.logic import NotFound, check_access from ckan.logic import NotFound, check_access
from ckanext.harvest import model as harvest_model from ckanext.harvest import model as harvest_model
@ -19,6 +19,7 @@ from ckanext.harvest.logic.dictization import (harvest_source_dictize,
from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def harvest_source_show(context,data_dict): def harvest_source_show(context,data_dict):
''' '''
Returns the metadata of a harvest source Returns the metadata of a harvest source
@ -42,8 +43,7 @@ def harvest_source_show(context,data_dict):
return source_dict return source_dict
def harvest_source_show_status(context, data_dict):
def harvest_source_show_status(context,data_dict):
''' '''
Returns a status report for a harvest source Returns a status report for a harvest source
@ -59,87 +59,63 @@ def harvest_source_show_status(context,data_dict):
''' '''
model = context.get('model') model = context.get('model')
detailed = context.get('detailed',True)
source = harvest_model.HarvestSource.get(data_dict['id']) source = harvest_model.HarvestSource.get(data_dict['id'])
if not source: if not source:
raise logic.NotFound('Harvest source {0} does not exist'.format(data_dict['id'])) raise p.toolkit.NotFound('Harvest source {0} does not exist'.format(data_dict['id']))
out = {}
jobs = harvest_model.HarvestJob.filter(source=source).all()
out = { out = {
'job_count': 0, 'job_count': 0,
'next_harvest':'', 'next_harvest': p.toolkit._('Not yet scheduled'),
'last_harvest_request':'', 'last_harvest_request': '',
'last_harvest_statistics':{'added':0,'updated':0,'errors':0}, 'last_harvest_statistics': {'new': 0, 'updated': 0, 'deleted': 0,'errored': 0},
'overall_statistics':{'added':0, 'errors':0}, 'total_datasets': 0,
} }
jobs = harvest_model.HarvestJob.filter(source=source).all()
job_count = len(jobs) job_count = len(jobs)
if job_count == 0: if job_count == 0:
out['msg'] = 'No jobs yet'
return out return out
else:
out['job_count'] = job_count out['job_count'] = job_count
# Get next scheduled job # Get next scheduled job
next_job = harvest_model.HarvestJob.filter(source=source,status=u'New').first() next_job = harvest_model.HarvestJob.filter(source=source,status=u'New').first()
if next_job: if next_job:
out['next_harvest'] = 'Scheduled' out['next_harvest'] = p.toolkit._('Scheduled')
else:
out['next_harvest'] = 'Not yet scheduled'
# Get the last finished job # Get the last finished job
last_job = harvest_model.HarvestJob.filter(source=source,status=u'Finished') \ last_job = harvest_model.HarvestJob.filter(source=source,status=u'Finished') \
.order_by(harvest_model.HarvestJob.created.desc()).first() .order_by(harvest_model.HarvestJob.created.desc()).first()
if last_job: if not last_job:
out['last_job_id'] = last_job.id out['last_harvest_request'] = p.toolkit._('Not yet harvested')
out['last_harvest_request'] = str(last_job.gather_finished) return out
#Get HarvestObjects from last job with links to packages out['last_job_id'] = last_job.id
if detailed: out['last_harvest_request'] = str(last_job.gather_finished)
last_objects = [obj for obj in last_job.objects if obj.package is not None]
if len(last_objects) == 0: last_job_report = model.Session.query(
# No packages added or updated harvest_model.HarvestObject.report_status,
out['last_harvest_statistics']['added'] = 0 func.count(harvest_model.HarvestObject.report_status)) \
out['last_harvest_statistics']['updated'] = 0 .filter(harvest_model.HarvestObject.harvest_job_id==last_job.id) \
else: .group_by(harvest_model.HarvestObject.report_status)
# Check wether packages were added or updated
for last_object in last_objects:
# Check if the same package had been linked before
previous_objects = model.Session.query(harvest_model.HarvestObject) \
.filter(harvest_model.HarvestObject.package==last_object.package) \
.count()
if previous_objects == 1: for row in last_job_report:
# It didn't previously exist, it has been added if row[0]:
out['last_harvest_statistics']['added'] += 1 out['last_harvest_statistics'][row[0]] = row[1]
else:
# Pacakge already existed, but it has been updated
out['last_harvest_statistics']['updated'] += 1
# Last harvest errors # Add the gather stage errors
# We have the gathering errors in last_job.gather_errors, so let's also out['last_harvest_statistics']['errored'] += len(last_job.gather_errors)
# get also the object errors.
object_errors = model.Session.query(harvest_model.HarvestObjectError).join(harvest_model.HarvestObject) \
.filter(harvest_model.HarvestObject.job==last_job)
out['last_harvest_statistics']['errors'] = len(last_job.gather_errors) \ # Overall statistics
+ object_errors.count() packages = model.Session.query(model.Package) \
# Overall statistics .join(harvest_model.HarvestObject) \
packages = model.Session.query(distinct(harvest_model.HarvestObject.package_id), model.Package.name) \ .filter(harvest_model.HarvestObject.harvest_source_id==source.id) \
.join(model.Package).join(HarvestSource) \ .filter(harvest_model.HarvestObject.current==True) \
.filter(HarvestObject.source==source) \ .filter(model.Package.state==u'active')
.filter(HarvestObject.current==True) \
.filter(model.Package.state==u'active')
out['overall_statistics']['added'] = packages.count() out['total_datasets'] = packages.count()
else:
out['last_harvest_request'] = 'Not yet harvested'
return out return out
@ -284,7 +260,7 @@ def _get_sources_for_user(context,data_dict):
user_obj = User.get(user) user_obj = User.get(user)
# Sysadmins will get all sources # Sysadmins will get all sources
if user_obj and user_obj.sysadmin: if user_obj and not user_obj.sysadmin:
# This only applies to a non sysadmin user when using the # This only applies to a non sysadmin user when using the
# publisher auth profile. When using the default profile, # publisher auth profile. When using the default profile,
# normal users will never arrive at this point, but even if they # normal users will never arrive at this point, but even if they

View File

@ -1,39 +1,39 @@
from ckan.logic import NotFound from ckan.plugins import toolkit as pt
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject from ckanext.harvest import model as harvest_model
def user_is_sysadmin(context):
    '''
    Checks if the user defined in the context is a sysadmin

    The user name is read from context['user'] and looked up with
    context['model'].User.get().

    Raises pt.ObjectNotFound if no user object is found for that name.

    rtype: boolean
    '''
    model = context['model']
    user = context['user']

    user_obj = model.User.get(user)
    if not user_obj:
        # The message has to be formatted before the exception is built:
        # .format() belongs to the string, not to the exception instance.
        raise pt.ObjectNotFound('User {0} not found'.format(user))

    return user_obj.sysadmin
def _get_object(context, data_dict, name, class_name):
'''
return the named item if in the data_dict, or get it from
model.class_name
'''
if not name in context:
id = data_dict.get('id', None)
obj = getattr(harvest_model, class_name).get(id)
if not obj:
raise pt.ObjectNotFound
else:
obj = context[name]
return obj
def get_source_object(context, data_dict = {}): def get_source_object(context, data_dict = {}):
if not 'source' in context: return _get_object(context, data_dict, 'source', 'HarvestSource')
model = context['model']
id = data_dict.get('id',None)
source = HarvestSource.get(id)
if not source:
raise NotFound
else:
source = context['source']
return source
def get_job_object(context, data_dict = {}): def get_job_object(context, data_dict = {}):
if not 'job' in context: return _get_object(context, data_dict, 'job', 'HarvestJob')
model = context['model']
id = data_dict.get('id',None)
job = HarvestJob.get(id)
if not job:
raise NotFound
else:
job = context['job']
return job
def get_obj_object(context, data_dict = {}): def get_obj_object(context, data_dict = {}):
if not 'obj' in context: return _get_object(context, data_dict, 'obj', 'HarvestObject')
model = context['model']
id = data_dict.get('id',None)
obj = HarvestObject.get(id)
if not obj:
raise NotFound
else:
obj = context['obj']
return obj

View File

@ -1,30 +1,54 @@
from ckan.lib.base import _ from ckan.plugins import toolkit as pt
from ckan.authz import Authorizer from ckanext.harvest.logic.auth import user_is_sysadmin
def harvest_source_create(context,data_dict):
model = context['model'] def harvest_source_create(context, data_dict):
'''
Authorization check for harvest source creation
It forwards the checks to package_create, which will check for
organization membership, whether if sysadmin, etc according to the
instance configuration.
'''
user = context.get('user') user = context.get('user')
try:
pt.check_access('package_create', context, data_dict)
return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User {0} not authorized to create harvest sources').format(user)}
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to create harvest sources') % str(user)} def harvest_job_create(context, data_dict):
'''
Authorization check for harvest job creation
It forwards the checks to package_update, ie the user can only create
new jobs if she is allowed to edit the harvest source dataset.
'''
model = context['model']
source_id = data_dict['source_id']
pkg = model.Package.get(source_id)
if not pkg:
raise pt.ObjectNotFound(pt._('Harvest source not found'))
context['package'] = pkg
try:
pt.check_access('package_update', context, data_dict)
return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User not authorized to create a job for source {0}').format(source_id)}
def harvest_job_create_all(context, data_dict):
'''
Authorization check for creating new jobs for all sources
Only sysadmins can do it
'''
if not user_is_sysadmin(context):
return {'success': False, 'msg': pt._('Only sysadmins can create harvest jobs for all sources')}
else: else:
return {'success': True} return {'success': True}
def harvest_job_create(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to create harvest jobs') % str(user)}
else:
return {'success': True}
def harvest_job_create_all(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to create harvest jobs for all sources') % str(user)}
else:
return {'success': True}

View File

@ -1,13 +1,27 @@
from ckan.lib.base import _ from ckan.plugins import toolkit as pt
from ckan.authz import Authorizer
def harvest_source_delete(context,data_dict):
model = context['model'] def harvest_source_update(context, data_dict):
'''
Authorization check for harvest source deletion
It forwards the checks to package_delete, which will check for
organization membership, whether if sysadmin, etc according to the
instance configuration.
'''
model = context.get('model')
user = context.get('user') user = context.get('user')
source_id = data_dict['id']
if not Authorizer().is_sysadmin(user): pkg = model.Package.get(source_id)
return {'success': False, 'msg': _('User %s not authorized to delete harvest sources') % str(user)} if not pkg:
else: raise pt.ObjectNotFound(pt._('Harvest source not found'))
context['package'] = pkg
try:
pt.check_access('package_delete', context, data_dict)
return {'success': True} return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User {0} not authorized to delete harvest source {1}').format(user, source_id)}

View File

@ -1,64 +1,86 @@
from ckan.lib.base import _ from ckan.plugins import toolkit as pt
from ckan.authz import Authorizer from ckanext.harvest.logic.auth import get_job_object
def harvest_source_show(context,data_dict):
model = context['model'] def harvest_source_show(context, data_dict):
'''
Authorization check for getting the details of a harvest source
It forwards the checks to package_show, which will check for
organization membership, whether if sysadmin, etc according to the
instance configuration.
'''
model = context.get('model')
user = context.get('user') user = context.get('user')
source_id = data_dict['id']
if not Authorizer().is_sysadmin(user): pkg = model.Package.get(source_id)
return {'success': False, 'msg': _('User %s not authorized to read this harvest source') % str(user)} if not pkg:
else: raise pt.ObjectNotFound(pt._('Harvest source not found'))
return {'success': True}
context['package'] = pkg
def harvest_source_list(context,data_dict):
model = context['model'] try:
user = context.get('user') pt.check_access('package_show', context, data_dict)
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to see the harvest sources') % str(user)}
else:
return {'success': True} return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User {0} not authorized to read harvest source {1}').format(user, source_id)}
def harvest_job_show(context,data_dict): def harvest_source_list(context, data_dict):
model = context['model'] '''
user = context.get('user') Authorization check for getting a list of harveste sources
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to read this harvest job') % str(user)}
else:
return {'success': True}
def harvest_job_list(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to see the harvest jobs') % str(user)}
else:
return {'success': True}
def harvest_object_show(context,data_dict):
model = context['model']
user = context.get('user')
Everybody can do it
'''
return {'success': True} return {'success': True}
def harvest_object_list(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user): def harvest_job_show(context, data_dict):
return {'success': False, 'msg': _('User %s not authorized to see the harvest objects') % str(user)} '''
else: Authorization check for getting the details of a harvest job
return {'success': True}
def harvesters_info_show(context,data_dict): It forwards the checks to harvest_source_show, ie if the user can get
model = context['model'] the details for the parent source, she can get the details for the job
user = context.get('user') '''
job = get_job_object(context, data_dict)
if not Authorizer().is_sysadmin(user): return harvest_source_show(context, {'id': job.source.id})
return {'success': False, 'msg': _('User %s not authorized to see the harvesters information') % str(user)}
else:
return {'success': True}
def harvest_job_list(context, data_dict):
    '''
    Authorization check for listing the jobs of a harvest source

    The decision is delegated to harvest_source_show for the source given
    in data_dict['source_id']: whoever can see the source details can see
    its list of jobs.
    '''
    parent_source = {'id': data_dict['source_id']}
    return harvest_source_show(context, parent_source)
def harvest_object_show(context, data_dict):
    '''
    Authorization check for reading the contents of a harvest object

    No restriction is applied: the check always succeeds.
    '''
    return dict(success=True)
def harvest_object_list(context, data_dict):
    '''
    Authorization check for listing harvest objects; always succeeds.

    TODO: remove
    '''
    return dict(success=True)
def harvesters_info_show(context, data_dict):
    '''
    Authorization check for reading the information about the available
    harvesters

    No restriction is applied: the check always succeeds.
    '''
    return dict(success=True)

View File

@ -1,7 +0,0 @@
# Declare this package as a namespace package, preferring pkg_resources.
try:
    import pkg_resources
    pkg_resources.declare_namespace(__name__)
except ImportError:
    # pkg_resources could not be imported: extend the package path with
    # pkgutil instead.
    import pkgutil
    __path__ = pkgutil.extend_path(__path__, __name__)

View File

@ -1,53 +0,0 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.model import User
from ckanext.harvest.model import HarvestSource
def harvest_source_create(context, data_dict):
    '''
    Authorization check for creating harvest sources, based on publisher
    groups.

    Anonymous users are refused. A logged in user is allowed when it is a
    sysadmin or belongs to at least one 'publisher' group.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    '''
    model = context['model']
    user = context.get('user', '')

    # Anonymous requests are rejected straight away
    if not user:
        anonymous_msg = _('Non-logged in users are not authorized to create harvest sources')
        return {'success': False, 'msg': anonymous_msg}

    # A known user passes if sysadmin, or if member of some publisher
    user_obj = User.get(user)
    if user_obj:
        if Authorizer().is_sysadmin(user) or len(user_obj.get_groups(u'publisher')) != 0:
            return {'success': True}

    refusal_msg = _('User %s must belong to a publisher to create harvest sources') % str(user)
    return {'success': False, 'msg': refusal_msg}
def harvest_job_create(context, data_dict):
    '''
    Authorization check for creating a harvest job, based on publisher
    groups.

    Anonymous users are refused and sysadmins are always allowed. Any other
    user is allowed only if the publisher of the source given in
    data_dict['source_id'] is one of the user's 'publisher' groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    Raises NotFound if the source does not exist.
    '''
    model = context['model']
    user = context.get('user')

    source_id = data_dict['source_id']

    if not user:
        return {'success': False, 'msg': _('Non-logged in users are not authorized to create harvest jobs')}

    if Authorizer().is_sysadmin(user):
        return {'success': True}

    user_obj = User.get(user)
    source = HarvestSource.get(source_id)
    if not source:
        # NotFound is not among this module's imports, so it is imported
        # at the point of use to avoid a NameError on this path.
        from ckan.logic import NotFound
        raise NotFound

    if not user_obj or not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
        return {'success': False, 'msg': _('User %s not authorized to create a job for source %s') % (str(user), source.id)}
    else:
        return {'success': True}
def harvest_job_create_all(context, data_dict):
    '''
    Authorization check for creating harvest jobs for all sources.

    Only sysadmins are allowed.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    '''
    model = context['model']
    user = context.get('user')

    if not Authorizer().is_sysadmin(user):
        # The message has no %s placeholder, so it must not be %-formatted
        # with the user name: doing so raises TypeError.
        return {'success': False, 'msg': _('Only sysadmins can create harvest jobs for all sources')}
    else:
        return {'success': True}

View File

@ -1,27 +0,0 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.model import User
from ckanext.harvest.logic.auth import get_source_object
def harvest_source_delete(context, data_dict):
    '''
    Authorization check for deleting a harvest source, based on publisher
    groups.

    Anonymous users are refused and sysadmins are always allowed. Any other
    user is allowed only if the source publisher is one of the user's
    'publisher' groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    '''
    model = context['model']
    user = context.get('user', '')

    source = get_source_object(context, data_dict)

    # Anonymous requests are rejected
    if not user:
        anonymous_msg = _('Non-logged in users are not authorized to delete harvest sources')
        return {'success': False, 'msg': anonymous_msg}

    # Sysadmins need no further checks
    if Authorizer().is_sysadmin(user):
        return {'success': True}

    # Everybody else must share a publisher group with the source
    user_obj = User.get(user)
    if user_obj:
        if source.publisher_id in [group.id for group in user_obj.get_groups(u'publisher')]:
            return {'success': True}

    refusal_msg = _('User %s not authorized to delete harvest source %s') % (str(user), source.id)
    return {'success': False, 'msg': refusal_msg}

View File

@ -1,163 +0,0 @@
from ckan.lib.base import _
from ckan.logic import NotFound
from ckan.authz import Authorizer
from ckan.model import User
from ckanext.harvest.model import HarvestSource
from ckanext.harvest.logic.auth import get_source_object, get_job_object, get_obj_object
def harvest_source_show(context, data_dict):
    '''
    Authorization check for reading a harvest source, based on publisher
    groups.

    Anonymous users are refused and sysadmins are always allowed. Any other
    user is allowed only if the source publisher is one of the user's
    'publisher' groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    '''
    model = context['model']
    user = context.get('user', '')

    source = get_source_object(context, data_dict)

    # Anonymous requests are rejected
    if not user:
        anonymous_msg = _('Non-logged in users are not authorized to see harvest sources')
        return {'success': False, 'msg': anonymous_msg}

    # Sysadmins need no further checks
    if Authorizer().is_sysadmin(user):
        return {'success': True}

    # Everybody else must share a publisher group with the source
    user_obj = User.get(user)
    if user_obj:
        if source.publisher_id in [group.id for group in user_obj.get_groups(u'publisher')]:
            return {'success': True}

    refusal_msg = _('User %s not authorized to read harvest source %s') % (str(user), source.id)
    return {'success': False, 'msg': refusal_msg}
def harvest_source_list(context, data_dict):
    '''
    Authorization check for listing harvest sources, based on publisher
    groups.

    Only the login and publisher membership are checked here; the logic
    action is expected to return an empty list if the user does not have
    permissions on any source.

    Anonymous users are refused. Sysadmins and users belonging to at least
    one 'publisher' group are allowed.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    '''
    model = context['model']
    user = context.get('user')

    if not user:
        return {'success': False, 'msg': _('Only logged users are authorized to see their sources')}

    user_obj = User.get(user)
    if not user_obj:
        # An unknown user is refused explicitly instead of being guarded by
        # an assert, which disappears when Python runs with -O.
        return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest sources') % str(user)}

    # Only users belonging to a publisher can list sources,
    # unless they are sysadmins
    if Authorizer().is_sysadmin(user_obj):
        return {'success': True}

    if len(user_obj.get_groups(u'publisher')) > 0:
        return {'success': True}
    else:
        return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest sources') % str(user)}
def harvest_job_show(context, data_dict):
    '''
    Authorization check for reading a harvest job, based on publisher
    groups.

    Anonymous users are refused and sysadmins are always allowed. Any other
    user is allowed only if the publisher of the job's source is one of the
    user's 'publisher' groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    '''
    model = context['model']
    user = context.get('user')

    job = get_job_object(context, data_dict)

    if not user:
        anonymous_msg = _('Non-logged in users are not authorized to see harvest jobs')
        return {'success': False, 'msg': anonymous_msg}

    if Authorizer().is_sysadmin(user):
        return {'success': True}

    # Everybody else must share a publisher group with the job's source
    user_obj = User.get(user)
    if user_obj:
        if job.source.publisher_id in [group.id for group in user_obj.get_groups(u'publisher')]:
            return {'success': True}

    refusal_msg = _('User %s not authorized to read harvest job %s') % (str(user), job.id)
    return {'success': False, 'msg': refusal_msg}
def harvest_job_list(context, data_dict):
    '''
    Authorization check for listing harvest jobs, based on publisher
    groups.

    Anonymous users are refused and sysadmins are always allowed. Any other
    user must belong to at least one 'publisher' group and must ask for the
    jobs of a single source (data_dict['source_id']) whose publisher is one
    of those groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    Raises NotFound if the requested source does not exist.
    '''
    model = context['model']
    user = context.get('user')

    # Check user is logged in
    if not user:
        return {'success': False, 'msg': _('Only logged users are authorized to see their sources')}

    user_obj = User.get(user)

    # Checks for non sysadmin users
    if not Authorizer().is_sysadmin(user):
        if not user_obj or len(user_obj.get_groups(u'publisher')) == 0:
            return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest jobs') % str(user)}

        source_id = data_dict.get('source_id', False)
        if not source_id:
            # The message has no %s placeholder, so it must not be
            # %-formatted with the user name: doing so raises TypeError.
            return {'success': False, 'msg': _('Only sysadmins can list all harvest jobs')}

        source = HarvestSource.get(source_id)
        if not source:
            raise NotFound

        if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
            return {'success': False, 'msg': _('User %s not authorized to list jobs from source %s') % (str(user), source.id)}

    return {'success': True}
def harvest_object_show(context, data_dict):
    '''
    Authorization check for reading a harvest object, based on publisher
    groups.

    The check succeeds right away when context['ignore_auth'] is set.
    Otherwise anonymous users are refused, sysadmins are allowed, and any
    other user is allowed only if the publisher of the object's source is
    one of the user's 'publisher' groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    '''
    model = context['model']
    user = context.get('user')

    obj = get_obj_object(context, data_dict)

    # Callers can switch the whole check off through the context
    if context.get('ignore_auth', False):
        return {'success': True}

    if not user:
        anonymous_msg = _('Non-logged in users are not authorized to see harvest objects')
        return {'success': False, 'msg': anonymous_msg}

    if Authorizer().is_sysadmin(user):
        return {'success': True}

    # Everybody else must share a publisher group with the object's source
    user_obj = User.get(user)
    if user_obj:
        if obj.source.publisher_id in [group.id for group in user_obj.get_groups(u'publisher')]:
            return {'success': True}

    refusal_msg = _('User %s not authorized to read harvest object %s') % (str(user), obj.id)
    return {'success': False, 'msg': refusal_msg}
def harvest_object_list(context, data_dict):
    '''
    Authorization check for listing harvest objects, based on publisher
    groups.

    Anonymous users are refused and sysadmins are always allowed. Any other
    user must belong to at least one 'publisher' group and must ask for the
    objects of a single source (data_dict['source_id']) whose publisher is
    one of those groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    Raises NotFound if the requested source does not exist.
    '''
    model = context['model']
    user = context.get('user')

    # Check user is logged in
    if not user:
        return {'success': False, 'msg': _('Only logged users are authorized to see their sources')}

    user_obj = User.get(user)

    # Checks for non sysadmin users
    if not Authorizer().is_sysadmin(user):
        if not user_obj or len(user_obj.get_groups(u'publisher')) == 0:
            return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest objects') % str(user)}

        source_id = data_dict.get('source_id', False)
        if not source_id:
            # The message has no %s placeholder, so it must not be
            # %-formatted with the user name: doing so raises TypeError.
            return {'success': False, 'msg': _('Only sysadmins can list all harvest objects')}

        source = HarvestSource.get(source_id)
        if not source:
            raise NotFound

        if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
            return {'success': False, 'msg': _('User %s not authorized to list objects from source %s') % (str(user), source.id)}

    return {'success': True}
def harvesters_info_show(context, data_dict):
    '''
    Authorization check for reading the harvesters information, based on
    publisher groups.

    Anonymous users are refused. A logged in user is allowed when it is a
    sysadmin or belongs to at least one 'publisher' group.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    '''
    model = context['model']
    user = context.get('user', '')

    # Anonymous requests are rejected straight away
    if not user:
        anonymous_msg = _('Non-logged in users can not see the harvesters info')
        return {'success': False, 'msg': anonymous_msg}

    # A known user passes if sysadmin, or if member of some publisher
    user_obj = User.get(user)
    if user_obj:
        if Authorizer().is_sysadmin(user) or len(user_obj.get_groups(u'publisher')) != 0:
            return {'success': True}

    refusal_msg = _('User %s must belong to a publisher to see the harvesters info') % str(user)
    return {'success': False, 'msg': refusal_msg}

View File

@ -1,83 +0,0 @@
from ckan.lib.base import _
from ckan.authz import Authorizer
from ckan.model import User
from ckanext.harvest.logic.auth import get_source_object
def harvest_source_update(context, data_dict):
    '''
    Authorization check for updating a harvest source, based on publisher
    groups.

    Anonymous users are refused and sysadmins are always allowed. Any other
    user is allowed only if the source publisher is one of the user's
    'publisher' groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    '''
    model = context['model']
    user = context.get('user', '')

    source = get_source_object(context, data_dict)

    # Anonymous requests are rejected
    if not user:
        anonymous_msg = _('Non-logged in users are not authorized to update harvest sources')
        return {'success': False, 'msg': anonymous_msg}

    # Sysadmins need no further checks
    if Authorizer().is_sysadmin(user):
        return {'success': True}

    # Everybody else must share a publisher group with the source
    user_obj = User.get(user)
    if user_obj:
        if source.publisher_id in [group.id for group in user_obj.get_groups(u'publisher')]:
            return {'success': True}

    refusal_msg = _('User %s not authorized to update harvest source %s') % (str(user), source.id)
    return {'success': False, 'msg': refusal_msg}
def harvest_objects_import(context, data_dict):
    '''
    Authorization check for reimporting harvest objects, based on
    publisher groups.

    Anonymous users are refused and sysadmins are always allowed. Any other
    user must belong to at least one 'publisher' group and must restrict
    the reimport to a single source (data_dict['source_id']) whose
    publisher is one of those groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    Raises NotFound if the requested source does not exist.
    '''
    model = context['model']
    user = context.get('user')

    # Check user is logged in
    if not user:
        return {'success': False, 'msg': _('Only logged users are authorized to reimport harvest objects')}

    user_obj = User.get(user)

    # Checks for non sysadmin users
    if not Authorizer().is_sysadmin(user):
        if not user_obj or len(user_obj.get_groups(u'publisher')) == 0:
            return {'success': False, 'msg': _('User %s must belong to a publisher to reimport harvest objects') % str(user)}

        source_id = data_dict.get('source_id', False)
        if not source_id:
            # The message has no %s placeholder, so it must not be
            # %-formatted with the user name: doing so raises TypeError.
            return {'success': False, 'msg': _('Only sysadmins can reimport all harvest objects')}

        # Neither name is among this module's imports, so both are
        # imported at the point of use to avoid a NameError.
        from ckan.logic import NotFound
        from ckanext.harvest.model import HarvestSource

        source = HarvestSource.get(source_id)
        if not source:
            raise NotFound

        if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
            return {'success': False, 'msg': _('User %s not authorized to reimport objects from source %s') % (str(user), source.id)}

    return {'success': True}
def harvest_jobs_run(context, data_dict):
    '''
    Authorization check for running harvest jobs, based on publisher
    groups.

    Anonymous users are refused and sysadmins are always allowed. Any other
    user must belong to at least one 'publisher' group and must restrict
    the run to a single source (data_dict['source_id']) whose publisher is
    one of those groups.

    Returns a dict with a 'success' flag and, on refusal, a 'msg'.
    Raises NotFound if the requested source does not exist.
    '''
    model = context['model']
    user = context.get('user')

    # Check user is logged in
    if not user:
        return {'success': False, 'msg': _('Only logged users are authorized to run harvest jobs')}

    user_obj = User.get(user)

    # Checks for non sysadmin users
    if not Authorizer().is_sysadmin(user):
        if not user_obj or len(user_obj.get_groups(u'publisher')) == 0:
            return {'success': False, 'msg': _('User %s must belong to a publisher to run harvest jobs') % str(user)}

        source_id = data_dict.get('source_id', False)
        if not source_id:
            # The message has no %s placeholder, so it must not be
            # %-formatted with the user name: doing so raises TypeError.
            return {'success': False, 'msg': _('Only sysadmins can run all harvest jobs')}

        # Neither name is among this module's imports, so both are
        # imported at the point of use to avoid a NameError.
        from ckan.logic import NotFound
        from ckanext.harvest.model import HarvestSource

        source = HarvestSource.get(source_id)
        if not source:
            raise NotFound

        if not source.publisher_id in [g.id for g in user_obj.get_groups(u'publisher')]:
            return {'success': False, 'msg': _('User %s not authorized to run jobs from source %s') % (str(user), source.id)}

    return {'success': True}

View File

@ -1,30 +1,52 @@
from ckan.lib.base import _ from ckan.plugins import toolkit as pt
from ckan.authz import Authorizer from ckanext.harvest.logic.auth import user_is_sysadmin
def harvest_source_update(context,data_dict):
model = context['model'] def harvest_source_update(context, data_dict):
'''
Authorization check for harvest source update
It forwards the checks to package_update, which will check for
organization membership, whether if sysadmin, etc according to the
instance configuration.
'''
model = context.get('model')
user = context.get('user') user = context.get('user')
source_id = data_dict['id']
if not Authorizer().is_sysadmin(user): pkg = model.Package.get(source_id)
return {'success': False, 'msg': _('User %s not authorized to update harvest sources') % str(user)} if not pkg:
raise pt.ObjectNotFound(pt._('Harvest source not found'))
context['package'] = pkg
try:
pt.check_access('package_update', context, data_dict)
return {'success': True}
except pt.NotAuthorized:
return {'success': False,
'msg': pt._('User {0} not authorized to update harvest source {1}').format(user, source_id)}
def harvest_objects_import(context, data_dict):
'''
Authorization check reimporting all harvest objects
Only sysadmins can do it
'''
if not user_is_sysadmin(context):
return {'success': False, 'msg': pt._('Only sysadmins can reimport all harvest objects')}
else: else:
return {'success': True} return {'success': True}
def harvest_objects_import(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user): def harvest_jobs_run(context, data_dict):
return {'success': False, 'msg': _('User %s not authorized to reimport harvest objects') % str(user)} '''
Authorization check for running the pending harvest jobs
Only sysadmins can do it
'''
if not user_is_sysadmin(context):
return {'success': False, 'msg': pt._('Only sysadmins can run the pending harvest jobs')}
else: else:
return {'success': True} return {'success': True}
def harvest_jobs_run(context,data_dict):
model = context['model']
user = context.get('user')
if not Authorizer().is_sysadmin(user):
return {'success': False, 'msg': _('User %s not authorized to run the pending harvest jobs') % str(user)}
else:
return {'success': True}

View File

@ -1,6 +1,7 @@
from ckan.logic.schema import default_extras_schema from ckan.logic.schema import default_extras_schema
from ckan.logic.validators import (package_id_exists, from ckan.logic.validators import (package_id_exists,
name_validator, name_validator,
owner_org_validator,
package_name_validator, package_name_validator,
ignore_not_package_admin, ignore_not_package_admin,
) )
@ -29,8 +30,9 @@ def harvest_source_schema():
'source_type': [not_empty, unicode, harvest_source_type_exists, convert_to_extras], 'source_type': [not_empty, unicode, harvest_source_type_exists, convert_to_extras],
'title': [if_empty_same_as("name"), unicode], 'title': [if_empty_same_as("name"), unicode],
'notes': [ignore_missing, unicode], 'notes': [ignore_missing, unicode],
'owner_org': [owner_org_validator, unicode],
'frequency': [ignore_missing, unicode, harvest_source_frequency_exists, convert_to_extras], 'frequency': [ignore_missing, unicode, harvest_source_frequency_exists, convert_to_extras],
'state': [ignore_not_package_admin, ignore_missing], 'state': [ignore_missing],
'config': [ignore_missing, harvest_source_config_validator, convert_to_extras], 'config': [ignore_missing, harvest_source_config_validator, convert_to_extras],
'extras': default_extras_schema(), 'extras': default_extras_schema(),
'__extras': [ignore], '__extras': [ignore],

View File

@ -1,7 +1,6 @@
import types
from logging import getLogger from logging import getLogger
from pylons import config
from ckan import logic from ckan import logic
from ckan import model from ckan import model
import ckan.plugins as p import ckan.plugins as p
@ -134,6 +133,17 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
return harvest_source_form_to_db_schema() return harvest_source_form_to_db_schema()
def db_to_form_schema_options(self, options):
'''
Similar to db_to_form_schema but with further options to allow
slightly different schemas, eg for creation or deletion on the API.
'''
if options.get('type') == 'show':
return None
else:
return self.db_to_form_schema()
def db_to_form_schema(self): def db_to_form_schema(self):
''' '''
Returns the schema for mapping package data from the database into a Returns the schema for mapping package data from the database into a
@ -217,58 +227,26 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
p.toolkit.add_template_directory(config, templates) p.toolkit.add_template_directory(config, templates)
p.toolkit.add_public_directory(config, 'public') p.toolkit.add_public_directory(config, 'public')
def get_actions(self): ## IActions
from ckanext.harvest.logic.action.get import (harvest_source_show,
harvest_source_show_status,
harvest_source_list,
harvest_source_for_a_dataset,
harvest_job_show,
harvest_job_list,
harvest_object_show,
harvest_object_list,
harvesters_info_show,)
from ckanext.harvest.logic.action.create import (harvest_source_create,
harvest_job_create,
harvest_job_create_all,)
from ckanext.harvest.logic.action.update import (harvest_source_update,
harvest_objects_import,
harvest_jobs_run)
from ckanext.harvest.logic.action.delete import (harvest_source_delete,)
return { def get_actions(self):
'harvest_source_show': harvest_source_show,
'harvest_source_show_status': harvest_source_show_status, module_root = 'ckanext.harvest.logic.action'
'harvest_source_list': harvest_source_list, action_functions = _get_logic_functions(module_root)
'harvest_source_for_a_dataset': harvest_source_for_a_dataset,
'harvest_job_show': harvest_job_show, return action_functions
'harvest_job_list': harvest_job_list,
'harvest_object_show': harvest_object_show, ## IAuthFunctions
'harvest_object_list': harvest_object_list,
'harvesters_info_show': harvesters_info_show,
'harvest_source_create': harvest_source_create,
'harvest_job_create': harvest_job_create,
'harvest_job_create_all': harvest_job_create_all,
'harvest_source_update': harvest_source_update,
'harvest_source_delete': harvest_source_delete,
'harvest_objects_import': harvest_objects_import,
'harvest_jobs_run':harvest_jobs_run
}
def get_auth_functions(self): def get_auth_functions(self):
module_root = 'ckanext.harvest.logic.auth' module_root = 'ckanext.harvest.logic.auth'
auth_profile = config.get('ckan.harvest.auth.profile', '') auth_functions = _get_logic_functions(module_root)
auth_functions = _get_auth_functions(module_root)
if auth_profile:
module_root = '%s.%s' % (module_root, auth_profile)
auth_functions = _get_auth_functions(module_root,auth_functions)
log.debug('Using auth profile at %s' % module_root)
return auth_functions return auth_functions
## ITemplateHelpers ## ITemplateHelpers
def get_helpers(self): def get_helpers(self):
from ckanext.harvest import helpers as harvest_helpers from ckanext.harvest import helpers as harvest_helpers
return { return {
@ -279,25 +257,24 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
} }
def _get_auth_functions(module_root, auth_functions = {}): def _get_logic_functions(module_root, logic_functions = {}):
for auth_module_name in ['get', 'create', 'update','delete']: for module_name in ['get', 'create', 'update','delete']:
module_path = '%s.%s' % (module_root, auth_module_name,) module_path = '%s.%s' % (module_root, module_name,)
try: try:
module = __import__(module_path) module = __import__(module_path)
except ImportError,e: except ImportError:
log.debug('No auth module for action "%s"' % auth_module_name) log.debug('No auth module for action "{0}"'.format(module_name))
continue continue
for part in module_path.split('.')[1:]: for part in module_path.split('.')[1:]:
module = getattr(module, part) module = getattr(module, part)
for key, value in module.__dict__.items(): for key, value in module.__dict__.items():
if not key.startswith('_'): if not key.startswith('_') and isinstance(value, types.FunctionType):
auth_functions[key] = value logic_functions[key] = value
return logic_functions
return auth_functions
def _create_harvest_source_object(data_dict): def _create_harvest_source_object(data_dict):
''' '''

View File

@ -115,7 +115,13 @@ def gather_callback(channel, method, header, body):
harvester_found = True harvester_found = True
# Get a list of harvest object ids from the plugin # Get a list of harvest object ids from the plugin
job.gather_started = datetime.datetime.now() job.gather_started = datetime.datetime.now()
harvest_object_ids = harvester.gather_stage(job) try:
harvest_object_ids = harvester.gather_stage(job)
except Exception, e:
log.error('Gather stage failed unexpectedly: %s' % e)
job.status = 'Errored'
job.save()
continue
job.gather_finished = datetime.datetime.now() job.gather_finished = datetime.datetime.now()
job.save() job.save()
log.debug('Received from plugin''s gather_stage: %r' % harvest_object_ids) log.debug('Received from plugin''s gather_stage: %r' % harvest_object_ids)
@ -160,6 +166,8 @@ def fetch_callback(channel, method, header, body):
obj.save() obj.save()
if obj.retry_times >= 5: if obj.retry_times >= 5:
obj.state = "ERROR"
obj.save()
log.error('Too many consecutive retries for object {0}'.format(obj.id)) log.error('Too many consecutive retries for object {0}'.format(obj.id))
channel.basic_ack(method.delivery_tag) channel.basic_ack(method.delivery_tag)
return False return False

View File

@ -2,6 +2,8 @@
<form id="source-new" class="form-horizontal" method="post" > <form id="source-new" class="form-horizontal" method="post" >
{% block errors %}{{ form.errors(error_summary) }}{% endblock %}
{% call form.input('url', id='field-url', label=_('URL'), value=data.url, error=errors.url, classes=['control-full', 'control-large']) %} {% call form.input('url', id='field-url', label=_('URL'), value=data.url, error=errors.url, classes=['control-full', 'control-large']) %}
<span class="info-block icon-large icon-info-sign"> <span class="info-block icon-large icon-info-sign">
{{ _('This should include the http:// part of the URL') }} {{ _('This should include the http:// part of the URL') }}
@ -32,7 +34,29 @@
{{ form.textarea('config', id='field-config', label=_('Configuration'), value=data.config, error=errors.config) }} {{ form.textarea('config', id='field-config', label=_('Configuration'), value=data.config, error=errors.config) }}
<div><b>TODO: state / delete</b> </div> {# if we have a default group then this wants remembering #}
{% if data.group_id %}
<input type="hidden" name="groups__0__id" value="{{ data.group_id }}" />
{% endif %}
{% set existing_org = data.owner_org or data.group_id %}
{% if h.check_access('sysadmin') or data.get('state', 'draft').startswith('draft') or data.get('state', 'none') == 'none' %}
{% set organizations_available = h.organizations_available('create_dataset') %}
{% if organizations_available %}
<div class="control-group">
<label for="field-organizations" class="control-label">{{ _('Organization') }}</label>
<div class="controls">
<select id="field-organizations" name="owner_org" data-module="autocomplete">
<option value="">{{ _('Select an organization...') }}</option>
{% for organization in organizations_available %}
{# get out first org from users list only if there is not an existing org #}
{% set selected_org = (existing_org and existing_org == organization.id) or (not existing_org and organization.id == organizations_available[0].id) %}
<option value="{{ organization.id }}" {% if selected_org %} selected="selected" {% endif %}>{{ organization.name }}</option>
{% endfor %}
</select>
</div>
</div>
{% endif %}
{% endif %}
<input id="save" name="save" value="Save" type="submit" class="btn"/> <input id="save" name="save" value="Save" type="submit" class="btn"/>