diff --git a/README.rst b/README.rst index b6143e1..f40c748 100644 --- a/README.rst +++ b/README.rst @@ -100,6 +100,34 @@ the config explicitly though:: paster harvester sources --config=../ckan/development.ini +Authorization Profiles +====================== + +Starting from CKAN 1.6.1, the harvester extension offers the ability to use +different authorization profiles. These can be defined in your ini file as:: + + ckan.harvest.auth.profile = + +The two available profiles right now are: + +* `default`: This is the default profile, the same one that this extension has + used historically. Basically, only sysadmins can manage anything related to + harvesting, including creating and editing harvest sources or running harvest + jobs. + +* `publisher`: When using this profile, sysadmins can still perform any + harvesting related action, but in addition, users belonging to a publisher + (with role `admin`) can manage and run their own harvest sources and jobs. + Note that this requires CKAN core to also use the `publisher` authorization + profile, i.e. you will also need to add:: + + ckan.auth.profile = publisher + + To learn more about the CKAN publisher auth profile, visit + + http://wiki.ckan.org/Working_with_the_publisher_auth_profile + + The CKAN harverster =================== diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 46fd36e..754d9ac 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -2,8 +2,10 @@ import sys import re from pprint import pprint +from ckan import model +from ckan.logic import get_action, ValidationError + from ckan.lib.cli import CkanCommand -from ckanext.harvest.lib import * from ckanext.harvest.queue import get_gather_consumer, get_fetch_consumer class Harvester(CkanCommand): @@ -62,6 +64,13 @@ class Harvester(CkanCommand): def command(self): self._load_config() + + # We'll need a sysadmin user to perform most of the actions + # We will use the 
sysadmin site user (named as the site_id) + context = {'model':model,'session':model.Session,'ignore_auth':True} + self.admin_user = get_action('get_site_user')(context,{}) + + print '' if len(self.args) == 0: @@ -97,6 +106,9 @@ class Harvester(CkanCommand): self.import_stage() elif cmd == 'job-all': self.create_harvest_job_all() + elif cmd == 'harvesters-info': + harvesters_info = get_action('harvesters_info_show')() + pprint(harvesters_info) else: print 'Command %s not recognized' % cmd @@ -139,48 +151,50 @@ class Harvester(CkanCommand): else: publisher_id = u'' try: - source = create_harvest_source({ + data_dict = { 'url':url, 'type':type, 'config':config, 'active':active, 'user_id':user_id, - 'publisher_id':publisher_id}) + 'publisher_id':publisher_id} + context = {'model':model, 'session':model.Session, 'user': self.admin_user['name']} + source = get_action('harvest_source_create')(context,data_dict) print 'Created new harvest source:' self.print_harvest_source(source) - sources = get_harvest_sources() + sources = get_action('harvest_source_list')(context,{}) self.print_there_are('harvest source', sources) - # Create a Harvest Job for the new Source - create_harvest_job(source['id']) + # Create a harvest job for the new source + get_action('harvest_job_create')(context,{'source_id':source['id']}) print 'A new Harvest Job for this source has also been created' - except ValidationError,e: print 'An error occurred:' print str(e.error_dict) raise e - def remove_harvest_source(self): if len(self.args) >= 2: source_id = unicode(self.args[1]) else: print 'Please provide a source id' sys.exit(1) - - remove_harvest_source(source_id) + context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} + get_action('harvest_source_delete')(context,{'id':source_id}) print 'Removed harvest source: %s' % source_id def list_harvest_sources(self): if len(self.args) >= 2 and self.args[1] == 'all': - sources = get_harvest_sources() + data_dict = {} what = 
'harvest source' else: - sources = get_harvest_sources(active=True) + data_dict = {'only_active':True} what = 'active harvest source' + context = {'model': model,'session':model.Session, 'user': self.admin_user['name']} + sources = get_action('harvest_source_list')(context,data_dict) self.print_harvest_sources(sources) self.print_there_are(what=what, sequence=sources) @@ -194,21 +208,21 @@ class Harvester(CkanCommand): job = create_harvest_job(source_id) self.print_harvest_job(job) - status = u'New' - jobs = get_harvest_jobs(status=status) + context = {'model': model,'session':model.Session, 'user': self.admin_user['name']} + jobs = get_action('harvest_job_list')(context,{'status':u'New'}) self.print_there_are('harvest jobs', jobs, condition=status) def list_harvest_jobs(self): - jobs = get_harvest_jobs() + context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} + jobs = get_action('harvest_job_list')(context,{}) + self.print_harvest_jobs(jobs) self.print_there_are(what='harvest job', sequence=jobs) def run_harvester(self): - try: - jobs = run_harvest_jobs() - except: - pass - sys.exit(0) + context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} + jobs = get_action('harvest_jobs_run')(context,{}) + #print 'Sent %s jobs to the gather queue' % len(jobs) def import_stage(self): @@ -216,12 +230,15 @@ class Harvester(CkanCommand): source_id = unicode(self.args[1]) else: source_id = None - objs = import_last_objects(source_id) + context = {'model': model, 'session':model.Session, 'user': self.admin_user['name']} + objs = get_action('harvest_objects_import')(context,{'source_id':source_id}) + print '%s objects reimported' % len(objs) def create_harvest_job_all(self): - jobs = create_harvest_job_all() - print "Created %s new harvest jobs" % len(jobs) + context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} + jobs = get_action('harvest_job_create_all')(context,{}) + print 'Created %s new 
harvest jobs' % len(jobs) def print_harvest_sources(self, sources): if sources: @@ -236,7 +253,7 @@ class Harvester(CkanCommand): print ' active: %s' % source['active'] print ' user: %s' % source['user_id'] print 'publisher: %s' % source['publisher_id'] - print ' jobs: %s' % len(source['jobs']) + print ' jobs: %s' % source['status']['job_count'] print '' def print_harvest_jobs(self, jobs): @@ -248,8 +265,7 @@ class Harvester(CkanCommand): def print_harvest_job(self, job): print ' Job id: %s' % job['id'] print ' status: %s' % job['status'] - print ' source: %s' % job['source']['id'] - print ' url: %s' % job['source']['url'] + print ' source: %s' % job['source'] print ' objects: %s' % len(job['objects']) print 'gather_errors: %s' % len(job['gather_errors']) diff --git a/ckanext/harvest/controllers/view.py b/ckanext/harvest/controllers/view.py index cbf48bc..c899fdd 100644 --- a/ckanext/harvest/controllers/view.py +++ b/ckanext/harvest/controllers/view.py @@ -2,34 +2,64 @@ from lxml import etree from lxml.etree import XMLSyntaxError from pylons.i18n import _ +from ckan.authz import Authorizer +from ckan import model +from ckan.model.group import Group + import ckan.lib.helpers as h, json from ckan.lib.base import BaseController, c, g, request, \ response, session, render, config, abort, redirect from ckan.lib.navl.dictization_functions import DataError -from ckan.logic import NotFound, ValidationError +from ckan.logic import NotFound, ValidationError, get_action, NotAuthorized from ckanext.harvest.logic.schema import harvest_source_form_schema -from ckanext.harvest.lib import create_harvest_source, edit_harvest_source, \ - get_harvest_source, get_harvest_sources, \ - create_harvest_job, get_registered_harvesters_info, \ - get_harvest_object + from ckan.lib.helpers import Page,pager_url + import logging log = logging.getLogger(__name__) class ViewController(BaseController): - def __before__(self, action, **env): - super(ViewController, self).__before__(action, **env) - 
# All calls to this controller must be with a sysadmin key - if not self.authorizer.is_sysadmin(c.user): - response_msg = _('Not authorized to see this page') - status = 401 - abort(status, response_msg) + not_auth_message = _('Not authorized to see this page') + + def __before__(self, action, **params): + + super(ViewController,self).__before__(action, **params) + + c.publisher_auth = (config.get('ckan.harvest.auth.profile',None) == 'publisher') + + def _get_publishers(self): + groups = None + + if c.publisher_auth: + if Authorizer().is_sysadmin(c.user): + groups = Group.all(group_type='publisher') + elif c.userobj: + groups = c.userobj.get_groups('publisher') + else: # anonymous user shouldn't have access to this page anyway. + groups = [] + + # Be explicit about which fields we make available in the template + groups = [ { + 'name': g.name, + 'id': g.id, + 'title': g.title, + } for g in groups ] + + return groups + def index(self): - # Request all harvest sources - c.sources = get_harvest_sources() + context = {'model':model, 'user':c.user,'session':model.Session} + try: + # Request all harvest sources + c.sources = get_action('harvest_source_list')(context,{}) + except NotAuthorized,e: + abort(401,self.not_auth_message) + + if c.publisher_auth: + c.sources = sorted(c.sources,key=lambda source : source['publisher_title']) return render('index.html') @@ -41,8 +71,16 @@ class ViewController(BaseController): data = data or {} errors = errors or {} error_summary = error_summary or {} - vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()} + try: + context = {'model':model, 'user':c.user} + harvesters_info = get_action('harvesters_info_show')(context,{}) + except NotAuthorized,e: + abort(401,self.not_auth_message) + + vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': harvesters_info} + + c.groups = self._get_publishers() c.form = render('source/new_source_form.html', 
extra_vars=vars) return render('source/new.html') @@ -50,15 +88,19 @@ class ViewController(BaseController): try: data_dict = dict(request.params) self._check_data_dict(data_dict) + context = {'model':model, 'user':c.user, 'session':model.Session, + 'schema':harvest_source_form_schema()} - source = create_harvest_source(data_dict) + source = get_action('harvest_source_create')(context,data_dict) # Create a harvest job for the new source - create_harvest_job(source['id']) + get_action('harvest_job_create')(context,{'source_id':source['id']}) h.flash_success(_('New harvest source added successfully.' 'A new harvest job for the source has also been created.')) - redirect(h.url_for('harvest')) + redirect('/harvest/%s' % source['id']) + except NotAuthorized,e: + abort(401,self.not_auth_message) except DataError,e: abort(400, 'Integrity Error') except ValidationError,e: @@ -71,30 +113,46 @@ class ViewController(BaseController): if ('save' in request.params) and not data: return self._save_edit(id) + if not data: try: - old_data = get_harvest_source(id) + context = {'model':model, 'user':c.user} + + old_data = get_action('harvest_source_show')(context, {'id':id}) except NotFound: abort(404, _('Harvest Source not found')) + except NotAuthorized,e: + abort(401,self.not_auth_message) data = data or old_data errors = errors or {} error_summary = error_summary or {} + try: + context = {'model':model, 'user':c.user} + harvesters_info = get_action('harvesters_info_show')(context,{}) + except NotAuthorized,e: + abort(401,self.not_auth_message) - vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()} + vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': harvesters_info} + c.groups = self._get_publishers() c.form = render('source/new_source_form.html', extra_vars=vars) return render('source/edit.html') def _save_edit(self,id): try: data_dict = dict(request.params) + data_dict['id'] = 
id self._check_data_dict(data_dict) + context = {'model':model, 'user':c.user, 'session':model.Session, + 'schema':harvest_source_form_schema()} - source = edit_harvest_source(id,data_dict) + source = get_action('harvest_source_update')(context,data_dict) h.flash_success(_('Harvest source edited successfully.')) - redirect(h.url_for('harvest')) + redirect('/harvest/%s' %id) + except NotAuthorized,e: + abort(401,self.not_auth_message) except DataError,e: abort(400, _('Integrity Error')) except NotFound, e: @@ -106,18 +164,23 @@ class ViewController(BaseController): def _check_data_dict(self, data_dict): '''Check if the return data is correct''' - surplus_keys_schema = ['id','publisher_id','user_id','active','save','config'] - + surplus_keys_schema = ['id','publisher_id','user_id','config','save'] schema_keys = harvest_source_form_schema().keys() keys_in_schema = set(schema_keys) - set(surplus_keys_schema) + # user_id is not yet used, we'll set the logged user one for the time being + if not data_dict.get('user_id',None): + if c.userobj: + data_dict['user_id'] = c.userobj.id if keys_in_schema - set(data_dict.keys()): log.info(_('Incorrect form fields posted')) raise DataError(data_dict) def read(self,id): try: - c.source = get_harvest_source(id) + context = {'model':model, 'user':c.user} + c.source = get_action('harvest_source_show')(context, {'id':id}) + c.page = Page( collection=c.source['status']['packages'], page=request.params.get('page', 1), @@ -128,24 +191,33 @@ class ViewController(BaseController): return render('source/read.html') except NotFound: abort(404,_('Harvest source not found')) + except NotAuthorized,e: + abort(401,self.not_auth_message) + def delete(self,id): try: - delete_harvest_source(id) + context = {'model':model, 'user':c.user} + get_action('harvest_source_delete')(context, {'id':id}) - h.flash_success(_('Harvesting source deleted successfully')) + h.flash_success(_('Harvesting source successfully inactivated')) 
redirect(h.url_for('harvest')) except NotFound: abort(404,_('Harvest source not found')) + except NotAuthorized,e: + abort(401,self.not_auth_message) def create_harvesting_job(self,id): try: - create_harvest_job(id) + context = {'model':model, 'user':c.user, 'session':model.Session} + get_action('harvest_job_create')(context,{'source_id':id}) h.flash_success(_('Refresh requested, harvesting will take place within 15 minutes.')) except NotFound: abort(404,_('Harvest source not found')) + except NotAuthorized,e: + abort(401,self.not_auth_message) except Exception, e: msg = 'An error occurred: [%s]' % e.message h.flash_error(msg) @@ -153,23 +225,28 @@ class ViewController(BaseController): redirect(h.url_for('harvest')) def show_object(self,id): + try: - object = get_harvest_object(id) + context = {'model':model, 'user':c.user} + obj = get_action('harvest_object_show')(context, {'id':id}) + # Check content type. It will probably be either XML or JSON try: - etree.fromstring(object['content']) + etree.fromstring(obj['content']) response.content_type = 'application/xml' except XMLSyntaxError: try: - json.loads(object['content']) + json.loads(obj['content']) response.content_type = 'application/json' except ValueError: pass - response.headers["Content-Length"] = len(object['content']) - return object['content'] + response.headers['Content-Length'] = len(obj['content']) + return obj['content'] except NotFound: abort(404,_('Harvest object not found')) + except NotAuthorized,e: + abort(401,self.not_auth_message) except Exception, e: msg = 'An error occurred: [%s]' % e.message h.flash_error(msg) diff --git a/ckanext/harvest/lib/__init__.py b/ckanext/harvest/lib/__init__.py deleted file mode 100644 index aafe09f..0000000 --- a/ckanext/harvest/lib/__init__.py +++ /dev/null @@ -1,379 +0,0 @@ -import urlparse -import re - -from sqlalchemy import distinct,func -from ckan.model import Session, repo -from ckan.model import Package -from ckan.lib.navl.dictization_functions import 
validate -from ckan.logic import NotFound, ValidationError - -from ckanext.harvest.logic.schema import harvest_source_form_schema - -from ckan.plugins import PluginImplementations -from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject, \ - HarvestGatherError, HarvestObjectError -from ckanext.harvest.queue import get_gather_publisher -from ckanext.harvest.interfaces import IHarvester - -import logging -log = logging.getLogger('ckanext') - - -def _get_source_status(source, detailed=True): - out = dict() - job_count = HarvestJob.filter(source=source).count() - if not job_count: - out['msg'] = 'No jobs yet' - return out - out = {'next_harvest':'', - 'last_harvest_request':'', - 'last_harvest_statistics':{'added':0,'updated':0,'errors':0}, - 'last_harvest_errors':{'gather':[],'object':[]}, - 'overall_statistics':{'added':0, 'errors':0}, - 'packages':[]} - # Get next scheduled job - next_job = HarvestJob.filter(source=source,status=u'New').first() - if next_job: - out['next_harvest'] = 'Scheduled' - else: - out['next_harvest'] = 'Not yet scheduled' - - # Get the last finished job - last_job = HarvestJob.filter(source=source,status=u'Finished') \ - .order_by(HarvestJob.created.desc()).first() - - if last_job: - #TODO: Should we encode the dates as strings? 
- out['last_harvest_request'] = str(last_job.gather_finished) - - #Get HarvestObjects from last job whit links to packages - if detailed: - last_objects = [obj for obj in last_job.objects if obj.package is not None] - - if len(last_objects) == 0: - # No packages added or updated - out['last_harvest_statistics']['added'] = 0 - out['last_harvest_statistics']['updated'] = 0 - else: - # Check wether packages were added or updated - for last_object in last_objects: - # Check if the same package had been linked before - previous_objects = Session.query(HarvestObject) \ - .filter(HarvestObject.package==last_object.package) \ - .count() - - if previous_objects == 1: - # It didn't previously exist, it has been added - out['last_harvest_statistics']['added'] += 1 - else: - # Pacakge already existed, but it has been updated - out['last_harvest_statistics']['updated'] += 1 - - # Last harvest errors - # We have the gathering errors in last_job.gather_errors, so let's also - # get also the object errors. 
- object_errors = Session.query(HarvestObjectError).join(HarvestObject) \ - .filter(HarvestObject.job==last_job) - - out['last_harvest_statistics']['errors'] = len(last_job.gather_errors) \ - + object_errors.count() - if detailed: - for gather_error in last_job.gather_errors: - out['last_harvest_errors']['gather'].append(gather_error.message) - - for object_error in object_errors: - err = {'object_id':object_error.object.id,'object_guid':object_error.object.guid,'message': object_error.message} - out['last_harvest_errors']['object'].append(err) - - # Overall statistics - packages = Session.query(distinct(HarvestObject.package_id),Package.name) \ - .join(Package).join(HarvestSource) \ - .filter(HarvestObject.source==source) \ - .filter(HarvestObject.current==True) \ - .filter(Package.state==u'active') - - out['overall_statistics']['added'] = packages.count() - if detailed: - for package in packages: - out['packages'].append(package.name) - - gather_errors = Session.query(HarvestGatherError) \ - .join(HarvestJob).join(HarvestSource) \ - .filter(HarvestJob.source==source).count() - - object_errors = Session.query(HarvestObjectError) \ - .join(HarvestObject).join(HarvestJob).join(HarvestSource) \ - .filter(HarvestJob.source==source).count() - out['overall_statistics']['errors'] = gather_errors + object_errors - else: - out['last_harvest_request'] = 'Not yet harvested' - - return out - - -def _source_as_dict(source, detailed=True): - out = source.as_dict() - out['jobs'] = [] - - for job in source.jobs: - out['jobs'].append(job.as_dict()) - - out['status'] = _get_source_status(source, detailed=detailed) - - - return out - -def _job_as_dict(job): - out = job.as_dict() - out['source'] = job.source.as_dict() - out['objects'] = [] - out['gather_errors'] = [] - - for obj in job.objects: - out['objects'].append(obj.as_dict()) - - for error in job.gather_errors: - out['gather_errors'].append(error.as_dict()) - - return out - -def _object_as_dict(obj): - out = obj.as_dict() - 
out['source'] = obj.source.as_dict() - out['job'] = obj.job.as_dict() - - if obj.package: - out['package'] = obj.package.as_dict() - - out['errors'] = [] - - for error in obj.errors: - out['errors'].append(error.as_dict()) - - return out - -def _prettify(field_name): - field_name = re.sub('(? td{ + background-color: #E3E3E3 !important; + padding: 3px; + font-weight: bold; +} + diff --git a/ckanext/harvest/templates/index.html b/ckanext/harvest/templates/index.html index 9c03476..71b2aa6 100644 --- a/ckanext/harvest/templates/index.html +++ b/ckanext/harvest/templates/index.html @@ -26,7 +26,7 @@ - +
@@ -38,35 +38,49 @@ + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + +
View EditNext Harvest Created
${source['publisher_title']}${source['publisher_id']}
ViewEditRefresh${source.url[:50]}...${source.url}${source.type}${source.active}${source.status.msg}${source.status.msg}Datasets: ${source.status.overall_statistics.added}
- Last errors: ${source.status.last_harvest_statistics.errors}
${source.status.next_harvest}
ViewEditRefresh${source.url[:50]}...${source.url}${source.type}${source.active}${source.status.msg}${source.status.msg}Datasets: ${source.status.overall_statistics.added}
+ Last errors: ${source.status.last_harvest_statistics.errors}
${source.status.next_harvest}${h.render_datetime(source.created)}
${h.render_datetime(source.created)}
diff --git a/ckanext/harvest/templates/source/new_source_form.html b/ckanext/harvest/templates/source/new_source_form.html index d3c5adb..6437a95 100644 --- a/ckanext/harvest/templates/source/new_source_form.html +++ b/ckanext/harvest/templates/source/new_source_form.html @@ -44,8 +44,20 @@
You can add your own notes here about what the URL above represents to remind you later.
-
-
+ +
+
+ +
+
Cannot add any publishers.
+ + +
+
diff --git a/ckanext/harvest/templates/source/read.html b/ckanext/harvest/templates/source/read.html index 3ca8348..f75e5e9 100644 --- a/ckanext/harvest/templates/source/read.html +++ b/ckanext/harvest/templates/source/read.html @@ -15,7 +15,10 @@

Harvest Source Details

+ RefreshRefresh source | + Sources list + + @@ -51,13 +54,18 @@ - + - + + + + + + @@ -65,7 +73,7 @@ - + diff --git a/ckanext/harvest/tests/test_form_api.py b/ckanext/harvest/tests/_test_form_api.py similarity index 100% rename from ckanext/harvest/tests/test_form_api.py rename to ckanext/harvest/tests/_test_form_api.py diff --git a/ckanext/harvest/tests/test_harvest_source.py b/ckanext/harvest/tests/_test_harvest_source.py similarity index 100% rename from ckanext/harvest/tests/test_harvest_source.py rename to ckanext/harvest/tests/_test_harvest_source.py diff --git a/ckanext/harvest/tests/test_auth.py b/ckanext/harvest/tests/test_auth.py new file mode 100644 index 0000000..25cb134 --- /dev/null +++ b/ckanext/harvest/tests/test_auth.py @@ -0,0 +1,223 @@ +import logging +from pprint import pprint +from nose.plugins.skip import SkipTest; + +from ckan import model +from ckan.model import Package, Session +from ckan.lib.helpers import url_for,json +from ckan.lib.base import config + + +from ckan.tests import CreateTestData +from ckan.tests.functional.base import FunctionalTestCase + +from ckanext.harvest.plugin import Harvest +from ckanext.harvest.model import HarvestSource, HarvestJob, setup as harvest_model_setup + +log = logging.getLogger(__name__) + + +class HarvestAuthBaseCase(): + @classmethod + def setup_class(cls): + harvest_model_setup() + + @classmethod + def teardown_class(cls): + pass + + def _test_auth_not_allowed(self,user_name = None, source = None, status = 401): + + if not source: + # Create harvest source + source = HarvestSource(url=u'http://test-source.com',type='ckan') + Session.add(source) + Session.commit() + + if user_name: + extra_environ = {'REMOTE_USER': user_name.encode('utf8')} + else: + extra_environ = {} + + # List + res = self.app.get('/harvest', status=status, extra_environ=extra_environ) + # Create + res = self.app.get('/harvest/new', status=status, extra_environ=extra_environ) + # Read + res = self.app.get('/harvest/%s' % source.id, 
status=status, extra_environ=extra_environ) + # Edit + res = self.app.get('/harvest/edit/%s' % source.id, status=status, extra_environ=extra_environ) + # Refresh + res = self.app.get('/harvest/refresh/%s' % source.id, status=status, extra_environ=extra_environ) + + def _test_auth_allowed(self,user_name,auth_profile=None): + + extra_environ={'REMOTE_USER': user_name.encode('utf8')} + + # List + res = self.app.get('/harvest', extra_environ=extra_environ) + assert 'Harvesting Sources' in res + + # Create + res = self.app.get('/harvest/new', extra_environ=extra_environ) + assert 'New harvest source' in res + if auth_profile == 'publisher': + assert 'publisher_id' in res + else: + assert not 'publisher_id' in res + + fv = res.forms['source-new'] + fv['url'] = u'http://test-source.com' + fv['type'] = u'ckan' + fv['title'] = u'Test harvest source' + fv['description'] = u'Test harvest source' + fv['config'] = u'{"a":1,"b":2}' + + if auth_profile == 'publisher': + fv['publisher_id'] = self.publisher1.id + + res = fv.submit('save', extra_environ=extra_environ) + assert not 'Error' in res, res + + source = Session.query(HarvestSource).first() + assert source.url == u'http://test-source.com' + assert source.type == u'ckan' + + # Read + res = self.app.get('/harvest/%s' % source.id, extra_environ=extra_environ) + assert 'Harvest Source Details' in res + assert source.id in res + assert source.title in res + + # Edit + res = self.app.get('/harvest/edit/%s' % source.id, extra_environ=extra_environ) + assert 'Edit harvest source' in res + if auth_profile == 'publisher': + assert 'publisher_id' in res + else: + assert not 'publisher_id' in res + + fv = res.forms['source-new'] + fv['title'] = u'Test harvest source Updated' + + res = fv.submit('save', extra_environ=extra_environ) + assert not 'Error' in res, res + + source = Session.query(HarvestSource).first() + assert source.title == u'Test harvest source Updated' + + # Refresh + res = self.app.get('/harvest/refresh/%s' % source.id, 
extra_environ=extra_environ) + + job = Session.query(HarvestJob).first() + assert job.source_id == source.id + + + + +class TestAuthDefaultProfile(FunctionalTestCase,HarvestAuthBaseCase): + + @classmethod + def setup_class(cls): + if (config.get('ckan.harvest.auth.profile','') != ''): + raise SkipTest('Skipping default auth profile tests. Set ckan.harvest.auth.profile = \'\' to run them') + + super(TestAuthDefaultProfile,cls).setup_class() + + def setup(self): + CreateTestData.create() + self.sysadmin_user = model.User.get('testsysadmin') + self.normal_user = model.User.get('annafan') + + def teardown(self): + model.repo.rebuild_db() + + def test_auth_default_profile_sysadmin(self): + self._test_auth_allowed(self.sysadmin_user.name) + + def test_auth_default_profile_normal(self): + self._test_auth_not_allowed(self.normal_user.name) + + def test_auth_default_profile_notloggedin(self): + self._test_auth_not_allowed(status=302) + +class TestAuthPublisherProfile(FunctionalTestCase,HarvestAuthBaseCase): + + @classmethod + def setup_class(cls): + if (config.get('ckan.harvest.auth.profile') != 'publisher'): + raise SkipTest('Skipping publisher auth profile tests. 
Set ckan.harvest.auth.profile = \'publisher\' to run them') + + super(TestAuthPublisherProfile,cls).setup_class() + + def setup(self): + + model.Session.remove() + CreateTestData.create(auth_profile='publisher') + self.sysadmin_user = model.User.get('testsysadmin') + self.normal_user = model.User.get('annafan') # Does not belong to a publisher + self.publisher1_user = model.User.by_name('russianfan') + self.publisher2_user = model.User.by_name('tester') + + # Create two Publishers + rev = model.repo.new_revision() + self.publisher1 = model.Group(name=u'test-publisher1',title=u'Test Publihser 1',type=u'publisher') + Session.add(self.publisher1) + self.publisher2 = model.Group(name=u'test-publisher2',title=u'Test Publihser 2',type=u'publisher') + Session.add(self.publisher2) + + member1 = model.Member(table_name = 'user', + table_id = self.publisher1_user.id, + group=self.publisher1, + capacity='admin') + Session.add(member1) + member2 = model.Member(table_name = 'user', + table_id = self.publisher2_user.id, + group=self.publisher2, + capacity='admin') + Session.add(member2) + + Session.commit() + + def teardown(self): + model.repo.rebuild_db() + + def test_auth_publisher_profile_normal(self): + self._test_auth_not_allowed(self.normal_user.name) + + def test_auth_publisher_profile_notloggedin(self): + self._test_auth_not_allowed(status=302) + + def test_auth_publisher_profile_sysadmin(self): + self._test_auth_allowed(self.sysadmin_user.name,auth_profile='publisher') + + def test_auth_publisher_profile_publisher(self): + self._test_auth_allowed(self.publisher1_user.name,auth_profile='publisher') + + def test_auth_publisher_profile_different_publisher(self): + + # Create a source for publisher 1 + source = HarvestSource(url=u'http://test-source.com',type='ckan', + publisher_id=self.publisher1.id) + Session.add(source) + Session.commit() + + extra_environ = {'REMOTE_USER': self.publisher2_user.name.encode('utf8')} + + # List (Publihsers can see the sources list) + res = 
self.app.get('/harvest', extra_environ=extra_environ) + assert 'Harvesting Sources' in res + # Create + res = self.app.get('/harvest/new', extra_environ=extra_environ) + assert 'New harvest source' in res + assert 'publisher_id' in res + + # Check that this publihser is not allowed to manage sources from other publishers + status = 401 + # Read + res = self.app.get('/harvest/%s' % source.id, status=status, extra_environ=extra_environ) + # Edit + res = self.app.get('/harvest/edit/%s' % source.id, status=status, extra_environ=extra_environ) + # Refresh + res = self.app.get('/harvest/refresh/%s' % source.id, status=status, extra_environ=extra_environ) + diff --git a/test-core.ini b/test-core.ini new file mode 100644 index 0000000..6428eff --- /dev/null +++ b/test-core.ini @@ -0,0 +1,54 @@ +[DEFAULT] +debug = true +# Uncomment and replace with the address which should receive any error reports +#email_to = you@yourdomain.com +smtp_server = localhost +error_email_from = paste@localhost + +[server:main] +use = egg:Paste#http +host = 0.0.0.0 +port = 5000 + + +[app:main] +use = config:../ckan/test-core.ini +# Here we hard-code the database and a flag to make default tests +# run fast. +ckan.plugins = harvest ckan_harvester +# NB: other test configuration should go in test-core.ini, which is +# what the postgres tests use. 
+ + +# Logging configuration +[loggers] +keys = root, ckan, sqlalchemy + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console + +[logger_ckan] +qualname = ckan +handlers = +level = INFO + +[logger_sqlalchemy] +handlers = +qualname = sqlalchemy.engine +level = WARN + +[handler_console] +class = StreamHandler +args = (sys.stdout,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s diff --git a/test.ini b/test.ini new file mode 100644 index 0000000..bdb886e --- /dev/null +++ b/test.ini @@ -0,0 +1,54 @@ +[DEFAULT] +debug = true +# Uncomment and replace with the address which should receive any error reports +#email_to = you@yourdomain.com +smtp_server = localhost +error_email_from = paste@localhost + +[server:main] +use = egg:Paste#http +host = 0.0.0.0 +port = 5000 + + +[app:main] +use = config:../ckan/test.ini +# Here we hard-code the database and a flag to make default tests +# run fast. +ckan.plugins = harvest ckan_harvester +# NB: other test configuration should go in test-core.ini, which is +# what the postgres tests use. + + +# Logging configuration +[loggers] +keys = root, ckan, sqlalchemy + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console + +[logger_ckan] +qualname = ckan +handlers = +level = INFO + +[logger_sqlalchemy] +handlers = +qualname = sqlalchemy.engine +level = WARN + +[handler_console] +class = StreamHandler +args = (sys.stdout,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s
ID-
User ${c.source.user_id}
Publisher${c.source.publisher_title}${c.source.publisher_id}
Created
Total jobs${len(c.source.jobs)}${c.source.status.job_count}
Status