diff --git a/README.rst b/README.rst index 6e50c65..bcc9cdb 100644 --- a/README.rst +++ b/README.rst @@ -94,32 +94,10 @@ The commands should be run with the pyenv activated and refer to your sites conf paster --plugin=ckanext-harvest harvester sources --config=mysite.ini -Authorization Profiles -====================== +Authorization +============= -Starting from CKAN 1.6.1, the harvester extension offers the ability to use -different authorization profiles. These can be defined in your ini file as:: - - ckan.harvest.auth.profile = - -The two available profiles right now are: - -* `default`: This is the default profile, the same one that this extension has - used historically. Basically, only sysadmins can manage anything related to - harvesting, including creating and editing harvest sources or running harvest - jobs. - -* `publisher`: When using this profile, sysadmins can still perform any - harvesting related action, but in addition, users belonging to a publisher - (with role `admin`) can manage and run their own harvest sources and jobs. - Note that this requires CKAN core to also use the `publisher` authorization - profile, i.e you will also need to add:: - - ckan.auth.profile = publisher - -To know more about the CKAN publisher auth profile, visit: - - http://oldwiki.ckan.org/Working_with_the_publisher_auth_profile +TODO The CKAN harvester diff --git a/ckanext/harvest/controllers/organization.py b/ckanext/harvest/controllers/organization.py index a11e7e4..58342f8 100644 --- a/ckanext/harvest/controllers/organization.py +++ b/ckanext/harvest/controllers/organization.py @@ -11,6 +11,9 @@ import ckan.new_authz from ckan.controllers.group import GroupController +from ckanext.harvest.plugin import DATASET_TYPE_NAME + + try: from collections import OrderedDict # 2.7 except ImportError: @@ -39,7 +42,7 @@ class OrganizationController(GroupController): except p.toolkit.NotAuthorized: abort(401, p.toolkit._('Unauthorized to read group %s') % id) - self._read(id, limit, dataset_type='harvest_source') + self._read(id, limit, dataset_type=DATASET_TYPE_NAME) return render('source/org_source_list.html') def _read(self, id, limit, dataset_type=None): diff --git a/ckanext/harvest/controllers/view.py b/ckanext/harvest/controllers/view.py index 0d2b907..ea01a5f 100644 --- a/ckanext/harvest/controllers/view.py +++ b/ckanext/harvest/controllers/view.py @@ -4,19 +4,13 @@ from lxml.etree import XMLSyntaxError from pylons.i18n import _ from ckan import model -from ckan.model.group import Group -import ckan.lib.helpers as h, json -from ckan.lib.base import BaseController, c, g, request, \ - response, session, render, config, abort, redirect - -from ckan.lib.navl.dictization_functions import DataError -from ckan.logic import NotFound, ValidationError, get_action, NotAuthorized -from ckanext.harvest.plugin import DATASET_TYPE_NAME -from ckanext.harvest.logic.schema import harvest_source_form_to_db_schema - -from ckan.lib.helpers import Page,pager_url import ckan.plugins as p +import ckan.lib.helpers as h, json +from ckan.lib.base import BaseController, c, \ + response, render, abort, redirect + +from ckanext.harvest.plugin import DATASET_TYPE_NAME import logging log = logging.getLogger(__name__) @@ -29,211 +23,29 @@ class ViewController(BaseController): super(ViewController,self).__before__(action, **params) - #TODO: remove - c.publisher_auth = (config.get('ckan.harvest.auth.profile',None) == 'publisher') - c.dataset_type = DATASET_TYPE_NAME - def _get_publishers(self): - groups = None - user = model.User.get(c.user) - if c.publisher_auth: - if user.sysadmin: - groups = Group.all(group_type='publisher') - elif c.userobj: - groups = c.userobj.get_groups('publisher') - else: # anonymous user shouldn't have access to this page anyway. - groups = [] - - # Be explicit about which fields we make available in the template - groups = [ { - 'name': g.name, - 'id': g.id, - 'title': g.title, - } for g in groups ] - - return groups - - - def index(self): - context = {'model':model, 'user':c.user,'session':model.Session} - try: - # Request all harvest sources - c.sources = get_action('harvest_source_list')(context,{}) - except NotAuthorized,e: - abort(401,self.not_auth_message) - - if c.publisher_auth: - c.sources = sorted(c.sources,key=lambda source : source['publisher_title']) - - c.status = config.get('ckan.harvest.status') - - return render('index.html') - - def new(self,data = None,errors = None, error_summary = None): - - if ('save' in request.params) and not data: - return self._save_new() - - data = data or {} - errors = errors or {} - error_summary = error_summary or {} - - try: - context = {'model':model, 'user':c.user} - harvesters_info = get_action('harvesters_info_show')(context,{}) - except NotAuthorized,e: - abort(401,self.not_auth_message) - - vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': harvesters_info} - - c.groups = self._get_publishers() - - vars['form_items'] = self._make_autoform_items(harvesters_info) - - c.form = render('source/old_new_source_form.html', extra_vars=vars) - return render('source/new.html') - - - - def _save_new(self): - try: - data_dict = dict(request.params) - self._check_data_dict(data_dict) - context = {'model':model, 'user':c.user, 'session':model.Session, - 'schema':harvest_source_form_schema()} - - source = get_action('harvest_source_create')(context,data_dict) - - # Create a harvest job for the new source - get_action('harvest_job_create')(context,{'source_id':source['id']}) - - h.flash_success(_('New harvest source added successfully.' - 'A new harvest job for the source has also been created.')) - redirect('/harvest/%s' % source['id']) - except NotAuthorized,e: - abort(401,self.not_auth_message) - except DataError,e: - abort(400, 'Integrity Error') - except ValidationError,e: - errors = e.error_dict - error_summary = e.error_summary if hasattr(e,'error_summary') else None - return self.new(data_dict, errors, error_summary) - - def edit(self, id, data = None,errors = None, error_summary = None): - - if ('save' in request.params) and not data: - return self._save_edit(id) - - - if not data: - try: - context = {'model':model, 'user':c.user} - - old_data = get_action('harvest_source_show')(context, {'id':id}) - except NotFound: - abort(404, _('Harvest Source not found')) - except NotAuthorized,e: - abort(401,self.not_auth_message) - - data = data or old_data - errors = errors or {} - error_summary = error_summary or {} - try: - context = {'model':model, 'user':c.user} - harvesters_info = get_action('harvesters_info_show')(context,{}) - except NotAuthorized,e: - abort(401,self.not_auth_message) - - vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': harvesters_info} - - c.groups = self._get_publishers() - - vars['form_items'] = self._make_autoform_items(harvesters_info) - - c.form = render('source/old_new_source_form.html', extra_vars=vars) - - return render('source/edit.html') - - def _save_edit(self,id): - try: - data_dict = dict(request.params) - data_dict['id'] = id - self._check_data_dict(data_dict) - context = {'model':model, 'user':c.user, 'session':model.Session, - 'schema':harvest_source_form_schema()} - - source = get_action('harvest_source_update')(context,data_dict) - - h.flash_success(_('Harvest source edited successfully.')) - redirect('/harvest/%s' %id) - except NotAuthorized,e: - abort(401,self.not_auth_message) - except DataError,e: - abort(400, _('Integrity Error')) - except NotFound, e: - abort(404, _('Harvest Source not found')) - except ValidationError,e: - errors = e.error_dict - error_summary = e.error_summary if hasattr(e,'error_summary') else None - return self.edit(id,data_dict, errors, error_summary) - - def _check_data_dict(self, data_dict): - '''Check if the return data is correct''' - - # TODO: remove frequency once it is added to the frontend! - surplus_keys_schema = ['id','publisher_id','user_id','config','save','frequency'] - schema_keys = harvest_source_form_to_db_schema().keys() - keys_in_schema = set(schema_keys) - set(surplus_keys_schema) - - # user_id is not yet used, we'll set the logged user one for the time being - if not data_dict.get('user_id',None): - if c.userobj: - data_dict['user_id'] = c.userobj.id - if keys_in_schema - set(data_dict.keys()): - log.info(_('Incorrect form fields posted')) - raise DataError(data_dict) - - def read(self,id): - try: - context = {'model':model, 'user':c.user} - c.source = get_action('harvest_source_show')(context, {'id':id}) - c.page = Page( - collection=c.source['status']['packages'], - page=request.params.get('page', 1), - items_per_page=20, - url=pager_url - ) - - return render('source/read.html') - except NotFound: - abort(404,_('Harvest source not found')) - except NotAuthorized,e: - abort(401,self.not_auth_message) - - - def delete(self,id): try: context = {'model':model, 'user':c.user} - get_action('harvest_source_delete')(context, {'id':id}) + p.toolkit.get_action('harvest_source_delete')(context, {'id':id}) h.flash_success(_('Harvesting source successfully inactivated')) redirect(h.url_for('harvest')) - except NotFound: + except p.toolkit.ObjectNotFound: abort(404,_('Harvest source not found')) - except NotAuthorized,e: + except p.toolkit.NotAuthorized: abort(401,self.not_auth_message) - def create_harvesting_job(self,id): + def refresh(self, id): try: context = {'model':model, 'user':c.user, 'session':model.Session} - get_action('harvest_job_create')(context,{'source_id':id}) + p.toolkit.get_action('harvest_job_create')(context,{'source_id':id}) h.flash_success(_('Refresh requested, harvesting will take place within 15 minutes.')) - except NotFound: + except p.toolkit.ObjectNotFound: abort(404,_('Harvest source not found')) - except NotAuthorized,e: + except p.toolkit.NotAuthorized: abort(401,self.not_auth_message) except Exception, e: if 'Can not create jobs on inactive sources' in str(e): @@ -245,13 +57,13 @@ class ViewController(BaseController): msg = 'An error occurred: [%s]' % str(e) h.flash_error(msg) - redirect(h.url_for('harvest')) + redirect(h.url_for('{0}_admin'.format(DATASET_TYPE_NAME), id=id)) def show_object(self,id): try: context = {'model':model, 'user':c.user} - obj = get_action('harvest_object_show')(context, {'id':id}) + obj = p.toolkit.get_action('harvest_object_show')(context, {'id':id}) # Check content type. It will probably be either XML or JSON try: @@ -278,9 +90,9 @@ class ViewController(BaseController): response.headers['Content-Length'] = len(content) return content.encode('utf-8') - except NotFound: + except p.toolkit.ObjectNotFound: abort(404,_('Harvest object not found')) - except NotAuthorized,e: + except p.toolkit.NotAuthorized: abort(401,self.not_auth_message) except Exception, e: msg = 'An error occurred: [%s]' % str(e) @@ -293,10 +105,9 @@ class ViewController(BaseController): context = {'model': model, 'user': c.user} source_dict = p.toolkit.get_action('harvest_source_show')(context, {'id': source_id}) - except NotFound: + except p.toolkit.ObjectNotFound: abort(404, p.toolkit._('Harvest source not found')) - except NotAuthorized,e: - + except p.toolkit.NotAuthorized: abort(401,self.not_auth_message) except Exception, e: msg = 'An error occurred: [%s]' % str(e) @@ -308,20 +119,20 @@ class ViewController(BaseController): try: context = {'model':model, 'user':c.user} - c.job = get_action('harvest_job_show')(context, {'id': id}) - c.job_report = get_action('harvest_job_report')(context, {'id': id}) + c.job = p.toolkit.get_action('harvest_job_show')(context, {'id': id}) + c.job_report = p.toolkit.get_action('harvest_job_report')(context, {'id': id}) if not source_dict: - source_dict = get_action('harvest_source_show')(context, {'id': c.job['source_id']}) + source_dict = p.toolkit.get_action('harvest_source_show')(context, {'id': c.job['source_id']}) c.harvest_source = source_dict c.is_last_job = is_last return render('source/job/read.html') - except NotFound: + except p.toolkit.ObjectNotFound: abort(404,_('Harvest job not found')) - except NotAuthorized,e: + except p.toolkit.NotAuthorized: abort(401,self.not_auth_message) except Exception, e: msg = 'An error occurred: [%s]' % str(e) @@ -330,22 +141,22 @@ class ViewController(BaseController): def about(self, id): try: context = {'model':model, 'user':c.user} - c.harvest_source = get_action('harvest_source_show')(context, {'id':id}) + c.harvest_source = p.toolkit.get_action('harvest_source_show')(context, {'id':id}) return render('source/about.html') - except NotFound: + except p.toolkit.ObjectNotFound: abort(404,_('Harvest source not found')) - except NotAuthorized,e: + except p.toolkit.NotAuthorized: abort(401,self.not_auth_message) def admin(self, id): try: context = {'model':model, 'user':c.user} p.toolkit.check_access('harvest_source_update', context, {'id': id}) - c.harvest_source = get_action('harvest_source_show')(context, {'id':id}) + c.harvest_source = p.toolkit.get_action('harvest_source_show')(context, {'id':id}) return render('source/admin.html') - except NotFound: + except p.toolkit.ObjectNotFound: abort(404,_('Harvest source not found')) - except NotAuthorized,e: + except p.toolkit.NotAuthorized: abort(401,self.not_auth_message) def show_last_job(self, source): @@ -364,51 +175,15 @@ class ViewController(BaseController): try: context = {'model':model, 'user':c.user} - c.harvest_source = get_action('harvest_source_show')(context, {'id': source}) - c.jobs = get_action('harvest_job_list')(context, {'source_id': c.harvest_source['id']}) + c.harvest_source = p.toolkit.get_action('harvest_source_show')(context, {'id': source}) + c.jobs = p.toolkit.get_action('harvest_job_list')(context, {'source_id': c.harvest_source['id']}) return render('source/job/list.html') - except NotFound: + except p.toolkit.ObjectNotFound: abort(404,_('Harvest source not found')) - except NotAuthorized,e: + except p.toolkit.NotAuthorized, e: abort(401,self.not_auth_message) except Exception, e: msg = 'An error occurred: [%s]' % str(e) abort(500,msg) - - - def _make_autoform_items(self, harvesters_info): - states = [{'text': 'active', 'value': 'True'}, - {'text': 'withdrawn', 'value': 'False'},] - - harvest_list = [] - harvest_descriptions = p.toolkit.literal('') - - items = [ - {'name': 'url', 'control': 'input', 'label': _('URL'), 'placeholder': _(''), 'extra_info': 'This should include the http:// part of the URL'}, - {'name': 'type', 'control': 'select', 'options': harvest_list, 'label': _('Source type'), 'placeholder': _(''), 'extra_info': 'Which type of source does the URL above represent? '}, - {'control': 'html', 'html': harvest_descriptions}, - {'name': 'title', 'control': 'input', 'label': _('Title'), 'placeholder': _(''), 'extra_info': 'This will be shown as the datasets source.'}, - {'name': 'description', 'control': 'textarea', 'label': _('Description'), 'placeholder': _(''), 'extra_info':'You can add your own notes here about what the URL above represents to remind you later.'},] - - if c.groups: - pubs = [] - for group in c.groups: - pubs.append({'text':group['title'], 'value': group['id']}) - items.append({'name': 'publisher_id', 'control': 'select', 'options': pubs, 'label': _('Publisher'), 'placeholder': _('')}) - - items += [ - {'name': 'config', 'control': 'textarea', 'label': _('Configuration'), 'placeholder': _(''), 'extra_info': ''}, - {'name': 'active', 'control': 'select', 'options': states, 'label': _('State'), 'placeholder': _(''), 'extra_text': ''}, - ] - - return items diff --git a/ckanext/harvest/logic/action/create.py b/ckanext/harvest/logic/action/create.py index 144c889..aceff8e 100644 --- a/ckanext/harvest/logic/action/create.py +++ b/ckanext/harvest/logic/action/create.py @@ -1,4 +1,3 @@ -import re import logging from ckan import logic @@ -137,15 +136,3 @@ def _check_for_existing_jobs(context, source_id): exist = len(exist_new + exist_running) > 0 return exist - - -def _error_summary(error_dict): - error_summary = {} - for key, error in error_dict.iteritems(): - error_summary[_prettify(key)] = error[0] - return error_summary - -def _prettify(field_name): - field_name = re.sub('(?
diff --git a/ckanext/harvest/templates_new/source/admin_base.html b/ckanext/harvest/templates_new/source/admin_base.html index 3bf9701..c3f414f 100644 --- a/ckanext/harvest/templates_new/source/admin_base.html +++ b/ckanext/harvest/templates_new/source/admin_base.html @@ -11,7 +11,7 @@ {% if source.status.last_job and (source.status.last_job.status == 'New' or source.status.last_job.status == 'Running') %}
  • Refresh
  • {% else %} -
  • {{ h.nav_named_link(_('Refresh'), 'harvesting_job_create', id=source.id, class_='btn', icon='refresh')}}
  • +
  • {{ h.nav_named_link(_('Refresh'), '{0}_refresh'.format(c.dataset_type), id=source.id, class_='btn', icon='refresh')}}
  • {% endif %}
  • {{ h.nav_named_link(_('View harvest source'), '{0}_read'.format(c.dataset_type), id=source.name, class_='btn', icon='eye-open')}}
  • {% endblock %} diff --git a/ckanext/harvest/templates_new/source/new_source_form.html b/ckanext/harvest/templates_new/source/new_source_form.html index e39455b..330a9c3 100644 --- a/ckanext/harvest/templates_new/source/new_source_form.html +++ b/ckanext/harvest/templates_new/source/new_source_form.html @@ -13,10 +13,10 @@ {{ form.input('title', id='field-title', label=_('Title'), placeholder=_('eg. A descriptive title'), value=data.title, error=errors.title, classes=['control-full'], attrs={'data-module': 'slug-preview-target'}) }} - {% set prefix = 'harvest_source' %} - {% set domain = h.url_for(controller='package', action='read', id='', qualified=true) %} + {% set prefix = 'harvest' %} + {% set domain = h.url_for('{0}_read'.format(c.dataset_type), id='', qualified=true) %} {% set domain = domain|replace("http://", "")|replace("https://", "") %} - {% set attrs = {'data-module': 'slug-preview-slug', 'data-module-prefix': domain, 'data-module-placeholder': ''} %} + {% set attrs = {'data-module': 'slug-preview-slug', 'data-module-prefix': domain, 'data-module-placeholder': ''} %} {{ form.prepend('name', id='field-name', label=_('Name'), prepend=prefix, placeholder=_('eg. my-dataset'), value=data.name, error=errors.name, attrs=attrs) }}