diff --git a/README.rst b/README.rst
index 7d553b7..c540d4f 100644
--- a/README.rst
+++ b/README.rst
@@ -113,6 +113,30 @@ to add the `ckan_harvester` plugin to your options file:
 
 After adding it, a 'CKAN' option should appear in the 'New harvest source' form.
 
+The CKAN harvesters support a number of configuration options to control their
+behaviour. These need to be defined as a JSON object in the configuration form
+field. The currently supported configuration options are:
+
+*   api_version: You can force the harvester to use either version '1' or
+    '2' of the CKAN API. Default is '2'.
+
+*   default_tags: A list of tags that will be added to all harvested datasets.
+    Tags don't need to previously exist.
+
+*   default_groups: A list of groups to which the harvested datasets will be
+    added. The groups must exist. Note that you must use ids or names to
+    define the groups according to the API version you defined (names for
+    version '1', ids for version '2')
+
+Here is an example of a configuration object (the one that must be entered in
+the configuration field)::
+
+    {
+        "api_version":"1",
+        "default_tags":["new-tag-1","new-tag-2"],
+        "default_groups":["my-own-group"]
+    }
+
 The harvesting interface
 ========================
 
diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py
index 965b541..3a8de25 100644
--- a/ckanext/harvest/harvesters/base.py
+++ b/ckanext/harvest/harvesters/base.py
@@ -106,11 +106,17 @@ class HarvesterBase(SingletonPlugin):
             schema = default_package_schema()
             schema["id"] = [ignore_missing, unicode]
 
+            # Use the API version from the harvester config, defaulting to '2'
+            if self.config:
+                api_version = self.config.get('api_version','2')
+            else:
+                api_version = '2'
+
             context = {
                 'model': model,
                 'session': Session,
                 'user': u'harvest',
-                'api_version':'2',
+                'api_version': api_version,
                 'schema': schema,
             }
 
diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py
index 0354bf5..208daa5 100644
--- a/ckanext/harvest/harvesters/ckanharvester.py
+++ b/ckanext/harvest/harvesters/ckanharvester.py
@@ -1,7 +1,9 @@
 import urllib2
 
+from ckan.lib.base import c
+from ckan import model
 from ckan.model import Session, Package
-from ckan.logic import ValidationError, NotFound
+from ckan.logic import ValidationError, NotFound, get_action
 from ckan.lib.helpers import json
 
 from ckanext.harvest.model import HarvestJob, HarvestObject, HarvestGatherError, \
@@ -65,6 +67,16 @@ class CKANHarvester(HarvesterBase):
 
         try:
             config_obj = json.loads(config)
+
+            if 'default_groups' in config_obj:
+                # Reject the config if any default group does not exist locally
+                context = {'model':model,'user':c.user}
+                for group_name in config_obj['default_groups']:
+                    try:
+                        get_action('group_show')(context,{'id':group_name})
+                    except NotFound:
+                        raise ValueError('Default group not found')
+
         except ValueError,e:
             raise e
 
@@ -196,6 +208,24 @@ class CKANHarvester(HarvesterBase):
 
         try:
             package_dict = json.loads(harvest_object.content)
+
+            # Set default tags if needed
+            default_tags = self.config.get('default_tags',[])
+            if default_tags:
+                if not 'tags' in package_dict:
+                    package_dict['tags'] = []
+                package_dict['tags'].extend([t for t in default_tags if t not in package_dict['tags']])
+
+            # Ignore remote groups for the time being (the key may be absent)
+            package_dict.pop('groups', None)
+
+            # Set default groups if needed
+            default_groups = self.config.get('default_groups',[])
+            if default_groups:
+                if not 'groups' in package_dict:
+                    package_dict['groups'] = []
+                package_dict['groups'].extend([g for g in default_groups if g not in package_dict['groups']])
+
             return self._create_or_update_package(package_dict,harvest_object)
         except ValidationError,e:
             self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),