diff --git a/README.rst b/README.rst index 1789fb5..db8379c 100644 --- a/README.rst +++ b/README.rst @@ -139,6 +139,10 @@ following methods:: in the WUI. * description: a small description of what the harvester does. This will appear on the form as a guidance to the user. + * form_config_interface [optional]: Harvesters that need to store configuration + values in the database must provide this key. The only supported value is + 'Text'. This will enable the configuration text box in the form. See also + the ``validate_config`` method. A complete example may be:: @@ -152,6 +156,15 @@ following methods:: returns: A dictionary with the harvester descriptors ''' + def validate_config(self, config): + ''' + Harvesters can provide this method to validate the configuration entered in the + form. It should return a single string, which will be stored in the database. + Exceptions raised will be shown in the form's error messages. + + returns: A string with the validated configuration options + ''' + def gather_stage(self, harvest_job): ''' The gather stage will recieve a HarvestJob object and will be diff --git a/ckanext/harvest/controllers/view.py b/ckanext/harvest/controllers/view.py index 1695a40..302f589 100644 --- a/ckanext/harvest/controllers/view.py +++ b/ckanext/harvest/controllers/view.py @@ -38,7 +38,6 @@ class ViewController(BaseController): data = data or {} errors = errors or {} error_summary = error_summary or {} - #TODO: Use new description interface to build the types select and descriptions vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()} c.form = render('source/new_source_form.html', extra_vars=vars) @@ -104,7 +103,7 @@ class ViewController(BaseController): def _check_data_dict(self, data_dict): '''Check if the return data is correct''' - surplus_keys_schema = ['id','publisher_id','user_id','active','save'] + surplus_keys_schema = ['id','publisher_id','user_id','active','save','config'] 
schema_keys = harvest_source_form_schema().keys() keys_in_schema = set(schema_keys) - set(surplus_keys_schema) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index acfd7b5..23ab27f 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -44,9 +44,19 @@ class CKANHarvester(HarvesterBase): return { 'name': 'ckan', 'title': 'CKAN', - 'description': 'Harvests remote CKAN instances' + 'description': 'Harvests remote CKAN instances', + 'form_config_interface':'Text' } + def validate_config(self,config): + try: + config_obj = json.loads(config) + except ValueError,e: + raise e + + return config + + def gather_stage(self,harvest_job): log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url) get_all_packages = True @@ -64,7 +74,7 @@ class CKANHarvester(HarvesterBase): base_url = harvest_job.source.url.rstrip('/') base_rest_url = base_url + self._get_rest_api_offset() base_search_url = base_url + self._get_search_api_offset() - + if previous_job and not previous_job.gather_errors: get_all_packages = False @@ -126,7 +136,7 @@ class CKANHarvester(HarvesterBase): return object_ids else: - self._save_gather_error('No packages received for URL: %s' % url, + self._save_gather_error('No packages received for URL: %s' % url, harvest_job) return None except Exception, e: @@ -159,7 +169,7 @@ class CKANHarvester(HarvesterBase): return False if harvest_object.content is None: - self._save_object_error('Empty content for object %s' % harvest_object.id, + self._save_object_error('Empty content for object %s' % harvest_object.id, harvest_object, 'Import') return False @@ -167,7 +177,7 @@ class CKANHarvester(HarvesterBase): package_dict = json.loads(harvest_object.content) return self._create_or_update_package(package_dict,harvest_object) except ValidationError,e: - self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict), + 
self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict), harvest_object, 'Import') except Exception, e: self._save_object_error('%r'%e,harvest_object,'Import') diff --git a/ckanext/harvest/lib/__init__.py b/ckanext/harvest/lib/__init__.py index 5076c42..ebad538 100644 --- a/ckanext/harvest/lib/__init__.py +++ b/ckanext/harvest/lib/__init__.py @@ -228,7 +228,7 @@ def create_harvest_source(data_dict): source.url = data['url'] source.type = data['type'] - opt = ['active','description','user_id','publisher_id'] + opt = ['active','description','user_id','publisher_id','config'] for o in opt: if o in data and data[o] is not None: source.__setattr__(o,data[o]) @@ -245,14 +245,14 @@ def edit_harvest_source(source_id,data_dict): raise NotFound('Harvest source %s does not exist' % source_id) # Add source id to the dict, as some validators will need it - data_dict["id"] = source.id + data_dict['id'] = source.id data, errors = validate(data_dict, schema) if errors: Session.rollback() raise ValidationError(errors,_error_summary(errors)) - fields = ['url','type','active','description','user_id','publisher_id'] + fields = ['url','type','active','description','user_id','publisher_id','config'] for f in fields: if f in data_dict and data_dict[f] is not None and data_dict[f] != '': source.__setattr__(f,data_dict[f]) @@ -381,13 +381,13 @@ def import_last_objects(source_id=None): return imported_objects def get_registered_harvesters_info(): - # TODO: Use new description interface when implemented available_harvesters = [] for harvester in PluginImplementations(IHarvester): info = harvester.info() if not info or 'name' not in info: log.error('Harvester %r does not provide the harvester name in the info response' % str(harvester)) continue + info['show_config'] = (info.get('form_config_interface','') == 'Text') available_harvesters.append(info) return available_harvesters diff --git a/ckanext/harvest/logic/schema.py 
b/ckanext/harvest/logic/schema.py index 8b76403..3af0c41 100644 --- a/ckanext/harvest/logic/schema.py +++ b/ckanext/harvest/logic/schema.py @@ -7,7 +7,8 @@ from ckan.lib.navl.validators import (ignore_missing, from ckanext.harvest.logic.validators import harvest_source_id_exists, \ harvest_source_url_validator, \ - harvest_source_type_exists + harvest_source_type_exists, \ + harvest_source_config_validator def default_harvest_source_schema(): @@ -19,7 +20,7 @@ def default_harvest_source_schema(): 'active': [ignore_missing], 'user_id': [ignore_missing], 'publisher_id': [ignore_missing], - #'config' + 'config': [harvest_source_config_validator] } return schema diff --git a/ckanext/harvest/logic/validators.py b/ckanext/harvest/logic/validators.py index 9f7343c..d555e3d 100644 --- a/ckanext/harvest/logic/validators.py +++ b/ckanext/harvest/logic/validators.py @@ -77,3 +77,20 @@ def harvest_source_type_exists(value,context): raise Invalid('Unknown harvester type: %s. Have you registered a harvester for this type?' % value) return value + +def harvest_source_config_validator(key,data,errors,context): + harvester_type = data.get(('type',),'') + for harvester in PluginImplementations(IHarvester): + info = harvester.info() + if info['name'] == harvester_type: + if info.get('form_config_interface','') != 'Text': + raise Invalid('This harvester does not allow configuration options: %s' % harvester_type) + + if harvester.validate_config: + try: + return harvester.validate_config(data[key]) + except Exception, e: + raise Invalid('Error parsing the configuration options: %s' % str(e)) + else: + return data[key] + diff --git a/ckanext/harvest/templates/source/new_source_form.html b/ckanext/harvest/templates/source/new_source_form.html index 578a233..72c94e8 100644 --- a/ckanext/harvest/templates/source/new_source_form.html +++ b/ckanext/harvest/templates/source/new_source_form.html @@ -12,18 +12,18 @@ -
- Details -
-
+
+ Details +
+
${errors.get('url', '')}
-
This should include the http:// part of the URL
-
-
+
This should include the http:// part of the URL
+
+
@@ -33,13 +33,24 @@
  • ${harvester.title}: ${harvester.description}
  • - - -
    -
    -
    You can add your own notes here about what the URL above represents to remind you later.
    -
    -
    - or Return to the harvest sources list - + + +
    +
    +
    You can add your own notes here about what the URL above represents to remind you later.
    +
    +
    +
    +
    + or Return to the harvest sources list +