Add a simple way for harvesters to store configuration options. If form_config_interface is set to 'Text' in the info dictionary, the configuration field will be enabled in the form. Harvesters can also provide a validate_config method.

This commit is contained in:
Adrià Mercader 2011-06-07 12:07:53 +01:00
parent ca6af0249a
commit 6e75d362e3
7 changed files with 81 additions and 30 deletions

View File

@ -139,6 +139,10 @@ following methods::
in the WUI.
* description: a small description of what the harvester does. This will
appear on the form as a guidance to the user.
* form_config_interface [optional]: Harvesters that need to store configuration
values in the database must provide this key. The only supported value is
'Text'. This will enable the configuration text box in the form. See also
the ``validate_config`` method.
A complete example may be::
@ -152,6 +156,15 @@ following methods::
returns: A dictionary with the harvester descriptors
'''
def validate_config(self, config):
'''
Harvesters can provide this method to validate the configuration entered in the
form. It should return a single string, which will be stored in the database.
Exceptions raised will be shown in the form's error messages.
returns: A string with the validated configuration options
'''
def gather_stage(self, harvest_job):
'''
The gather stage will receive a HarvestJob object and will be

View File

@ -38,7 +38,6 @@ class ViewController(BaseController):
data = data or {}
errors = errors or {}
error_summary = error_summary or {}
#TODO: Use new description interface to build the types select and descriptions
vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()}
c.form = render('source/new_source_form.html', extra_vars=vars)
@ -104,7 +103,7 @@ class ViewController(BaseController):
def _check_data_dict(self, data_dict):
'''Check if the return data is correct'''
surplus_keys_schema = ['id','publisher_id','user_id','active','save']
surplus_keys_schema = ['id','publisher_id','user_id','active','save','config']
schema_keys = harvest_source_form_schema().keys()
keys_in_schema = set(schema_keys) - set(surplus_keys_schema)

View File

@ -44,9 +44,19 @@ class CKANHarvester(HarvesterBase):
return {
'name': 'ckan',
'title': 'CKAN',
'description': 'Harvests remote CKAN instances'
'description': 'Harvests remote CKAN instances',
'form_config_interface':'Text'
}
def validate_config(self,config):
    '''
    Validate the configuration string entered for this harvester.

    An empty value is accepted as-is, so that a source can be saved
    without any configuration. Any other value must be parseable as JSON.

    :param config: the raw configuration string
    :returns: the same string, unchanged, once it has been validated
    :raises ValueError: if a non-empty config is not valid JSON
    '''
    if not config:
        # json.loads('') raises ValueError, which would reject a source
        # that simply has no configuration options.
        return config

    # The parsed object is not needed: parsing is only a validity check and
    # any ValueError propagates to the caller with its original traceback.
    json.loads(config)
    return config
def gather_stage(self,harvest_job):
log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url)
get_all_packages = True
@ -64,7 +74,7 @@ class CKANHarvester(HarvesterBase):
base_url = harvest_job.source.url.rstrip('/')
base_rest_url = base_url + self._get_rest_api_offset()
base_search_url = base_url + self._get_search_api_offset()
if previous_job and not previous_job.gather_errors:
get_all_packages = False
@ -126,7 +136,7 @@ class CKANHarvester(HarvesterBase):
return object_ids
else:
self._save_gather_error('No packages received for URL: %s' % url,
self._save_gather_error('No packages received for URL: %s' % url,
harvest_job)
return None
except Exception, e:
@ -159,7 +169,7 @@ class CKANHarvester(HarvesterBase):
return False
if harvest_object.content is None:
self._save_object_error('Empty content for object %s' % harvest_object.id,
self._save_object_error('Empty content for object %s' % harvest_object.id,
harvest_object, 'Import')
return False
@ -167,7 +177,7 @@ class CKANHarvester(HarvesterBase):
package_dict = json.loads(harvest_object.content)
return self._create_or_update_package(package_dict,harvest_object)
except ValidationError,e:
self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
harvest_object, 'Import')
except Exception, e:
self._save_object_error('%r'%e,harvest_object,'Import')

View File

@ -228,7 +228,7 @@ def create_harvest_source(data_dict):
source.url = data['url']
source.type = data['type']
opt = ['active','description','user_id','publisher_id']
opt = ['active','description','user_id','publisher_id','config']
for o in opt:
if o in data and data[o] is not None:
source.__setattr__(o,data[o])
@ -245,14 +245,14 @@ def edit_harvest_source(source_id,data_dict):
raise NotFound('Harvest source %s does not exist' % source_id)
# Add source id to the dict, as some validators will need it
data_dict["id"] = source.id
data_dict['id'] = source.id
data, errors = validate(data_dict, schema)
if errors:
Session.rollback()
raise ValidationError(errors,_error_summary(errors))
fields = ['url','type','active','description','user_id','publisher_id']
fields = ['url','type','active','description','user_id','publisher_id','config']
for f in fields:
if f in data_dict and data_dict[f] is not None and data_dict[f] != '':
source.__setattr__(f,data_dict[f])
@ -381,13 +381,13 @@ def import_last_objects(source_id=None):
return imported_objects
def get_registered_harvesters_info():
    '''
    Collect the info dictionaries of the registered IHarvester plugins.

    Harvesters whose info() response is empty or has no 'name' key are
    logged as errors and left out. Each dictionary that is kept gets an
    extra 'show_config' boolean, True only when its
    'form_config_interface' value is 'Text'.

    returns: A list with the info dictionaries of the valid harvesters
    '''
    # TODO: Use new description interface when implemented
    harvesters_info = []
    for plugin in PluginImplementations(IHarvester):
        details = plugin.info()
        if details and 'name' in details:
            # Flag read by the form to decide whether to enable the config box
            details['show_config'] = (details.get('form_config_interface','') == 'Text')
            harvesters_info.append(details)
        else:
            log.error('Harvester %r does not provide the harvester name in the info response' % str(plugin))
    return harvesters_info

View File

@ -7,7 +7,8 @@ from ckan.lib.navl.validators import (ignore_missing,
from ckanext.harvest.logic.validators import harvest_source_id_exists, \
harvest_source_url_validator, \
harvest_source_type_exists
harvest_source_type_exists, \
harvest_source_config_validator
def default_harvest_source_schema():
@ -19,7 +20,7 @@ def default_harvest_source_schema():
'active': [ignore_missing],
'user_id': [ignore_missing],
'publisher_id': [ignore_missing],
#'config'
'config': [harvest_source_config_validator]
}
return schema

View File

@ -77,3 +77,20 @@ def harvest_source_type_exists(value,context):
raise Invalid('Unknown harvester type: %s. Have you registered a harvester for this type?' % value)
return value
def harvest_source_config_validator(key,data,errors,context):
    '''
    Validate the configuration field against the selected harvester.

    The harvester whose info 'name' matches the ('type',) value in data is
    looked up among the registered IHarvester plugins:

    * If it does not declare 'form_config_interface' as 'Text', a non-empty
      configuration is rejected. An empty one is returned untouched.
    * If it provides a ``validate_config`` method, the value it returns is
      returned. Any exception it raises is reported as Invalid.
    * Otherwise the value is returned as it was received.

    Harvesters whose info() response is empty or has no name are skipped.
    If no harvester matches the type, nothing is returned.

    :param key: key of the configuration field in the data dict
    :param data: data dict being validated
    :param errors: errors dict (not used here)
    :param context: validation context (not used here)
    :returns: the validated configuration value
    :raises Invalid: if the harvester does not accept configuration options
        or its ``validate_config`` method fails
    '''
    harvester_type = data.get(('type',),'')
    for harvester in PluginImplementations(IHarvester):
        info = harvester.info()
        # Tolerate harvesters with an empty or nameless info response
        # instead of failing with a KeyError / TypeError.
        if not info or info.get('name') != harvester_type:
            continue

        if info.get('form_config_interface','') != 'Text':
            # Only complain when some configuration was actually provided;
            # an empty value is not an attempt to configure the harvester.
            if data.get(key):
                raise Invalid('This harvester does not allow configuration options: %s' % harvester_type)
            return data[key]

        # validate_config is optional, so the attribute may not exist at all
        validate_config = getattr(harvester, 'validate_config', None)
        if not validate_config:
            return data[key]

        try:
            return validate_config(data[key])
        except Exception as e:
            raise Invalid('Error parsing the configuration options: %s' % str(e))

View File

@ -12,18 +12,18 @@
</ul>
</div>
<fieldset>
<legend>Details</legend>
<dl>
<dt><label class="field_req" for="url">URL for source of metadata *</label></dt>
<fieldset>
<legend>Details</legend>
<dl>
<dt><label class="field_req" for="url">URL for source of metadata *</label></dt>
<dd><input id="url" name="url" size="80" type="text" value="${data.get('url', '')}" /></dd>
<dd class="field_error" py:if="errors.get('url', '')">${errors.get('url', '')}</dd>
<dd class="instructions basic">This should include the <tt>http://</tt> part of the URL</dd>
<dt><label class="field_req" for="type">Source Type *</label></dt>
<dd>
<dd class="instructions basic">This should include the <tt>http://</tt> part of the URL</dd>
<dt><label class="field_req" for="type">Source Type *</label></dt>
<dd>
<select id="type" name="type">
<py:for each="harvester in harvesters">
<option value="${harvester.name}" py:attrs="{'selected': 'selected' if data.get('type', '') == harvester.name else None}" >${harvester.title}</option>
<option value="${harvester.name}" py:attrs="{'selected': 'selected' if data.get('type', '') == harvester.name else None, 'data-config': harvester.show_config}" >${harvester.title}</option>
</py:for>
</select>
</dd>
@ -33,13 +33,24 @@
<py:for each="harvester in harvesters">
<li><span class="harvester-title">${harvester.title}</span>: ${harvester.description}</li>
</py:for>
</ul>
</dd>
<dt><label class="field_opt" for="description">Description</label></dt>
<dd><textarea id="description" name="description" cols="30" rows="2" style="height:75px">${data.get('description', '')}</textarea></dd>
<dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd>
</dl>
</fieldset>
<input id="save" name="save" value="Save" type="submit" /> or <a href="/harvest">Return to the harvest sources list</a>
</ul>
</dd>
<dt><label class="field_opt" for="description">Description</label></dt>
<dd><textarea id="description" name="description" cols="30" rows="2" style="height:75px">${data.get('description', '')}</textarea></dd>
<dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd>
<dt><label class="field_opt" for="config">Configuration</label></dt>
<dd><textarea id="config" name="config" cols="30" rows="2" style="height:75px">${data.get('config', '')}</textarea></dd>
</dl>
</fieldset>
<input id="save" name="save" value="Save" type="submit" /> or <a href="/harvest">Return to the harvest sources list</a>
<script type="text/javascript">
$(function() {
    // Look the two form controls up once and reuse them in the handler.
    var typeSelect = $("#type");
    var configBox = $("#config");

    typeSelect.change(function() {
        // The data-config attribute of the selected option says whether the
        // chosen source type accepts configuration (compared as the string "True").
        var configAllowed = (typeSelect.find("option:selected").attr("data-config") == "True");
        if (!configAllowed) {
            // Drop anything typed before switching to a type without configuration.
            configBox.val("");
        }
        configBox.attr("disabled", !configAllowed);
    });

    // Apply the right state for the type that is selected when the page loads.
    typeSelect.trigger("change");
});
</script>
</form>