Add a simple way for harvesters to store configuration options. If form_config_interface is set to 'Text' in the info dictionary, the configuration field will be enabled in the form. Harvesters can also provide a validate_config method.
This commit is contained in:
parent
ca6af0249a
commit
6e75d362e3
13
README.rst
13
README.rst
|
@ -139,6 +139,10 @@ following methods::
|
||||||
in the WUI.
|
in the WUI.
|
||||||
* description: a small description of what the harvester does. This will
|
* description: a small description of what the harvester does. This will
|
||||||
appear on the form as guidance to the user.
|
appear on the form as guidance to the user.
|
||||||
|
* form_config_interface [optional]: Harvesters willing to store configuration
|
||||||
|
values in the database must provide this key. The only supported value is
|
||||||
|
'Text'. This will enable the configuration text box in the form. See also
|
||||||
|
the ``validate_config`` method.
|
||||||
|
|
||||||
A complete example may be::
|
A complete example may be::
|
||||||
|
|
||||||
|
@ -152,6 +156,15 @@ following methods::
|
||||||
returns: A dictionary with the harvester descriptors
|
returns: A dictionary with the harvester descriptors
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
def validate_config(self, config):
|
||||||
|
'''
|
||||||
|
Harvesters can provide this method to validate the configuration entered in the
|
||||||
|
form. It should return a single string, which will be stored in the database.
|
||||||
|
Exceptions raised will be shown in the form's error messages.
|
||||||
|
|
||||||
|
returns: A string with the validated configuration options
|
||||||
|
'''
|
||||||
|
|
||||||
def gather_stage(self, harvest_job):
|
def gather_stage(self, harvest_job):
|
||||||
'''
|
'''
|
||||||
The gather stage will receive a HarvestJob object and will be
|
The gather stage will receive a HarvestJob object and will be
|
||||||
|
|
|
@ -38,7 +38,6 @@ class ViewController(BaseController):
|
||||||
data = data or {}
|
data = data or {}
|
||||||
errors = errors or {}
|
errors = errors or {}
|
||||||
error_summary = error_summary or {}
|
error_summary = error_summary or {}
|
||||||
#TODO: Use new description interface to build the types select and descriptions
|
|
||||||
vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()}
|
vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()}
|
||||||
|
|
||||||
c.form = render('source/new_source_form.html', extra_vars=vars)
|
c.form = render('source/new_source_form.html', extra_vars=vars)
|
||||||
|
@ -104,7 +103,7 @@ class ViewController(BaseController):
|
||||||
|
|
||||||
def _check_data_dict(self, data_dict):
|
def _check_data_dict(self, data_dict):
|
||||||
'''Check if the return data is correct'''
|
'''Check if the return data is correct'''
|
||||||
surplus_keys_schema = ['id','publisher_id','user_id','active','save']
|
surplus_keys_schema = ['id','publisher_id','user_id','active','save','config']
|
||||||
|
|
||||||
schema_keys = harvest_source_form_schema().keys()
|
schema_keys = harvest_source_form_schema().keys()
|
||||||
keys_in_schema = set(schema_keys) - set(surplus_keys_schema)
|
keys_in_schema = set(schema_keys) - set(surplus_keys_schema)
|
||||||
|
|
|
@ -44,9 +44,19 @@ class CKANHarvester(HarvesterBase):
|
||||||
return {
|
return {
|
||||||
'name': 'ckan',
|
'name': 'ckan',
|
||||||
'title': 'CKAN',
|
'title': 'CKAN',
|
||||||
'description': 'Harvests remote CKAN instances'
|
'description': 'Harvests remote CKAN instances',
|
||||||
|
'form_config_interface':'Text'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def validate_config(self,config):
|
||||||
|
try:
|
||||||
|
config_obj = json.loads(config)
|
||||||
|
except ValueError,e:
|
||||||
|
raise e
|
||||||
|
|
||||||
|
return config
|
||||||
|
|
||||||
|
|
||||||
def gather_stage(self,harvest_job):
|
def gather_stage(self,harvest_job):
|
||||||
log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url)
|
log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url)
|
||||||
get_all_packages = True
|
get_all_packages = True
|
||||||
|
@ -64,7 +74,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
base_url = harvest_job.source.url.rstrip('/')
|
base_url = harvest_job.source.url.rstrip('/')
|
||||||
base_rest_url = base_url + self._get_rest_api_offset()
|
base_rest_url = base_url + self._get_rest_api_offset()
|
||||||
base_search_url = base_url + self._get_search_api_offset()
|
base_search_url = base_url + self._get_search_api_offset()
|
||||||
|
|
||||||
if previous_job and not previous_job.gather_errors:
|
if previous_job and not previous_job.gather_errors:
|
||||||
get_all_packages = False
|
get_all_packages = False
|
||||||
|
|
||||||
|
@ -126,7 +136,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
return object_ids
|
return object_ids
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self._save_gather_error('No packages received for URL: %s' % url,
|
self._save_gather_error('No packages received for URL: %s' % url,
|
||||||
harvest_job)
|
harvest_job)
|
||||||
return None
|
return None
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
@ -159,7 +169,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if harvest_object.content is None:
|
if harvest_object.content is None:
|
||||||
self._save_object_error('Empty content for object %s' % harvest_object.id,
|
self._save_object_error('Empty content for object %s' % harvest_object.id,
|
||||||
harvest_object, 'Import')
|
harvest_object, 'Import')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -167,7 +177,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
package_dict = json.loads(harvest_object.content)
|
package_dict = json.loads(harvest_object.content)
|
||||||
return self._create_or_update_package(package_dict,harvest_object)
|
return self._create_or_update_package(package_dict,harvest_object)
|
||||||
except ValidationError,e:
|
except ValidationError,e:
|
||||||
self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
|
self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
|
||||||
harvest_object, 'Import')
|
harvest_object, 'Import')
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self._save_object_error('%r'%e,harvest_object,'Import')
|
self._save_object_error('%r'%e,harvest_object,'Import')
|
||||||
|
|
|
@ -228,7 +228,7 @@ def create_harvest_source(data_dict):
|
||||||
source.url = data['url']
|
source.url = data['url']
|
||||||
source.type = data['type']
|
source.type = data['type']
|
||||||
|
|
||||||
opt = ['active','description','user_id','publisher_id']
|
opt = ['active','description','user_id','publisher_id','config']
|
||||||
for o in opt:
|
for o in opt:
|
||||||
if o in data and data[o] is not None:
|
if o in data and data[o] is not None:
|
||||||
source.__setattr__(o,data[o])
|
source.__setattr__(o,data[o])
|
||||||
|
@ -245,14 +245,14 @@ def edit_harvest_source(source_id,data_dict):
|
||||||
raise NotFound('Harvest source %s does not exist' % source_id)
|
raise NotFound('Harvest source %s does not exist' % source_id)
|
||||||
|
|
||||||
# Add source id to the dict, as some validators will need it
|
# Add source id to the dict, as some validators will need it
|
||||||
data_dict["id"] = source.id
|
data_dict['id'] = source.id
|
||||||
|
|
||||||
data, errors = validate(data_dict, schema)
|
data, errors = validate(data_dict, schema)
|
||||||
if errors:
|
if errors:
|
||||||
Session.rollback()
|
Session.rollback()
|
||||||
raise ValidationError(errors,_error_summary(errors))
|
raise ValidationError(errors,_error_summary(errors))
|
||||||
|
|
||||||
fields = ['url','type','active','description','user_id','publisher_id']
|
fields = ['url','type','active','description','user_id','publisher_id','config']
|
||||||
for f in fields:
|
for f in fields:
|
||||||
if f in data_dict and data_dict[f] is not None and data_dict[f] != '':
|
if f in data_dict and data_dict[f] is not None and data_dict[f] != '':
|
||||||
source.__setattr__(f,data_dict[f])
|
source.__setattr__(f,data_dict[f])
|
||||||
|
@ -381,13 +381,13 @@ def import_last_objects(source_id=None):
|
||||||
return imported_objects
|
return imported_objects
|
||||||
|
|
||||||
def get_registered_harvesters_info():
|
def get_registered_harvesters_info():
|
||||||
# TODO: Use new description interface when implemented
|
|
||||||
available_harvesters = []
|
available_harvesters = []
|
||||||
for harvester in PluginImplementations(IHarvester):
|
for harvester in PluginImplementations(IHarvester):
|
||||||
info = harvester.info()
|
info = harvester.info()
|
||||||
if not info or 'name' not in info:
|
if not info or 'name' not in info:
|
||||||
log.error('Harvester %r does not provide the harvester name in the info response' % str(harvester))
|
log.error('Harvester %r does not provide the harvester name in the info response' % str(harvester))
|
||||||
continue
|
continue
|
||||||
|
info['show_config'] = (info.get('form_config_interface','') == 'Text')
|
||||||
available_harvesters.append(info)
|
available_harvesters.append(info)
|
||||||
|
|
||||||
return available_harvesters
|
return available_harvesters
|
||||||
|
|
|
@ -7,7 +7,8 @@ from ckan.lib.navl.validators import (ignore_missing,
|
||||||
|
|
||||||
from ckanext.harvest.logic.validators import harvest_source_id_exists, \
|
from ckanext.harvest.logic.validators import harvest_source_id_exists, \
|
||||||
harvest_source_url_validator, \
|
harvest_source_url_validator, \
|
||||||
harvest_source_type_exists
|
harvest_source_type_exists, \
|
||||||
|
harvest_source_config_validator
|
||||||
|
|
||||||
def default_harvest_source_schema():
|
def default_harvest_source_schema():
|
||||||
|
|
||||||
|
@ -19,7 +20,7 @@ def default_harvest_source_schema():
|
||||||
'active': [ignore_missing],
|
'active': [ignore_missing],
|
||||||
'user_id': [ignore_missing],
|
'user_id': [ignore_missing],
|
||||||
'publisher_id': [ignore_missing],
|
'publisher_id': [ignore_missing],
|
||||||
#'config'
|
'config': [harvest_source_config_validator]
|
||||||
}
|
}
|
||||||
|
|
||||||
return schema
|
return schema
|
||||||
|
|
|
@ -77,3 +77,20 @@ def harvest_source_type_exists(value,context):
|
||||||
raise Invalid('Unknown harvester type: %s. Have you registered a harvester for this type?' % value)
|
raise Invalid('Unknown harvester type: %s. Have you registered a harvester for this type?' % value)
|
||||||
|
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
def harvest_source_config_validator(key,data,errors,context):
|
||||||
|
harvester_type = data.get(('type',),'')
|
||||||
|
for harvester in PluginImplementations(IHarvester):
|
||||||
|
info = harvester.info()
|
||||||
|
if info['name'] == harvester_type:
|
||||||
|
if info.get('form_config_interface','') != 'Text':
|
||||||
|
raise Invalid('This harvester does not allow configuration options: %s' % harvester_type)
|
||||||
|
|
||||||
|
if harvester.validate_config:
|
||||||
|
try:
|
||||||
|
return harvester.validate_config(data[key])
|
||||||
|
except Exception, e:
|
||||||
|
raise Invalid('Error parsing the configuration options: %s' % str(e))
|
||||||
|
else:
|
||||||
|
return data[key]
|
||||||
|
|
||||||
|
|
|
@ -12,18 +12,18 @@
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend>Details</legend>
|
<legend>Details</legend>
|
||||||
<dl>
|
<dl>
|
||||||
<dt><label class="field_req" for="url">URL for source of metadata *</label></dt>
|
<dt><label class="field_req" for="url">URL for source of metadata *</label></dt>
|
||||||
<dd><input id="url" name="url" size="80" type="text" value="${data.get('url', '')}" /></dd>
|
<dd><input id="url" name="url" size="80" type="text" value="${data.get('url', '')}" /></dd>
|
||||||
<dd class="field_error" py:if="errors.get('url', '')">${errors.get('url', '')}</dd>
|
<dd class="field_error" py:if="errors.get('url', '')">${errors.get('url', '')}</dd>
|
||||||
<dd class="instructions basic">This should include the <tt>http://</tt> part of the URL</dd>
|
<dd class="instructions basic">This should include the <tt>http://</tt> part of the URL</dd>
|
||||||
<dt><label class="field_req" for="type">Source Type *</label></dt>
|
<dt><label class="field_req" for="type">Source Type *</label></dt>
|
||||||
<dd>
|
<dd>
|
||||||
<select id="type" name="type">
|
<select id="type" name="type">
|
||||||
<py:for each="harvester in harvesters">
|
<py:for each="harvester in harvesters">
|
||||||
<option value="${harvester.name}" py:attrs="{'selected': 'selected' if data.get('type', '') == harvester.name else None}" >${harvester.title}</option>
|
<option value="${harvester.name}" py:attrs="{'selected': 'selected' if data.get('type', '') == harvester.name else None, 'data-config': harvester.show_config}" >${harvester.title}</option>
|
||||||
</py:for>
|
</py:for>
|
||||||
</select>
|
</select>
|
||||||
</dd>
|
</dd>
|
||||||
|
@ -33,13 +33,24 @@
|
||||||
<py:for each="harvester in harvesters">
|
<py:for each="harvester in harvesters">
|
||||||
<li><span class="harvester-title">${harvester.title}</span>: ${harvester.description}</li>
|
<li><span class="harvester-title">${harvester.title}</span>: ${harvester.description}</li>
|
||||||
</py:for>
|
</py:for>
|
||||||
</ul>
|
</ul>
|
||||||
</dd>
|
</dd>
|
||||||
<dt><label class="field_opt" for="description">Description</label></dt>
|
<dt><label class="field_opt" for="description">Description</label></dt>
|
||||||
<dd><textarea id="description" name="description" cols="30" rows="2" style="height:75px">${data.get('description', '')}</textarea></dd>
|
<dd><textarea id="description" name="description" cols="30" rows="2" style="height:75px">${data.get('description', '')}</textarea></dd>
|
||||||
<dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd>
|
<dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd>
|
||||||
</dl>
|
<dt><label class="field_opt" for="config">Configuration</label></dt>
|
||||||
</fieldset>
|
<dd><textarea id="config" name="config" cols="30" rows="2" style="height:75px">${data.get('config', '')}</textarea></dd>
|
||||||
<input id="save" name="save" value="Save" type="submit" /> or <a href="/harvest">Return to the harvest sources list</a>
|
</dl>
|
||||||
|
</fieldset>
|
||||||
|
<input id="save" name="save" value="Save" type="submit" /> or <a href="/harvest">Return to the harvest sources list</a>
|
||||||
|
<script type="text/javascript">
|
||||||
|
$(document).ready(function() {
|
||||||
|
$("#type").change(function(){
|
||||||
|
var show_config = ($("#type option:selected").attr("data-config") == "True");
|
||||||
|
if (!show_config) $("#config").val("");
|
||||||
|
$("#config").attr("disabled", !show_config);
|
||||||
|
});
|
||||||
|
$("#type").trigger("change");
|
||||||
|
});
|
||||||
|
</script>
|
||||||
</form>
|
</form>
|
||||||
|
|
Loading…
Reference in New Issue