[ckan harvester] Add support for defining default extras
This commit is contained in:
parent
ae51093213
commit
eb646b3385
33
README.rst
33
README.rst
|
@ -117,25 +117,38 @@ The CKAN harvesters support a number of configuration options to control their
|
|||
behaviour. Those need to be defined as a JSON object in the configuration form
|
||||
field. The currently supported configuration options are:
|
||||
|
||||
* api_version: You can force the harvester to use either version '1' or
|
||||
'2' of the CKAN API. Default is '2'.
|
||||
* api_version: You can force the harvester to use either version '1' or '2' of
|
||||
the CKAN API. Default is '2'.
|
||||
|
||||
* default_tags: A list of tags that will be added to all harvested datasets.
|
||||
* default_tags: A list of tags that will be added to all harvested datasets.
|
||||
Tags don't need to previously exist.
|
||||
|
||||
* default_groups: A list of groups to which the harvested datasets will be
|
||||
* default_groups: A list of groups to which the harvested datasets will be
|
||||
added. The groups must exist. Note that you must use ids or names to
|
||||
define the groups according to the API version you defined (names for
|
||||
version '1', ids for version '2')
|
||||
define the groups according to the API version you defined (names for version
|
||||
'1', ids for version '2').
|
||||
|
||||
* user: User who will run the harvesting process. Please note that this user
|
||||
* default_extras: A dictionary of key value pairs that will be added to extras
|
||||
of the harvested datasets. You can use the following replacement strings,
|
||||
that will be replaced before creating or updating the datasets:
|
||||
|
||||
* {dataset_id}
|
||||
* {harvest_source_id}
|
||||
* {harvest_source_url} # Will be stripped of trailing forward slashes (/)
|
||||
* {harvest_job_id}
|
||||
* {harvest_object_id}
|
||||
|
||||
* override_extras: Assign default extras even if they already exist in the
|
||||
remote dataset. Default is False (only non-existing extras are added).
|
||||
|
||||
* user: User who will run the harvesting process. Please note that this user
|
||||
needs to have permission for creating packages, and if default groups were
|
||||
defined, the user must have permission to assign packages to these groups.
|
||||
|
||||
* api_key: If the remote CKAN instance has restricted access to the API you
|
||||
* api_key: If the remote CKAN instance has restricted access to the API you
|
||||
can provide a CKAN API key, which will be sent in any request.
|
||||
|
||||
* read_only: Create harvested packages in read-only mode. Only the user who
|
||||
* read_only: Create harvested packages in read-only mode. Only the user who
|
||||
performed the harvest (the one defined in the previous setting or the
|
||||
'harvest' sysadmin) will be able to edit and administer the packages
|
||||
created from this harvesting source. Logged in users and visitors will be
|
||||
|
@ -148,6 +161,8 @@ the configuration field)::
|
|||
"api_version":"1",
|
||||
"default_tags":["new-tag-1","new-tag-2"],
|
||||
"default_groups":["my-own-group"],
|
||||
"default_extras":{"new_extra":"Test","harvest_url":"{harvest_source_url}/dataset/{dataset_id}"},
|
||||
"override_extras": true,
|
||||
"user":"harvester-user",
|
||||
"api_key":"<REMOTE_API_KEY>",
|
||||
"read_only": true
|
||||
|
|
|
@ -71,7 +71,14 @@ class CKANHarvester(HarvesterBase):
|
|||
try:
|
||||
config_obj = json.loads(config)
|
||||
|
||||
if 'default_tags' in config_obj:
|
||||
if not isinstance(config_obj['default_tags'],list):
|
||||
raise ValueError('default_tags must be a list')
|
||||
|
||||
if 'default_groups' in config_obj:
|
||||
if not isinstance(config_obj['default_groups'],list):
|
||||
raise ValueError('default_groups must be a list')
|
||||
|
||||
# Check if default groups exist
|
||||
context = {'model':model,'user':c.user}
|
||||
for group_name in config_obj['default_groups']:
|
||||
|
@ -80,6 +87,10 @@ class CKANHarvester(HarvesterBase):
|
|||
except NotFound,e:
|
||||
raise ValueError('Default group not found')
|
||||
|
||||
if 'default_extras' in config_obj:
|
||||
if not isinstance(config_obj['default_extras'],dict):
|
||||
raise ValueError('default_extras must be a dictionary')
|
||||
|
||||
if 'user' in config_obj:
|
||||
# Check if user exists
|
||||
context = {'model':model,'user':c.user}
|
||||
|
@ -237,6 +248,23 @@ class CKANHarvester(HarvesterBase):
|
|||
package_dict['groups'] = []
|
||||
package_dict['groups'].extend([g for g in default_groups if g not in package_dict['groups']])
|
||||
|
||||
# Set default extras if needed
|
||||
default_extras = self.config.get('default_extras',{})
|
||||
if default_extras:
|
||||
override_extras = self.config.get('override_extras',False)
|
||||
if not 'extras' in package_dict:
|
||||
package_dict['extras'] = {}
|
||||
for key,value in default_extras.iteritems():
|
||||
if not key in package_dict['extras'] or override_extras:
|
||||
# Look for replacement strings
|
||||
if isinstance(value,basestring):
|
||||
value = value.format(harvest_source_id=harvest_object.job.source.id,
|
||||
harvest_source_url=harvest_object.job.source.url.strip('/'),
|
||||
harvest_job_id=harvest_object.job.id,
|
||||
harvest_object_id=harvest_object.id,
|
||||
dataset_id=package_dict['id'])
|
||||
package_dict['extras'][key] = value
|
||||
|
||||
result = self._create_or_update_package(package_dict,harvest_object)
|
||||
|
||||
if result and self.config.get('read_only',False) == True:
|
||||
|
|
Loading…
Reference in New Issue