Merge pull request #251 from ckan/249-default-tag
Fix default_groups, plus fix docs for default_tags
This commit is contained in:
commit
f2d8a5f8cc
15
README.rst
15
README.rst
|
@ -289,12 +289,11 @@ field. The currently supported configuration options are:
|
||||||
the CKAN API. Default is 2.
|
the CKAN API. Default is 2.
|
||||||
|
|
||||||
* default_tags: A list of tags that will be added to all harvested datasets.
|
* default_tags: A list of tags that will be added to all harvested datasets.
|
||||||
Tags don't need to previously exist.
|
Tags don't need to previously exist. This field takes a list of tag dicts
|
||||||
|
(see example), which allows you to optinally specify a vocabulary.
|
||||||
|
|
||||||
* default_groups: A list of groups to which the harvested datasets will be
|
* default_groups: A list of group IDs or names to which the harvested datasets
|
||||||
added to. The groups must exist. Note that you must use ids or names to
|
will be added to. The groups must exist.
|
||||||
define the groups according to the API version you defined (names for version
|
|
||||||
1, ids for version 2).
|
|
||||||
|
|
||||||
* default_extras: A dictionary of key value pairs that will be added to extras
|
* default_extras: A dictionary of key value pairs that will be added to extras
|
||||||
of the harvested datasets. You can use the following replacement strings,
|
of the harvested datasets. You can use the following replacement strings,
|
||||||
|
@ -367,9 +366,9 @@ the configuration field)::
|
||||||
|
|
||||||
{
|
{
|
||||||
"api_version": 1,
|
"api_version": 1,
|
||||||
"default_tags":["new-tag-1","new-tag-2"],
|
"default_tags": [{"name": "geo"}, {"name": "namibia"],
|
||||||
"default_groups":["my-own-group"],
|
"default_groups": ["science", "spend-data"],
|
||||||
"default_extras":{"new_extra":"Test","harvest_url":"{harvest_source_url}/dataset/{dataset_id}"},
|
"default_extras": {"encoding":"utf8", "harvest_url": "{harvest_source_url}/dataset/{dataset_id}"},
|
||||||
"override_extras": true,
|
"override_extras": true,
|
||||||
"organizations_filter_include": [],
|
"organizations_filter_include": [],
|
||||||
"organizations_filter_exclude": ["remote-organization"],
|
"organizations_filter_exclude": ["remote-organization"],
|
||||||
|
|
|
@ -6,7 +6,6 @@ import socket
|
||||||
|
|
||||||
from sqlalchemy import exists
|
from sqlalchemy import exists
|
||||||
|
|
||||||
from ckan.lib.base import c
|
|
||||||
from ckan import model
|
from ckan import model
|
||||||
from ckan.logic import ValidationError, NotFound, get_action
|
from ckan.logic import ValidationError, NotFound, get_action
|
||||||
from ckan.lib.helpers import json
|
from ckan.lib.helpers import json
|
||||||
|
@ -68,9 +67,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
data = json.loads(content)
|
data = json.loads(content)
|
||||||
if self.action_api_version == 3:
|
if self.action_api_version == 3:
|
||||||
return data.pop('result')
|
return data.pop('result')
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
except (ContentFetchError, ValueError):
|
except (ContentFetchError, ValueError):
|
||||||
log.debug('Could not fetch/decode remote group')
|
log.debug('Could not fetch/decode remote group')
|
||||||
raise RemoteResourceError('Could not fetch/decode remote group')
|
raise RemoteResourceError('Could not fetch/decode remote group')
|
||||||
|
@ -121,17 +118,28 @@ class CKANHarvester(HarvesterBase):
|
||||||
if 'default_tags' in config_obj:
|
if 'default_tags' in config_obj:
|
||||||
if not isinstance(config_obj['default_tags'], list):
|
if not isinstance(config_obj['default_tags'], list):
|
||||||
raise ValueError('default_tags must be a list')
|
raise ValueError('default_tags must be a list')
|
||||||
|
if config_obj['default_tags'] and \
|
||||||
|
not isinstance(config_obj['default_tags'][0], dict):
|
||||||
|
raise ValueError('default_tags must be a list of '
|
||||||
|
'dictionaries')
|
||||||
|
|
||||||
if 'default_groups' in config_obj:
|
if 'default_groups' in config_obj:
|
||||||
if not isinstance(config_obj['default_groups'], list):
|
if not isinstance(config_obj['default_groups'], list):
|
||||||
raise ValueError('default_groups must be a list')
|
raise ValueError('default_groups must be a *list* of group'
|
||||||
|
' names/ids')
|
||||||
|
if config_obj['default_groups'] and \
|
||||||
|
not isinstance(config_obj['default_groups'][0], str):
|
||||||
|
raise ValueError('default_groups must be a list of group '
|
||||||
|
'names/ids (i.e. strings)')
|
||||||
|
|
||||||
# Check if default groups exist
|
# Check if default groups exist
|
||||||
context = {'model': model, 'user': c.user}
|
context = {'model': model, 'user': toolkit.c.user}
|
||||||
for group_name in config_obj['default_groups']:
|
self.default_group_dicts = []
|
||||||
|
for group_name_or_id in config_obj['default_groups']:
|
||||||
try:
|
try:
|
||||||
group = get_action('group_show')(
|
group = get_action('group_show')(
|
||||||
context, {'id': group_name})
|
context, {'id': group_name_or_id})
|
||||||
|
self.default_group_dicts.append(group)
|
||||||
except NotFound, e:
|
except NotFound, e:
|
||||||
raise ValueError('Default group not found')
|
raise ValueError('Default group not found')
|
||||||
|
|
||||||
|
@ -141,7 +149,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
|
|
||||||
if 'user' in config_obj:
|
if 'user' in config_obj:
|
||||||
# Check if user exists
|
# Check if user exists
|
||||||
context = {'model': model, 'user': c.user}
|
context = {'model': model, 'user': toolkit.c.user}
|
||||||
try:
|
try:
|
||||||
user = get_action('user_show')(
|
user = get_action('user_show')(
|
||||||
context, {'id': config_obj.get('user')})
|
context, {'id': config_obj.get('user')})
|
||||||
|
@ -481,9 +489,10 @@ class CKANHarvester(HarvesterBase):
|
||||||
if default_groups:
|
if default_groups:
|
||||||
if not 'groups' in package_dict:
|
if not 'groups' in package_dict:
|
||||||
package_dict['groups'] = []
|
package_dict['groups'] = []
|
||||||
|
existing_group_ids = [g['id'] for g in package_dict['groups']]
|
||||||
package_dict['groups'].extend(
|
package_dict['groups'].extend(
|
||||||
[g for g in default_groups
|
[g for g in self.default_group_dicts
|
||||||
if g not in package_dict['groups']])
|
if g['id'] not in existing_group_ids])
|
||||||
|
|
||||||
# Set default extras if needed
|
# Set default extras if needed
|
||||||
default_extras = self.config.get('default_extras', {})
|
default_extras = self.config.get('default_extras', {})
|
||||||
|
|
|
@ -197,8 +197,16 @@ class HarvestObjectError(HarvestDomainObject):
|
||||||
stage=stage, line=line)
|
stage=stage, line=line)
|
||||||
try:
|
try:
|
||||||
err.save()
|
err.save()
|
||||||
except InvalidRequestError:
|
except InvalidRequestError, e:
|
||||||
|
# Clear any in-progress sqlalchemy transactions
|
||||||
|
try:
|
||||||
Session.rollback()
|
Session.rollback()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
Session.remove()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
err.save()
|
err.save()
|
||||||
finally:
|
finally:
|
||||||
log_message = '{0}, line {1}'.format(message, line) \
|
log_message = '{0}, line {1}'.format(message, line) \
|
||||||
|
|
|
@ -4,6 +4,7 @@ from nose.tools import assert_equal, assert_raises
|
||||||
import json
|
import json
|
||||||
from mock import patch, MagicMock
|
from mock import patch, MagicMock
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from ckan.tests.helpers import reset_db, call_action
|
from ckan.tests.helpers import reset_db, call_action
|
||||||
from ckan.tests.factories import Organization, Group
|
from ckan.tests.factories import Organization, Group
|
||||||
|
@ -221,3 +222,93 @@ class TestCkanHarvester(object):
|
||||||
harvester=CKANHarvester())
|
harvester=CKANHarvester())
|
||||||
assert not results_by_guid
|
assert not results_by_guid
|
||||||
assert not was_last_job_considered_error_free()
|
assert not was_last_job_considered_error_free()
|
||||||
|
|
||||||
|
def test_default_tags(self):
|
||||||
|
config = {'default_tags': [{'name': 'geo'}]}
|
||||||
|
results_by_guid = run_harvest(
|
||||||
|
url='http://localhost:%s' % mock_ckan.PORT,
|
||||||
|
harvester=CKANHarvester(),
|
||||||
|
config=json.dumps(config))
|
||||||
|
tags = results_by_guid['dataset1-id']['dataset']['tags']
|
||||||
|
tag_names = [tag['name'] for tag in tags]
|
||||||
|
assert 'geo' in tag_names
|
||||||
|
|
||||||
|
def test_default_tags_invalid(self):
|
||||||
|
config = {'default_tags': ['geo']} # should be list of dicts
|
||||||
|
assert_raises(
|
||||||
|
run_harvest,
|
||||||
|
url='http://localhost:%s' % mock_ckan.PORT,
|
||||||
|
harvester=CKANHarvester(),
|
||||||
|
config=json.dumps(config))
|
||||||
|
|
||||||
|
def test_default_groups(self):
|
||||||
|
Group(id='group1-id', name='group1')
|
||||||
|
Group(id='group2-id', name='group2')
|
||||||
|
Group(id='group3-id', name='group3')
|
||||||
|
|
||||||
|
config = {'default_groups': ['group2-id', 'group3'],
|
||||||
|
'remote_groups': 'only_local'}
|
||||||
|
tmp_c = toolkit.c
|
||||||
|
try:
|
||||||
|
# c.user is used by the validation (annoying),
|
||||||
|
# however patch doesn't work because it's a weird
|
||||||
|
# StackedObjectProxy, so we swap it manually
|
||||||
|
toolkit.c = MagicMock(user='')
|
||||||
|
results_by_guid = run_harvest(
|
||||||
|
url='http://localhost:%s' % mock_ckan.PORT,
|
||||||
|
harvester=CKANHarvester(),
|
||||||
|
config=json.dumps(config))
|
||||||
|
finally:
|
||||||
|
toolkit.c = tmp_c
|
||||||
|
assert_equal(results_by_guid['dataset1-id']['errors'], [])
|
||||||
|
groups = results_by_guid['dataset1-id']['dataset']['groups']
|
||||||
|
group_names = set(group['name'] for group in groups)
|
||||||
|
# group1 comes from the harvested dataset
|
||||||
|
# group2 & 3 come from the default_groups
|
||||||
|
assert_equal(group_names, set(('group1', 'group2', 'group3')))
|
||||||
|
|
||||||
|
def test_default_groups_invalid(self):
|
||||||
|
Group(id='group2-id', name='group2')
|
||||||
|
|
||||||
|
# should be list of strings
|
||||||
|
config = {'default_tags': [{'name': 'group2'}]}
|
||||||
|
assert_raises(
|
||||||
|
run_harvest,
|
||||||
|
url='http://localhost:%s' % mock_ckan.PORT,
|
||||||
|
harvester=CKANHarvester(),
|
||||||
|
config=json.dumps(config))
|
||||||
|
|
||||||
|
def test_default_extras(self):
|
||||||
|
config = {
|
||||||
|
'default_extras': {
|
||||||
|
'encoding': 'utf8',
|
||||||
|
'harvest_url': '{harvest_source_url}/dataset/{dataset_id}'
|
||||||
|
}}
|
||||||
|
tmp_c = toolkit.c
|
||||||
|
try:
|
||||||
|
# c.user is used by the validation (annoying),
|
||||||
|
# however patch doesn't work because it's a weird
|
||||||
|
# StackedObjectProxy, so we swap it manually
|
||||||
|
toolkit.c = MagicMock(user='')
|
||||||
|
results_by_guid = run_harvest(
|
||||||
|
url='http://localhost:%s' % mock_ckan.PORT,
|
||||||
|
harvester=CKANHarvester(),
|
||||||
|
config=json.dumps(config))
|
||||||
|
finally:
|
||||||
|
toolkit.c = tmp_c
|
||||||
|
assert_equal(results_by_guid['dataset1-id']['errors'], [])
|
||||||
|
extras = results_by_guid['dataset1-id']['dataset']['extras']
|
||||||
|
extras_dict = dict((e['key'], e['value']) for e in extras)
|
||||||
|
assert_equal(extras_dict['encoding'], 'utf8')
|
||||||
|
assert_equal(extras_dict['harvest_url'],
|
||||||
|
'http://localhost:8998/dataset/dataset1-id')
|
||||||
|
|
||||||
|
def test_default_extras_invalid(self):
|
||||||
|
config = {
|
||||||
|
'default_extras': 'utf8', # value should be a dict
|
||||||
|
}
|
||||||
|
assert_raises(
|
||||||
|
run_harvest,
|
||||||
|
url='http://localhost:%s' % mock_ckan.PORT,
|
||||||
|
harvester=CKANHarvester(),
|
||||||
|
config=json.dumps(config))
|
|
@ -10,7 +10,8 @@ def run_harvest(url, harvester, config=''):
|
||||||
Queues are avoided as they are a pain in tests.
|
Queues are avoided as they are a pain in tests.
|
||||||
'''
|
'''
|
||||||
# User creates a harvest source
|
# User creates a harvest source
|
||||||
source = HarvestSourceObj(url=url, config=config)
|
source = HarvestSourceObj(url=url, config=config,
|
||||||
|
source_type=harvester.info()['name'])
|
||||||
|
|
||||||
# User triggers a harvest, which is the creation of a harvest job.
|
# User triggers a harvest, which is the creation of a harvest job.
|
||||||
# We set run=False so that it doesn't put it on the gather queue.
|
# We set run=False so that it doesn't put it on the gather queue.
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -36,6 +36,7 @@ setup(
|
||||||
ckan_harvester=ckanext.harvest.harvesters:CKANHarvester
|
ckan_harvester=ckanext.harvest.harvesters:CKANHarvester
|
||||||
[ckan.test_plugins]
|
[ckan.test_plugins]
|
||||||
test_harvester=ckanext.harvest.tests.test_queue:MockHarvester
|
test_harvester=ckanext.harvest.tests.test_queue:MockHarvester
|
||||||
|
test_harvester2=ckanext.harvest.tests.test_queue2:MockHarvester
|
||||||
test_action_harvester=ckanext.harvest.tests.test_action:MockHarvesterForActionTests
|
test_action_harvester=ckanext.harvest.tests.test_action:MockHarvesterForActionTests
|
||||||
[paste.paster_command]
|
[paste.paster_command]
|
||||||
harvester = ckanext.harvest.commands.harvester:Harvester
|
harvester = ckanext.harvest.commands.harvester:Harvester
|
||||||
|
|
|
@ -15,7 +15,7 @@ port = 5000
|
||||||
use = config:../ckan/test-core.ini
|
use = config:../ckan/test-core.ini
|
||||||
# Here we hard-code the database and a flag to make default tests
|
# Here we hard-code the database and a flag to make default tests
|
||||||
# run fast.
|
# run fast.
|
||||||
ckan.plugins = harvest ckan_harvester test_harvester test_action_harvester
|
ckan.plugins = harvest ckan_harvester test_harvester test_harvester2 test_action_harvester
|
||||||
ckan.harvest.mq.type = redis
|
ckan.harvest.mq.type = redis
|
||||||
ckan.legacy_templates = false
|
ckan.legacy_templates = false
|
||||||
# NB: other test configuration should go in test-core.ini, which is
|
# NB: other test configuration should go in test-core.ini, which is
|
||||||
|
|
Loading…
Reference in New Issue