Merge pull request #251 from ckan/249-default-tag
Fix default_groups, plus fix docs for default_tags
This commit is contained in:
commit
f2d8a5f8cc
15
README.rst
15
README.rst
|
@ -289,12 +289,11 @@ field. The currently supported configuration options are:
|
|||
the CKAN API. Default is 2.
|
||||
|
||||
* default_tags: A list of tags that will be added to all harvested datasets.
|
||||
Tags don't need to previously exist.
|
||||
Tags don't need to previously exist. This field takes a list of tag dicts
|
||||
(see example), which allows you to optionally specify a vocabulary.
|
||||
|
||||
* default_groups: A list of groups to which the harvested datasets will be
|
||||
added to. The groups must exist. Note that you must use ids or names to
|
||||
define the groups according to the API version you defined (names for version
|
||||
1, ids for version 2).
|
||||
* default_groups: A list of group IDs or names to which the harvested datasets
|
||||
will be added. The groups must exist.
|
||||
|
||||
* default_extras: A dictionary of key value pairs that will be added to extras
|
||||
of the harvested datasets. You can use the following replacement strings,
|
||||
|
@ -367,9 +366,9 @@ the configuration field)::
|
|||
|
||||
{
|
||||
"api_version": 1,
|
||||
"default_tags":["new-tag-1","new-tag-2"],
|
||||
"default_groups":["my-own-group"],
|
||||
"default_extras":{"new_extra":"Test","harvest_url":"{harvest_source_url}/dataset/{dataset_id}"},
|
||||
"default_tags": [{"name": "geo"}, {"name": "namibia"}],
|
||||
"default_groups": ["science", "spend-data"],
|
||||
"default_extras": {"encoding":"utf8", "harvest_url": "{harvest_source_url}/dataset/{dataset_id}"},
|
||||
"override_extras": true,
|
||||
"organizations_filter_include": [],
|
||||
"organizations_filter_exclude": ["remote-organization"],
|
||||
|
|
|
@ -6,7 +6,6 @@ import socket
|
|||
|
||||
from sqlalchemy import exists
|
||||
|
||||
from ckan.lib.base import c
|
||||
from ckan import model
|
||||
from ckan.logic import ValidationError, NotFound, get_action
|
||||
from ckan.lib.helpers import json
|
||||
|
@ -68,9 +67,7 @@ class CKANHarvester(HarvesterBase):
|
|||
data = json.loads(content)
|
||||
if self.action_api_version == 3:
|
||||
return data.pop('result')
|
||||
|
||||
return data
|
||||
|
||||
except (ContentFetchError, ValueError):
|
||||
log.debug('Could not fetch/decode remote group')
|
||||
raise RemoteResourceError('Could not fetch/decode remote group')
|
||||
|
@ -121,17 +118,28 @@ class CKANHarvester(HarvesterBase):
|
|||
if 'default_tags' in config_obj:
|
||||
if not isinstance(config_obj['default_tags'], list):
|
||||
raise ValueError('default_tags must be a list')
|
||||
if config_obj['default_tags'] and \
|
||||
not isinstance(config_obj['default_tags'][0], dict):
|
||||
raise ValueError('default_tags must be a list of '
|
||||
'dictionaries')
|
||||
|
||||
if 'default_groups' in config_obj:
|
||||
if not isinstance(config_obj['default_groups'], list):
|
||||
raise ValueError('default_groups must be a list')
|
||||
raise ValueError('default_groups must be a *list* of group'
|
||||
' names/ids')
|
||||
if config_obj['default_groups'] and \
|
||||
not isinstance(config_obj['default_groups'][0], str):
|
||||
raise ValueError('default_groups must be a list of group '
|
||||
'names/ids (i.e. strings)')
|
||||
|
||||
# Check if default groups exist
|
||||
context = {'model': model, 'user': c.user}
|
||||
for group_name in config_obj['default_groups']:
|
||||
context = {'model': model, 'user': toolkit.c.user}
|
||||
self.default_group_dicts = []
|
||||
for group_name_or_id in config_obj['default_groups']:
|
||||
try:
|
||||
group = get_action('group_show')(
|
||||
context, {'id': group_name})
|
||||
context, {'id': group_name_or_id})
|
||||
self.default_group_dicts.append(group)
|
||||
except NotFound, e:
|
||||
raise ValueError('Default group not found')
|
||||
|
||||
|
@ -141,7 +149,7 @@ class CKANHarvester(HarvesterBase):
|
|||
|
||||
if 'user' in config_obj:
|
||||
# Check if user exists
|
||||
context = {'model': model, 'user': c.user}
|
||||
context = {'model': model, 'user': toolkit.c.user}
|
||||
try:
|
||||
user = get_action('user_show')(
|
||||
context, {'id': config_obj.get('user')})
|
||||
|
@ -481,9 +489,10 @@ class CKANHarvester(HarvesterBase):
|
|||
if default_groups:
|
||||
if not 'groups' in package_dict:
|
||||
package_dict['groups'] = []
|
||||
existing_group_ids = [g['id'] for g in package_dict['groups']]
|
||||
package_dict['groups'].extend(
|
||||
[g for g in default_groups
|
||||
if g not in package_dict['groups']])
|
||||
[g for g in self.default_group_dicts
|
||||
if g['id'] not in existing_group_ids])
|
||||
|
||||
# Set default extras if needed
|
||||
default_extras = self.config.get('default_extras', {})
|
||||
|
|
|
@ -197,8 +197,16 @@ class HarvestObjectError(HarvestDomainObject):
|
|||
stage=stage, line=line)
|
||||
try:
|
||||
err.save()
|
||||
except InvalidRequestError:
|
||||
Session.rollback()
|
||||
except InvalidRequestError, e:
|
||||
# Clear any in-progress sqlalchemy transactions
|
||||
try:
|
||||
Session.rollback()
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
Session.remove()
|
||||
except:
|
||||
pass
|
||||
err.save()
|
||||
finally:
|
||||
log_message = '{0}, line {1}'.format(message, line) \
|
||||
|
|
|
@ -4,6 +4,7 @@ from nose.tools import assert_equal, assert_raises
|
|||
import json
|
||||
from mock import patch, MagicMock
|
||||
|
||||
|
||||
try:
|
||||
from ckan.tests.helpers import reset_db, call_action
|
||||
from ckan.tests.factories import Organization, Group
|
||||
|
@ -221,3 +222,93 @@ class TestCkanHarvester(object):
|
|||
harvester=CKANHarvester())
|
||||
assert not results_by_guid
|
||||
assert not was_last_job_considered_error_free()
|
||||
|
||||
def test_default_tags(self):
|
||||
config = {'default_tags': [{'name': 'geo'}]}
|
||||
results_by_guid = run_harvest(
|
||||
url='http://localhost:%s' % mock_ckan.PORT,
|
||||
harvester=CKANHarvester(),
|
||||
config=json.dumps(config))
|
||||
tags = results_by_guid['dataset1-id']['dataset']['tags']
|
||||
tag_names = [tag['name'] for tag in tags]
|
||||
assert 'geo' in tag_names
|
||||
|
||||
def test_default_tags_invalid(self):
|
||||
config = {'default_tags': ['geo']} # should be list of dicts
|
||||
assert_raises(
|
||||
run_harvest,
|
||||
url='http://localhost:%s' % mock_ckan.PORT,
|
||||
harvester=CKANHarvester(),
|
||||
config=json.dumps(config))
|
||||
|
||||
def test_default_groups(self):
|
||||
Group(id='group1-id', name='group1')
|
||||
Group(id='group2-id', name='group2')
|
||||
Group(id='group3-id', name='group3')
|
||||
|
||||
config = {'default_groups': ['group2-id', 'group3'],
|
||||
'remote_groups': 'only_local'}
|
||||
tmp_c = toolkit.c
|
||||
try:
|
||||
# c.user is used by the validation (annoying),
|
||||
# however patch doesn't work because it's a weird
|
||||
# StackedObjectProxy, so we swap it manually
|
||||
toolkit.c = MagicMock(user='')
|
||||
results_by_guid = run_harvest(
|
||||
url='http://localhost:%s' % mock_ckan.PORT,
|
||||
harvester=CKANHarvester(),
|
||||
config=json.dumps(config))
|
||||
finally:
|
||||
toolkit.c = tmp_c
|
||||
assert_equal(results_by_guid['dataset1-id']['errors'], [])
|
||||
groups = results_by_guid['dataset1-id']['dataset']['groups']
|
||||
group_names = set(group['name'] for group in groups)
|
||||
# group1 comes from the harvested dataset
|
||||
# group2 & 3 come from the default_groups
|
||||
assert_equal(group_names, set(('group1', 'group2', 'group3')))
|
||||
|
||||
def test_default_groups_invalid(self):
|
||||
Group(id='group2-id', name='group2')
|
||||
|
||||
# should be list of strings
|
||||
config = {'default_tags': [{'name': 'group2'}]}
|
||||
assert_raises(
|
||||
run_harvest,
|
||||
url='http://localhost:%s' % mock_ckan.PORT,
|
||||
harvester=CKANHarvester(),
|
||||
config=json.dumps(config))
|
||||
|
||||
def test_default_extras(self):
|
||||
config = {
|
||||
'default_extras': {
|
||||
'encoding': 'utf8',
|
||||
'harvest_url': '{harvest_source_url}/dataset/{dataset_id}'
|
||||
}}
|
||||
tmp_c = toolkit.c
|
||||
try:
|
||||
# c.user is used by the validation (annoying),
|
||||
# however patch doesn't work because it's a weird
|
||||
# StackedObjectProxy, so we swap it manually
|
||||
toolkit.c = MagicMock(user='')
|
||||
results_by_guid = run_harvest(
|
||||
url='http://localhost:%s' % mock_ckan.PORT,
|
||||
harvester=CKANHarvester(),
|
||||
config=json.dumps(config))
|
||||
finally:
|
||||
toolkit.c = tmp_c
|
||||
assert_equal(results_by_guid['dataset1-id']['errors'], [])
|
||||
extras = results_by_guid['dataset1-id']['dataset']['extras']
|
||||
extras_dict = dict((e['key'], e['value']) for e in extras)
|
||||
assert_equal(extras_dict['encoding'], 'utf8')
|
||||
assert_equal(extras_dict['harvest_url'],
|
||||
'http://localhost:8998/dataset/dataset1-id')
|
||||
|
||||
def test_default_extras_invalid(self):
|
||||
config = {
|
||||
'default_extras': 'utf8', # value should be a dict
|
||||
}
|
||||
assert_raises(
|
||||
run_harvest,
|
||||
url='http://localhost:%s' % mock_ckan.PORT,
|
||||
harvester=CKANHarvester(),
|
||||
config=json.dumps(config))
|
|
@ -10,7 +10,8 @@ def run_harvest(url, harvester, config=''):
|
|||
Queues are avoided as they are a pain in tests.
|
||||
'''
|
||||
# User creates a harvest source
|
||||
source = HarvestSourceObj(url=url, config=config)
|
||||
source = HarvestSourceObj(url=url, config=config,
|
||||
source_type=harvester.info()['name'])
|
||||
|
||||
# User triggers a harvest, which is the creation of a harvest job.
|
||||
# We set run=False so that it doesn't put it on the gather queue.
|
||||
|
|
1
setup.py
1
setup.py
|
@ -36,6 +36,7 @@ setup(
|
|||
ckan_harvester=ckanext.harvest.harvesters:CKANHarvester
|
||||
[ckan.test_plugins]
|
||||
test_harvester=ckanext.harvest.tests.test_queue:MockHarvester
|
||||
test_harvester2=ckanext.harvest.tests.test_queue2:MockHarvester
|
||||
test_action_harvester=ckanext.harvest.tests.test_action:MockHarvesterForActionTests
|
||||
[paste.paster_command]
|
||||
harvester = ckanext.harvest.commands.harvester:Harvester
|
||||
|
|
|
@ -15,7 +15,7 @@ port = 5000
|
|||
use = config:../ckan/test-core.ini
|
||||
# Here we hard-code the database and a flag to make default tests
|
||||
# run fast.
|
||||
ckan.plugins = harvest ckan_harvester test_harvester test_action_harvester
|
||||
ckan.plugins = harvest ckan_harvester test_harvester test_harvester2 test_action_harvester
|
||||
ckan.harvest.mq.type = redis
|
||||
ckan.legacy_templates = false
|
||||
# NB: other test configuration should go in test-core.ini, which is
|
||||
|
|
Loading…
Reference in New Issue