From 7d71b0a00b8742bc79d6d227016beaf0849b886e Mon Sep 17 00:00:00 2001 From: Rachel Knowler Date: Wed, 29 Jan 2014 10:02:16 +0100 Subject: [PATCH 1/4] Wrap tag munging code in config option, defaulting to False. --- ckanext/harvest/harvesters/base.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index c2855e4..ad3a629 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -20,13 +20,20 @@ from ckanext.harvest.model import HarvestJob, HarvestObject, HarvestGatherError, from ckan.plugins.core import SingletonPlugin, implements from ckanext.harvest.interfaces import IHarvester +from pylons import config + log = logging.getLogger(__name__) def munge_tag(tag): - tag = substitute_ascii_equivalents(tag) - tag = tag.lower().strip() - return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-') + clean_tags = config.get('ckanext.harvest.ckanharvester.clean_tags') + if clean_tags: + tag = substitute_ascii_equivalents(tag) + tag = tag.lower().strip() + return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-') + else: + return tag + class HarvesterBase(SingletonPlugin): From 5e1aef1d08a95edac33af6e3a83d9287c053382d Mon Sep 17 00:00:00 2001 From: Rachel Knowler Date: Wed, 29 Jan 2014 10:06:32 +0100 Subject: [PATCH 2/4] Removed extra newline. 
--- ckanext/harvest/harvesters/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index ad3a629..5429543 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -35,7 +35,6 @@ def munge_tag(tag): return tag - class HarvesterBase(SingletonPlugin): ''' Generic class for harvesters with helper functions From 2ba990865383a1a557f009a06cb382ee6283cd47 Mon Sep 17 00:00:00 2001 From: Rachel Knowler Date: Wed, 29 Jan 2014 10:55:51 +0100 Subject: [PATCH 3/4] Config option to munge tags changed to be consistent with other config options in this extension, and noted in README. --- README.rst | 6 ++++++ ckanext/harvest/harvesters/base.py | 13 +++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 773d074..06ee23e 100644 --- a/README.rst +++ b/README.rst @@ -206,6 +206,12 @@ field. The currently supported configuration options are: present in the local CKAN. Setting it to 'create' will make an attempt to create the organizations by copying the details from the remote CKAN. +* clean_tags: By default, tags are kept exactly as harvested (accented characters, + spaces and capital letters are preserved). If this option is set to True, accented + characters are replaced by their ASCII equivalents, capital letters by lower-case + ones and spaces by dashes; any other character that is not a letter, digit or dash + is removed. Setting this option to False gives the same effect as leaving it unset.
+ Here is an example of a configuration object (the one that must be entered in the configuration field):: diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index 5429543..138a3ce 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -20,17 +20,18 @@ from ckanext.harvest.model import HarvestJob, HarvestObject, HarvestGatherError, from ckan.plugins.core import SingletonPlugin, implements from ckanext.harvest.interfaces import IHarvester -from pylons import config log = logging.getLogger(__name__) def munge_tag(tag): - clean_tags = config.get('ckanext.harvest.ckanharvester.clean_tags') - if clean_tags: - tag = substitute_ascii_equivalents(tag) - tag = tag.lower().strip() - return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-') + if self.config: + if self.config.get('clean_tags', False): + tag = substitute_ascii_equivalents(tag) + tag = tag.lower().strip() + return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-') + else: + return tag else: return tag From bf11e4d3309866edc1c43fc10776a6c6c1946ad8 Mon Sep 17 00:00:00 2001 From: Rachel Knowler Date: Mon, 10 Feb 2014 09:29:01 +0100 Subject: [PATCH 4/4] Moved clean_tags check into _create_or_update_package method. 
--- ckanext/harvest/harvesters/base.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index 138a3ce..35c1881 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -25,15 +25,9 @@ log = logging.getLogger(__name__) def munge_tag(tag): - if self.config: - if self.config.get('clean_tags', False): - tag = substitute_ascii_equivalents(tag) - tag = tag.lower().strip() - return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-') - else: - return tag - else: - return tag + tag = substitute_ascii_equivalents(tag) + tag = tag.lower().strip() + return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-') class HarvesterBase(SingletonPlugin): @@ -157,10 +151,11 @@ class HarvesterBase(SingletonPlugin): 'ignore_auth': True, } - tags = package_dict.get('tags', []) - tags = [munge_tag(t) for t in tags] - tags = list(set(tags)) - package_dict['tags'] = tags + if self.config and self.config.get('clean_tags', False): + tags = package_dict.get('tags', []) + tags = [munge_tag(t) for t in tags] + tags = list(set(tags)) + package_dict['tags'] = tags # Check if package exists data_dict = {}