diff --git a/README.rst b/README.rst
index 773d074..06ee23e 100644
--- a/README.rst
+++ b/README.rst
@@ -206,6 +206,12 @@ field. The currently supported configuration options are:
     present in the local CKAN. Setting it to 'create' will make an attempt to
     create the organizations by copying the details from the remote CKAN.
 
+*   clean_tags: By default, tags are not stripped of accent characters, spaces and
+    capital letters for display. If this option is set to True, accent characters
+    will be replaced by their ascii equivalents, capital letters replaced by
+    lower-case ones, and spaces replaced with dashes. Setting this option to False
+    gives the same effect as leaving it unset.
+
 Here is an example of a configuration object (the one that must be entered in
 the configuration field)::
 
diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py
index 5429543..138a3ce 100644
--- a/ckanext/harvest/harvesters/base.py
+++ b/ckanext/harvest/harvesters/base.py
@@ -20,17 +20,32 @@ from ckanext.harvest.model import HarvestJob, HarvestObject, HarvestGatherError,
 
 from ckan.plugins.core import SingletonPlugin, implements
 from ckanext.harvest.interfaces import IHarvester
-from pylons import config
 
 log = logging.getLogger(__name__)
 
 
-def munge_tag(tag):
-    clean_tags = config.get('ckanext.harvest.ckanharvester.clean_tags')
-    if clean_tags:
-        tag = substitute_ascii_equivalents(tag)
-        tag = tag.lower().strip()
-        return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-')
+def munge_tag(tag, config=None):
+    '''Return the tag, cleaned up if the given config enables clean_tags.
+
+    This is a module-level function, so there is no ``self`` to read the
+    configuration from; callers must pass it in as ``config``.
+
+    :param tag: the tag name to process
+    :param config: configuration object supporting ``get``, or None
+        (the default, so existing ``munge_tag(tag)`` calls keep working)
+    :returns: ``tag`` unchanged unless ``config`` has a true
+        ``clean_tags`` value; otherwise the tag passed through
+        ``substitute_ascii_equivalents``, lower-cased, stripped, with
+        characters other than ASCII letters, digits, spaces and dashes
+        removed and spaces replaced by dashes
+    '''
+    if config:
+        if config.get('clean_tags', False):
+            tag = substitute_ascii_equivalents(tag)
+            tag = tag.lower().strip()
+            return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-')
+        else:
+            return tag
     else:
         return tag
 