From 4516bfe44e5c7da4bbb7947cae92152b191e771f Mon Sep 17 00:00:00 2001 From: David Read Date: Mon, 15 Feb 2016 13:50:18 +0000 Subject: [PATCH 1/2] PEP8 and lint, extracted from PR158 --- README.rst | 10 ++- ckanext/harvest/harvesters/base.py | 11 ++- ckanext/harvest/harvesters/ckanharvester.py | 90 ++++++++++++--------- 3 files changed, 61 insertions(+), 50 deletions(-) diff --git a/README.rst b/README.rst index da72490..e07ccc5 100644 --- a/README.rst +++ b/README.rst @@ -585,7 +585,8 @@ following steps with the one you are using. describe the tasks that need to be monitored. This configuration files are stored in ``/etc/supervisor/conf.d``. - Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and copy the following contents:: + Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and + copy the following contents:: ; =============================== @@ -676,10 +677,11 @@ following steps with the one you are using. sudo crontab -e -u ckan - Note that we are running this command as the same user we configured the processes to be run with - (`ckan` in our example). + Note that we are running this command as the same user we configured the + processes to be run with (`ckan` in our example). - Paste this line into your crontab, again replacing the paths to paster and the ini file with yours:: + Paste this line into your crontab, again replacing the paths to paster and + the ini file with yours:: # m h dom mon dow command */15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index 90a1b14..9f4c485 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -8,7 +8,6 @@ from pylons import config from ckan import plugins as p from ckan import model from ckan.model import Session, Package, PACKAGE_NAME_MAX_LENGTH -from ckan.logic import ValidationError, NotFound, get_action from ckan.logic.schema import default_create_package_schema from ckan.lib.navl.validators import ignore_missing, ignore @@ -288,7 +287,7 @@ class HarvesterBase(SingletonPlugin): context.update({'id':package_dict['id']}) package_dict.setdefault('name', existing_package_dict['name']) - new_package = get_action('package_update_rest')(context, package_dict) + new_package = p.toolkit.get_action('package_update_rest')(context, package_dict) else: log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid) @@ -309,7 +308,7 @@ class HarvesterBase(SingletonPlugin): harvest_object.current = True harvest_object.save() - except NotFound: + except p.toolkit.NotFound: # Package needs to be created # Get rid of auth audit on the context otherwise we'll get an @@ -333,13 +332,13 @@ class HarvesterBase(SingletonPlugin): model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') model.Session.flush() - new_package = get_action('package_create_rest')(context, package_dict) + new_package = p.toolkit.get_action('package_create_rest')(context, package_dict) Session.commit() return True - except ValidationError,e: + except p.toolkit.ValidationError, e: log.exception(e) self._save_object_error('Invalid package with GUID %s: %r'%(harvest_object.guid,e.error_dict),harvest_object,'Import') except Exception, e: @@ -352,5 +351,5 @@ class HarvesterBase(SingletonPlugin): data_dict = {'id': package_dict['id']} package_show_context = {'model': model, 'session': Session, 'ignore_auth': True} - return get_action('package_show')( + return p.toolkit.get_action('package_show')( package_show_context, data_dict) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 9099b38..cedb843 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -17,6 +17,7 @@ log = logging.getLogger(__name__) from base import HarvesterBase + class CKANHarvester(HarvesterBase): ''' A Harvester for CKAN instances @@ -61,20 +62,22 @@ class CKANHarvester(HarvesterBase): content = self._get_content(url) return json.loads(content) except (ContentFetchError, ValueError): - log.debug('Could not fetch/decode remote group'); + log.debug('Could not fetch/decode remote group') raise RemoteResourceError('Could not fetch/decode remote group') def _get_organization(self, base_url, org_name): - url = base_url + self._get_action_api_offset() + '/organization_show?id=' + org_name + url = base_url + self._get_action_api_offset() + \ + '/organization_show?id=' + org_name try: content = self._get_content(url) content_dict = json.loads(content) return content_dict['result'] except (ContentFetchError, ValueError, KeyError): - log.debug('Could not fetch/decode remote group'); - raise RemoteResourceError('Could not fetch/decode remote organization') + log.debug('Could not fetch/decode remote group') + raise RemoteResourceError( + 'Could not fetch/decode remote organization') - def _set_config(self,config_str): + def _set_config(self, config_str): if config_str: self.config = json.loads(config_str) if 'api_version' in self.config: @@ -89,10 +92,10 @@ class CKANHarvester(HarvesterBase): 'name': 'ckan', 'title': 'CKAN', 'description': 'Harvests remote CKAN instances', - 'form_config_interface':'Text' + 'form_config_interface': 'Text' } - def validate_config(self,config): + def validate_config(self, config): if not config: return config @@ -106,46 +109,48 @@ class CKANHarvester(HarvesterBase): raise ValueError('api_version must be an integer') if 'default_tags' in config_obj: - if not isinstance(config_obj['default_tags'],list): + if not isinstance(config_obj['default_tags'], list): raise ValueError('default_tags must be a list') if 'default_groups' in config_obj: - if not isinstance(config_obj['default_groups'],list): + if not isinstance(config_obj['default_groups'], list): raise ValueError('default_groups must be a list') # Check if default groups exist - context = {'model':model,'user':c.user} + context = {'model': model, 'user': c.user} for group_name in config_obj['default_groups']: try: - group = get_action('group_show')(context,{'id':group_name}) - except NotFound,e: + group = get_action('group_show')( + context, {'id': group_name}) + except NotFound, e: raise ValueError('Default group not found') if 'default_extras' in config_obj: - if not isinstance(config_obj['default_extras'],dict): + if not isinstance(config_obj['default_extras'], dict): raise ValueError('default_extras must be a dictionary') if 'user' in config_obj: # Check if user exists - context = {'model':model,'user':c.user} + context = {'model': model, 'user': c.user} try: - user = get_action('user_show')(context,{'id':config_obj.get('user')}) - except NotFound,e: + user = get_action('user_show')( + context, {'id': config_obj.get('user')}) + except NotFound: raise ValueError('User not found') - for key in ('read_only','force_all'): + for key in ('read_only', 'force_all'): if key in config_obj: - if not isinstance(config_obj[key],bool): + if not isinstance(config_obj[key], bool): raise ValueError('%s must be boolean' % key) - except ValueError,e: + except ValueError, e: raise e return config - - def gather_stage(self,harvest_job): - log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url) + def gather_stage(self, harvest_job): + log.debug('In CKANHarvester gather_stage (%s)', + harvest_job.source.url) get_all_packages = True package_ids = [] @@ -284,17 +289,20 @@ class CKANHarvester(HarvesterBase): harvest_object.save() return True - def import_stage(self,harvest_object): + + def import_stage(self, harvest_object): log.debug('In CKANHarvester import_stage') - context = {'model': model, 'session': Session, 'user': self._get_user_name()} + context = {'model': model, 'session': Session, + 'user': self._get_user_name()} if not harvest_object: log.error('No harvest object received') return False if harvest_object.content is None: - self._save_object_error('Empty content for object %s' % harvest_object.id, - harvest_object, 'Import') + self._save_object_error('Empty content for object %s' % + harvest_object.id, + harvest_object, 'Import') return False self._set_config(harvest_object.job.source.config) @@ -307,11 +315,12 @@ class CKANHarvester(HarvesterBase): return True # Set default tags if needed - default_tags = self.config.get('default_tags',[]) + default_tags = self.config.get('default_tags', []) if default_tags: if not 'tags' in package_dict: package_dict['tags'] = [] - package_dict['tags'].extend([t for t in default_tags if t not in package_dict['tags']]) + package_dict['tags'].extend( + [t for t in default_tags if t not in package_dict['tags']]) remote_groups = self.config.get('remote_groups', None) if not remote_groups in ('only_local', 'create'): @@ -333,19 +342,19 @@ class CKANHarvester(HarvesterBase): else: validated_groups.append(group['id']) except NotFound, e: - log.info('Group %s is not available' % group_name) + log.info('Group %s is not available', group_name) if remote_groups == 'create': try: group = self._get_group(harvest_object.source.url, group_name) except RemoteResourceError: - log.error('Could not get remote group %s' % group_name) + log.error('Could not get remote group %s', group_name) continue for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']: group.pop(key, None) get_action('group_create')(context, group) - log.info('Group %s has been newly created' % group_name) + log.info('Group %s has been newly created', group_name) if self.api_version == 1: validated_groups.append(group['name']) else: @@ -377,7 +386,7 @@ class CKANHarvester(HarvesterBase): org = get_action('organization_show')(context, data_dict) validated_org = org['id'] except NotFound, e: - log.info('Organization %s is not available' % remote_org) + log.info('Organization %s is not available', remote_org) if remote_orgs == 'create': try: try: @@ -390,10 +399,10 @@ class CKANHarvester(HarvesterBase): for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']: org.pop(key, None) get_action('organization_create')(context, org) - log.info('Organization %s has been newly created' % remote_org) + log.info('Organization %s has been newly created', remote_org) validated_org = org['id'] except (RemoteResourceError, ValidationError): - log.error('Could not get remote org %s' % remote_org) + log.error('Could not get remote org %s', remote_org) package_dict['owner_org'] = validated_org or local_org @@ -417,12 +426,12 @@ class CKANHarvester(HarvesterBase): del package_dict['extras'][key] # Set default extras if needed - default_extras = self.config.get('default_extras',{}) + default_extras = self.config.get('default_extras', {}) if default_extras: - override_extras = self.config.get('override_extras',False) + override_extras = self.config.get('override_extras', False) if not 'extras' in package_dict: package_dict['extras'] = {} - for key,value in default_extras.iteritems(): + for key, value in default_extras.iteritems(): if not key in package_dict['extras'] or override_extras: # Look for replacement strings if isinstance(value,basestring): @@ -466,9 +475,10 @@ class CKANHarvester(HarvesterBase): pkg_role = model.PackageRole(package=package, user=user, role=model.Role.READER) return result - except ValidationError,e: - self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict), - harvest_object, 'Import') + except ValidationError, e: + self._save_object_error('Invalid package with GUID %s: %r' % + (harvest_object.guid, e.error_dict), + harvest_object, 'Import') except Exception, e: self._save_object_error('%r'%e,harvest_object,'Import') From bf0d1fd7797ab92db6c206f49e1e75bef74728e2 Mon Sep 17 00:00:00 2001 From: David Read Date: Mon, 15 Feb 2016 13:54:58 +0000 Subject: [PATCH 2/2] Fix name error --- ckanext/harvest/harvesters/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index 9f4c485..d3fd770 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -308,7 +308,7 @@ class HarvesterBase(SingletonPlugin): harvest_object.current = True harvest_object.save() - except p.toolkit.NotFound: + except p.toolkit.ObjectNotFound: # Package needs to be created # Get rid of auth audit on the context otherwise we'll get an