PEP8 and lint, extracted from PR158

This commit is contained in:
David Read 2016-02-15 13:50:18 +00:00
parent 6354ad5656
commit 4516bfe44e
3 changed files with 61 additions and 50 deletions

View File

@ -585,7 +585,8 @@ following steps with the one you are using.
describe the tasks that need to be monitored. These configuration files are describe the tasks that need to be monitored. These configuration files are
stored in ``/etc/supervisor/conf.d``. stored in ``/etc/supervisor/conf.d``.
Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and copy the following contents:: Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and
copy the following contents::
; =============================== ; ===============================
@ -676,10 +677,11 @@ following steps with the one you are using.
sudo crontab -e -u ckan sudo crontab -e -u ckan
Note that we are running this command as the same user we configured the processes to be run with Note that we are running this command as the same user we configured the
(`ckan` in our example). processes to be run with (`ckan` in our example).
Paste this line into your crontab, again replacing the paths to paster and the ini file with yours:: Paste this line into your crontab, again replacing the paths to paster and
the ini file with yours::
# m h dom mon dow command # m h dom mon dow command
*/15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini */15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini

View File

@ -8,7 +8,6 @@ from pylons import config
from ckan import plugins as p from ckan import plugins as p
from ckan import model from ckan import model
from ckan.model import Session, Package, PACKAGE_NAME_MAX_LENGTH from ckan.model import Session, Package, PACKAGE_NAME_MAX_LENGTH
from ckan.logic import ValidationError, NotFound, get_action
from ckan.logic.schema import default_create_package_schema from ckan.logic.schema import default_create_package_schema
from ckan.lib.navl.validators import ignore_missing, ignore from ckan.lib.navl.validators import ignore_missing, ignore
@ -288,7 +287,7 @@ class HarvesterBase(SingletonPlugin):
context.update({'id':package_dict['id']}) context.update({'id':package_dict['id']})
package_dict.setdefault('name', package_dict.setdefault('name',
existing_package_dict['name']) existing_package_dict['name'])
new_package = get_action('package_update_rest')(context, package_dict) new_package = p.toolkit.get_action('package_update_rest')(context, package_dict)
else: else:
log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid) log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid)
@ -309,7 +308,7 @@ class HarvesterBase(SingletonPlugin):
harvest_object.current = True harvest_object.current = True
harvest_object.save() harvest_object.save()
except NotFound: except p.toolkit.NotFound:
# Package needs to be created # Package needs to be created
# Get rid of auth audit on the context otherwise we'll get an # Get rid of auth audit on the context otherwise we'll get an
@ -333,13 +332,13 @@ class HarvesterBase(SingletonPlugin):
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
model.Session.flush() model.Session.flush()
new_package = get_action('package_create_rest')(context, package_dict) new_package = p.toolkit.get_action('package_create_rest')(context, package_dict)
Session.commit() Session.commit()
return True return True
except ValidationError,e: except p.toolkit.ValidationError, e:
log.exception(e) log.exception(e)
self._save_object_error('Invalid package with GUID %s: %r'%(harvest_object.guid,e.error_dict),harvest_object,'Import') self._save_object_error('Invalid package with GUID %s: %r'%(harvest_object.guid,e.error_dict),harvest_object,'Import')
except Exception, e: except Exception, e:
@ -352,5 +351,5 @@ class HarvesterBase(SingletonPlugin):
data_dict = {'id': package_dict['id']} data_dict = {'id': package_dict['id']}
package_show_context = {'model': model, 'session': Session, package_show_context = {'model': model, 'session': Session,
'ignore_auth': True} 'ignore_auth': True}
return get_action('package_show')( return p.toolkit.get_action('package_show')(
package_show_context, data_dict) package_show_context, data_dict)

View File

@ -17,6 +17,7 @@ log = logging.getLogger(__name__)
from base import HarvesterBase from base import HarvesterBase
class CKANHarvester(HarvesterBase): class CKANHarvester(HarvesterBase):
''' '''
A Harvester for CKAN instances A Harvester for CKAN instances
@ -61,20 +62,22 @@ class CKANHarvester(HarvesterBase):
content = self._get_content(url) content = self._get_content(url)
return json.loads(content) return json.loads(content)
except (ContentFetchError, ValueError): except (ContentFetchError, ValueError):
log.debug('Could not fetch/decode remote group'); log.debug('Could not fetch/decode remote group')
raise RemoteResourceError('Could not fetch/decode remote group') raise RemoteResourceError('Could not fetch/decode remote group')
def _get_organization(self, base_url, org_name): def _get_organization(self, base_url, org_name):
url = base_url + self._get_action_api_offset() + '/organization_show?id=' + org_name url = base_url + self._get_action_api_offset() + \
'/organization_show?id=' + org_name
try: try:
content = self._get_content(url) content = self._get_content(url)
content_dict = json.loads(content) content_dict = json.loads(content)
return content_dict['result'] return content_dict['result']
except (ContentFetchError, ValueError, KeyError): except (ContentFetchError, ValueError, KeyError):
log.debug('Could not fetch/decode remote group'); log.debug('Could not fetch/decode remote group')
raise RemoteResourceError('Could not fetch/decode remote organization') raise RemoteResourceError(
'Could not fetch/decode remote organization')
def _set_config(self,config_str): def _set_config(self, config_str):
if config_str: if config_str:
self.config = json.loads(config_str) self.config = json.loads(config_str)
if 'api_version' in self.config: if 'api_version' in self.config:
@ -89,10 +92,10 @@ class CKANHarvester(HarvesterBase):
'name': 'ckan', 'name': 'ckan',
'title': 'CKAN', 'title': 'CKAN',
'description': 'Harvests remote CKAN instances', 'description': 'Harvests remote CKAN instances',
'form_config_interface':'Text' 'form_config_interface': 'Text'
} }
def validate_config(self,config): def validate_config(self, config):
if not config: if not config:
return config return config
@ -106,46 +109,48 @@ class CKANHarvester(HarvesterBase):
raise ValueError('api_version must be an integer') raise ValueError('api_version must be an integer')
if 'default_tags' in config_obj: if 'default_tags' in config_obj:
if not isinstance(config_obj['default_tags'],list): if not isinstance(config_obj['default_tags'], list):
raise ValueError('default_tags must be a list') raise ValueError('default_tags must be a list')
if 'default_groups' in config_obj: if 'default_groups' in config_obj:
if not isinstance(config_obj['default_groups'],list): if not isinstance(config_obj['default_groups'], list):
raise ValueError('default_groups must be a list') raise ValueError('default_groups must be a list')
# Check if default groups exist # Check if default groups exist
context = {'model':model,'user':c.user} context = {'model': model, 'user': c.user}
for group_name in config_obj['default_groups']: for group_name in config_obj['default_groups']:
try: try:
group = get_action('group_show')(context,{'id':group_name}) group = get_action('group_show')(
except NotFound,e: context, {'id': group_name})
except NotFound, e:
raise ValueError('Default group not found') raise ValueError('Default group not found')
if 'default_extras' in config_obj: if 'default_extras' in config_obj:
if not isinstance(config_obj['default_extras'],dict): if not isinstance(config_obj['default_extras'], dict):
raise ValueError('default_extras must be a dictionary') raise ValueError('default_extras must be a dictionary')
if 'user' in config_obj: if 'user' in config_obj:
# Check if user exists # Check if user exists
context = {'model':model,'user':c.user} context = {'model': model, 'user': c.user}
try: try:
user = get_action('user_show')(context,{'id':config_obj.get('user')}) user = get_action('user_show')(
except NotFound,e: context, {'id': config_obj.get('user')})
except NotFound:
raise ValueError('User not found') raise ValueError('User not found')
for key in ('read_only','force_all'): for key in ('read_only', 'force_all'):
if key in config_obj: if key in config_obj:
if not isinstance(config_obj[key],bool): if not isinstance(config_obj[key], bool):
raise ValueError('%s must be boolean' % key) raise ValueError('%s must be boolean' % key)
except ValueError,e: except ValueError, e:
raise e raise e
return config return config
def gather_stage(self, harvest_job):
def gather_stage(self,harvest_job): log.debug('In CKANHarvester gather_stage (%s)',
log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url) harvest_job.source.url)
get_all_packages = True get_all_packages = True
package_ids = [] package_ids = []
@ -284,17 +289,20 @@ class CKANHarvester(HarvesterBase):
harvest_object.save() harvest_object.save()
return True return True
def import_stage(self,harvest_object):
def import_stage(self, harvest_object):
log.debug('In CKANHarvester import_stage') log.debug('In CKANHarvester import_stage')
context = {'model': model, 'session': Session, 'user': self._get_user_name()} context = {'model': model, 'session': Session,
'user': self._get_user_name()}
if not harvest_object: if not harvest_object:
log.error('No harvest object received') log.error('No harvest object received')
return False return False
if harvest_object.content is None: if harvest_object.content is None:
self._save_object_error('Empty content for object %s' % harvest_object.id, self._save_object_error('Empty content for object %s' %
harvest_object, 'Import') harvest_object.id,
harvest_object, 'Import')
return False return False
self._set_config(harvest_object.job.source.config) self._set_config(harvest_object.job.source.config)
@ -307,11 +315,12 @@ class CKANHarvester(HarvesterBase):
return True return True
# Set default tags if needed # Set default tags if needed
default_tags = self.config.get('default_tags',[]) default_tags = self.config.get('default_tags', [])
if default_tags: if default_tags:
if not 'tags' in package_dict: if not 'tags' in package_dict:
package_dict['tags'] = [] package_dict['tags'] = []
package_dict['tags'].extend([t for t in default_tags if t not in package_dict['tags']]) package_dict['tags'].extend(
[t for t in default_tags if t not in package_dict['tags']])
remote_groups = self.config.get('remote_groups', None) remote_groups = self.config.get('remote_groups', None)
if not remote_groups in ('only_local', 'create'): if not remote_groups in ('only_local', 'create'):
@ -333,19 +342,19 @@ class CKANHarvester(HarvesterBase):
else: else:
validated_groups.append(group['id']) validated_groups.append(group['id'])
except NotFound, e: except NotFound, e:
log.info('Group %s is not available' % group_name) log.info('Group %s is not available', group_name)
if remote_groups == 'create': if remote_groups == 'create':
try: try:
group = self._get_group(harvest_object.source.url, group_name) group = self._get_group(harvest_object.source.url, group_name)
except RemoteResourceError: except RemoteResourceError:
log.error('Could not get remote group %s' % group_name) log.error('Could not get remote group %s', group_name)
continue continue
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']: for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']:
group.pop(key, None) group.pop(key, None)
get_action('group_create')(context, group) get_action('group_create')(context, group)
log.info('Group %s has been newly created' % group_name) log.info('Group %s has been newly created', group_name)
if self.api_version == 1: if self.api_version == 1:
validated_groups.append(group['name']) validated_groups.append(group['name'])
else: else:
@ -377,7 +386,7 @@ class CKANHarvester(HarvesterBase):
org = get_action('organization_show')(context, data_dict) org = get_action('organization_show')(context, data_dict)
validated_org = org['id'] validated_org = org['id']
except NotFound, e: except NotFound, e:
log.info('Organization %s is not available' % remote_org) log.info('Organization %s is not available', remote_org)
if remote_orgs == 'create': if remote_orgs == 'create':
try: try:
try: try:
@ -390,10 +399,10 @@ class CKANHarvester(HarvesterBase):
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']: for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']:
org.pop(key, None) org.pop(key, None)
get_action('organization_create')(context, org) get_action('organization_create')(context, org)
log.info('Organization %s has been newly created' % remote_org) log.info('Organization %s has been newly created', remote_org)
validated_org = org['id'] validated_org = org['id']
except (RemoteResourceError, ValidationError): except (RemoteResourceError, ValidationError):
log.error('Could not get remote org %s' % remote_org) log.error('Could not get remote org %s', remote_org)
package_dict['owner_org'] = validated_org or local_org package_dict['owner_org'] = validated_org or local_org
@ -417,12 +426,12 @@ class CKANHarvester(HarvesterBase):
del package_dict['extras'][key] del package_dict['extras'][key]
# Set default extras if needed # Set default extras if needed
default_extras = self.config.get('default_extras',{}) default_extras = self.config.get('default_extras', {})
if default_extras: if default_extras:
override_extras = self.config.get('override_extras',False) override_extras = self.config.get('override_extras', False)
if not 'extras' in package_dict: if not 'extras' in package_dict:
package_dict['extras'] = {} package_dict['extras'] = {}
for key,value in default_extras.iteritems(): for key, value in default_extras.iteritems():
if not key in package_dict['extras'] or override_extras: if not key in package_dict['extras'] or override_extras:
# Look for replacement strings # Look for replacement strings
if isinstance(value,basestring): if isinstance(value,basestring):
@ -466,9 +475,10 @@ class CKANHarvester(HarvesterBase):
pkg_role = model.PackageRole(package=package, user=user, role=model.Role.READER) pkg_role = model.PackageRole(package=package, user=user, role=model.Role.READER)
return result return result
except ValidationError,e: except ValidationError, e:
self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict), self._save_object_error('Invalid package with GUID %s: %r' %
harvest_object, 'Import') (harvest_object.guid, e.error_dict),
harvest_object, 'Import')
except Exception, e: except Exception, e:
self._save_object_error('%r'%e,harvest_object,'Import') self._save_object_error('%r'%e,harvest_object,'Import')