[#158] PEP8/formatting.

This commit is contained in:
David Read 2015-10-27 17:43:11 +00:00
parent 2a79873855
commit 55245b5091
2 changed files with 44 additions and 35 deletions

View File

@ -12,8 +12,9 @@ and adds a CLI and a WUI to CKAN to manage harvesting sources and jobs.
Installation Installation
============ ============
This extension requires CKAN v2.0 or later, although the CKAN harvester can This extension requires CKAN v2.0 or later on both the CKAN it is installed
harvest from CKANs of earlier versions. into and the CKANs it harvests. However, you are unlikely to encounter a CKAN
running a version lower than 2.0.
1. The harvest extension can use two different backends. You can choose whichever 1. The harvest extension can use two different backends. You can choose whichever
you prefer depending on your needs, but Redis has been found to be more stable you prefer depending on your needs, but Redis has been found to be more stable
@ -49,8 +50,9 @@ harvest from CKANs of earlier versions.
ckan.plugins = harvest ckan_harvester ckan.plugins = harvest ckan_harvester
5. If you haven't done it yet on the previous step, define the backend that you are using with the ``ckan.harvest.mq.type`` 5. If you haven't done it yet on the previous step, define the backend that you
option (it defaults to ``rabbitmq``):: are using with the ``ckan.harvest.mq.type`` option (it defaults to
``rabbitmq``)::
ckan.harvest.mq.type = redis ckan.harvest.mq.type = redis
@ -473,7 +475,8 @@ following steps with the one you are using.
describe the tasks that need to be monitored. These configuration files are describe the tasks that need to be monitored. These configuration files are
stored in ``/etc/supervisor/conf.d``. stored in ``/etc/supervisor/conf.d``.
Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and copy the following contents:: Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and
copy the following contents::
; =============================== ; ===============================
@ -564,10 +567,11 @@ following steps with the one you are using.
sudo crontab -e -u ckan sudo crontab -e -u ckan
Note that we are running this command as the same user we configured the processes to be run with Note that we are running this command as the same user we configured the
(`ckan` in our example). processes to be run with (`ckan` in our example).
Paste this line into your crontab, again replacing the paths to paster and the ini file with yours:: Paste this line into your crontab, again replacing the paths to paster and
the ini file with yours::
# m h dom mon dow command # m h dom mon dow command
*/15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini */15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini

View File

@ -29,7 +29,7 @@ class CKANHarvester(HarvesterBase):
return '/api/%d/action' % self.action_api_version return '/api/%d/action' % self.action_api_version
def _get_search_api_offset(self): def _get_search_api_offset(self):
return "%s/package_search" % self._get_action_api_offset() return '%s/package_search' % self._get_action_api_offset()
def _get_content(self, url): def _get_content(self, url):
http_request = urllib2.Request(url=url) http_request = urllib2.Request(url=url)
@ -48,25 +48,28 @@ class CKANHarvester(HarvesterBase):
return http_response.read() return http_response.read()
def _get_group(self, base_url, group_name): def _get_group(self, base_url, group_name):
url = base_url + self._get_action_api_offset() + '/group_show?id=' + munge_name(group_name) url = base_url + self._get_action_api_offset() + '/group_show?id=' + \
munge_name(group_name)
try: try:
content = self._get_content(url) content = self._get_content(url)
return json.loads(content) return json.loads(content)
except (ContentFetchError, ValueError): except (ContentFetchError, ValueError):
log.debug('Could not fetch/decode remote group'); log.debug('Could not fetch/decode remote group')
raise RemoteResourceError('Could not fetch/decode remote group') raise RemoteResourceError('Could not fetch/decode remote group')
def _get_organization(self, base_url, org_name): def _get_organization(self, base_url, org_name):
url = base_url + self._get_action_api_offset() + '/organization_show?id=' + org_name url = base_url + self._get_action_api_offset() + \
'/organization_show?id=' + org_name
try: try:
content = self._get_content(url) content = self._get_content(url)
content_dict = json.loads(content) content_dict = json.loads(content)
return content_dict['result'] return content_dict['result']
except (ContentFetchError, ValueError, KeyError): except (ContentFetchError, ValueError, KeyError):
log.debug('Could not fetch/decode remote group'); log.debug('Could not fetch/decode remote group')
raise RemoteResourceError('Could not fetch/decode remote organization') raise RemoteResourceError(
'Could not fetch/decode remote organization')
def _set_config(self,config_str): def _set_config(self, config_str):
if config_str: if config_str:
self.config = json.loads(config_str) self.config = json.loads(config_str)
if 'api_version' in self.config: if 'api_version' in self.config:
@ -81,10 +84,10 @@ class CKANHarvester(HarvesterBase):
'name': 'ckan', 'name': 'ckan',
'title': 'CKAN', 'title': 'CKAN',
'description': 'Harvests remote CKAN instances', 'description': 'Harvests remote CKAN instances',
'form_config_interface':'Text' 'form_config_interface': 'Text'
} }
def validate_config(self,config): def validate_config(self, config):
if not config: if not config:
return config return config
@ -98,39 +101,41 @@ class CKANHarvester(HarvesterBase):
raise ValueError('api_version must be an integer') raise ValueError('api_version must be an integer')
if 'default_tags' in config_obj: if 'default_tags' in config_obj:
if not isinstance(config_obj['default_tags'],list): if not isinstance(config_obj['default_tags'], list):
raise ValueError('default_tags must be a list') raise ValueError('default_tags must be a list')
if 'default_groups' in config_obj: if 'default_groups' in config_obj:
if not isinstance(config_obj['default_groups'],list): if not isinstance(config_obj['default_groups'], list):
raise ValueError('default_groups must be a list') raise ValueError('default_groups must be a list')
# Check if default groups exist # Check if default groups exist
context = {'model':model,'user':c.user} context = {'model': model, 'user': c.user}
for group_name in config_obj['default_groups']: for group_name in config_obj['default_groups']:
try: try:
group = get_action('group_show')(context,{'id':group_name}) group = get_action('group_show')(
except NotFound,e: context, {'id': group_name})
except NotFound, e:
raise ValueError('Default group not found') raise ValueError('Default group not found')
if 'default_extras' in config_obj: if 'default_extras' in config_obj:
if not isinstance(config_obj['default_extras'],dict): if not isinstance(config_obj['default_extras'], dict):
raise ValueError('default_extras must be a dictionary') raise ValueError('default_extras must be a dictionary')
if 'user' in config_obj: if 'user' in config_obj:
# Check if user exists # Check if user exists
context = {'model':model,'user':c.user} context = {'model': model, 'user': c.user}
try: try:
user = get_action('user_show')(context,{'id':config_obj.get('user')}) user = get_action('user_show')(
except NotFound,e: context, {'id': config_obj.get('user')})
except NotFound, e:
raise ValueError('User not found') raise ValueError('User not found')
for key in ('read_only','force_all'): for key in ('read_only', 'force_all'):
if key in config_obj: if key in config_obj:
if not isinstance(config_obj[key],bool): if not isinstance(config_obj[key], bool):
raise ValueError('%s must be boolean' % key) raise ValueError('%s must be boolean' % key)
except ValueError,e: except ValueError, e:
raise e raise e
return config return config
@ -334,19 +339,19 @@ class CKANHarvester(HarvesterBase):
else: else:
validated_groups.append(group['id']) validated_groups.append(group['id'])
except NotFound, e: except NotFound, e:
log.info('Group %s is not available' % group_name) log.info('Group %s is not available', group_name)
if remote_groups == 'create': if remote_groups == 'create':
try: try:
group = self._get_group(harvest_object.source.url, group_name) group = self._get_group(harvest_object.source.url, group_name)
except RemoteResourceError: except RemoteResourceError:
log.error('Could not get remote group %s' % group_name) log.error('Could not get remote group %s', group_name)
continue continue
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']: for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']:
group.pop(key, None) group.pop(key, None)
get_action('group_create')(context, group) get_action('group_create')(context, group)
log.info('Group %s has been newly created' % group_name) log.info('Group %s has been newly created', group_name)
if self.api_version == 1: if self.api_version == 1:
validated_groups.append(group['name']) validated_groups.append(group['name'])
else: else:
@ -378,7 +383,7 @@ class CKANHarvester(HarvesterBase):
org = get_action('organization_show')(context, data_dict) org = get_action('organization_show')(context, data_dict)
validated_org = org['id'] validated_org = org['id']
except NotFound, e: except NotFound, e:
log.info('Organization %s is not available' % remote_org) log.info('Organization %s is not available', remote_org)
if remote_orgs == 'create': if remote_orgs == 'create':
try: try:
try: try:
@ -391,10 +396,10 @@ class CKANHarvester(HarvesterBase):
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']: for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']:
org.pop(key, None) org.pop(key, None)
get_action('organization_create')(context, org) get_action('organization_create')(context, org)
log.info('Organization %s has been newly created' % remote_org) log.info('Organization %s has been newly created', remote_org)
validated_org = org['id'] validated_org = org['id']
except (RemoteResourceError, ValidationError): except (RemoteResourceError, ValidationError):
log.error('Could not get remote org %s' % remote_org) log.error('Could not get remote org %s', remote_org)
package_dict['owner_org'] = validated_org or local_org package_dict['owner_org'] = validated_org or local_org