commit
b485aa97e5
10
README.rst
10
README.rst
|
@ -585,7 +585,8 @@ following steps with the one you are using.
|
|||
describe the tasks that need to be monitored. These configuration files are
|
||||
stored in ``/etc/supervisor/conf.d``.
|
||||
|
||||
Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and copy the following contents::
|
||||
Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and
|
||||
copy the following contents::
|
||||
|
||||
|
||||
; ===============================
|
||||
|
@ -676,10 +677,11 @@ following steps with the one you are using.
|
|||
|
||||
sudo crontab -e -u ckan
|
||||
|
||||
Note that we are running this command as the same user we configured the processes to be run with
|
||||
(`ckan` in our example).
|
||||
Note that we are running this command as the same user we configured the
|
||||
processes to be run with (``ckan`` in our example).
|
||||
|
||||
Paste this line into your crontab, again replacing the paths to paster and the ini file with yours::
|
||||
Paste this line into your crontab, again replacing the paths to paster and
|
||||
the ini file with yours::
|
||||
|
||||
# m h dom mon dow command
|
||||
*/15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini
|
||||
|
|
|
@ -8,7 +8,6 @@ from pylons import config
|
|||
from ckan import plugins as p
|
||||
from ckan import model
|
||||
from ckan.model import Session, Package, PACKAGE_NAME_MAX_LENGTH
|
||||
from ckan.logic import ValidationError, NotFound, get_action
|
||||
|
||||
from ckan.logic.schema import default_create_package_schema
|
||||
from ckan.lib.navl.validators import ignore_missing, ignore
|
||||
|
@ -288,7 +287,7 @@ class HarvesterBase(SingletonPlugin):
|
|||
context.update({'id':package_dict['id']})
|
||||
package_dict.setdefault('name',
|
||||
existing_package_dict['name'])
|
||||
new_package = get_action('package_update_rest')(context, package_dict)
|
||||
new_package = p.toolkit.get_action('package_update_rest')(context, package_dict)
|
||||
|
||||
else:
|
||||
log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid)
|
||||
|
@ -309,7 +308,7 @@ class HarvesterBase(SingletonPlugin):
|
|||
harvest_object.current = True
|
||||
harvest_object.save()
|
||||
|
||||
except NotFound:
|
||||
except p.toolkit.ObjectNotFound:
|
||||
# Package needs to be created
|
||||
|
||||
# Get rid of auth audit on the context otherwise we'll get an
|
||||
|
@ -333,13 +332,13 @@ class HarvesterBase(SingletonPlugin):
|
|||
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
|
||||
model.Session.flush()
|
||||
|
||||
new_package = get_action('package_create_rest')(context, package_dict)
|
||||
new_package = p.toolkit.get_action('package_create_rest')(context, package_dict)
|
||||
|
||||
Session.commit()
|
||||
|
||||
return True
|
||||
|
||||
except ValidationError,e:
|
||||
except p.toolkit.ValidationError, e:
|
||||
log.exception(e)
|
||||
self._save_object_error('Invalid package with GUID %s: %r'%(harvest_object.guid,e.error_dict),harvest_object,'Import')
|
||||
except Exception, e:
|
||||
|
@ -352,5 +351,5 @@ class HarvesterBase(SingletonPlugin):
|
|||
data_dict = {'id': package_dict['id']}
|
||||
package_show_context = {'model': model, 'session': Session,
|
||||
'ignore_auth': True}
|
||||
return get_action('package_show')(
|
||||
return p.toolkit.get_action('package_show')(
|
||||
package_show_context, data_dict)
|
||||
|
|
|
@ -17,6 +17,7 @@ log = logging.getLogger(__name__)
|
|||
|
||||
from base import HarvesterBase
|
||||
|
||||
|
||||
class CKANHarvester(HarvesterBase):
|
||||
'''
|
||||
A Harvester for CKAN instances
|
||||
|
@ -61,20 +62,22 @@ class CKANHarvester(HarvesterBase):
|
|||
content = self._get_content(url)
|
||||
return json.loads(content)
|
||||
except (ContentFetchError, ValueError):
|
||||
log.debug('Could not fetch/decode remote group');
|
||||
log.debug('Could not fetch/decode remote group')
|
||||
raise RemoteResourceError('Could not fetch/decode remote group')
|
||||
|
||||
def _get_organization(self, base_url, org_name):
|
||||
url = base_url + self._get_action_api_offset() + '/organization_show?id=' + org_name
|
||||
url = base_url + self._get_action_api_offset() + \
|
||||
'/organization_show?id=' + org_name
|
||||
try:
|
||||
content = self._get_content(url)
|
||||
content_dict = json.loads(content)
|
||||
return content_dict['result']
|
||||
except (ContentFetchError, ValueError, KeyError):
|
||||
log.debug('Could not fetch/decode remote group');
|
||||
raise RemoteResourceError('Could not fetch/decode remote organization')
|
||||
log.debug('Could not fetch/decode remote group')
|
||||
raise RemoteResourceError(
|
||||
'Could not fetch/decode remote organization')
|
||||
|
||||
def _set_config(self,config_str):
|
||||
def _set_config(self, config_str):
|
||||
if config_str:
|
||||
self.config = json.loads(config_str)
|
||||
if 'api_version' in self.config:
|
||||
|
@ -89,10 +92,10 @@ class CKANHarvester(HarvesterBase):
|
|||
'name': 'ckan',
|
||||
'title': 'CKAN',
|
||||
'description': 'Harvests remote CKAN instances',
|
||||
'form_config_interface':'Text'
|
||||
'form_config_interface': 'Text'
|
||||
}
|
||||
|
||||
def validate_config(self,config):
|
||||
def validate_config(self, config):
|
||||
if not config:
|
||||
return config
|
||||
|
||||
|
@ -106,46 +109,48 @@ class CKANHarvester(HarvesterBase):
|
|||
raise ValueError('api_version must be an integer')
|
||||
|
||||
if 'default_tags' in config_obj:
|
||||
if not isinstance(config_obj['default_tags'],list):
|
||||
if not isinstance(config_obj['default_tags'], list):
|
||||
raise ValueError('default_tags must be a list')
|
||||
|
||||
if 'default_groups' in config_obj:
|
||||
if not isinstance(config_obj['default_groups'],list):
|
||||
if not isinstance(config_obj['default_groups'], list):
|
||||
raise ValueError('default_groups must be a list')
|
||||
|
||||
# Check if default groups exist
|
||||
context = {'model':model,'user':c.user}
|
||||
context = {'model': model, 'user': c.user}
|
||||
for group_name in config_obj['default_groups']:
|
||||
try:
|
||||
group = get_action('group_show')(context,{'id':group_name})
|
||||
except NotFound,e:
|
||||
group = get_action('group_show')(
|
||||
context, {'id': group_name})
|
||||
except NotFound, e:
|
||||
raise ValueError('Default group not found')
|
||||
|
||||
if 'default_extras' in config_obj:
|
||||
if not isinstance(config_obj['default_extras'],dict):
|
||||
if not isinstance(config_obj['default_extras'], dict):
|
||||
raise ValueError('default_extras must be a dictionary')
|
||||
|
||||
if 'user' in config_obj:
|
||||
# Check if user exists
|
||||
context = {'model':model,'user':c.user}
|
||||
context = {'model': model, 'user': c.user}
|
||||
try:
|
||||
user = get_action('user_show')(context,{'id':config_obj.get('user')})
|
||||
except NotFound,e:
|
||||
user = get_action('user_show')(
|
||||
context, {'id': config_obj.get('user')})
|
||||
except NotFound:
|
||||
raise ValueError('User not found')
|
||||
|
||||
for key in ('read_only','force_all'):
|
||||
for key in ('read_only', 'force_all'):
|
||||
if key in config_obj:
|
||||
if not isinstance(config_obj[key],bool):
|
||||
if not isinstance(config_obj[key], bool):
|
||||
raise ValueError('%s must be boolean' % key)
|
||||
|
||||
except ValueError,e:
|
||||
except ValueError, e:
|
||||
raise e
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def gather_stage(self,harvest_job):
|
||||
log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url)
|
||||
def gather_stage(self, harvest_job):
|
||||
log.debug('In CKANHarvester gather_stage (%s)',
|
||||
harvest_job.source.url)
|
||||
get_all_packages = True
|
||||
package_ids = []
|
||||
|
||||
|
@ -284,16 +289,19 @@ class CKANHarvester(HarvesterBase):
|
|||
harvest_object.save()
|
||||
return True
|
||||
|
||||
def import_stage(self,harvest_object):
|
||||
|
||||
def import_stage(self, harvest_object):
|
||||
log.debug('In CKANHarvester import_stage')
|
||||
|
||||
context = {'model': model, 'session': Session, 'user': self._get_user_name()}
|
||||
context = {'model': model, 'session': Session,
|
||||
'user': self._get_user_name()}
|
||||
if not harvest_object:
|
||||
log.error('No harvest object received')
|
||||
return False
|
||||
|
||||
if harvest_object.content is None:
|
||||
self._save_object_error('Empty content for object %s' % harvest_object.id,
|
||||
self._save_object_error('Empty content for object %s' %
|
||||
harvest_object.id,
|
||||
harvest_object, 'Import')
|
||||
return False
|
||||
|
||||
|
@ -307,11 +315,12 @@ class CKANHarvester(HarvesterBase):
|
|||
return True
|
||||
|
||||
# Set default tags if needed
|
||||
default_tags = self.config.get('default_tags',[])
|
||||
default_tags = self.config.get('default_tags', [])
|
||||
if default_tags:
|
||||
if not 'tags' in package_dict:
|
||||
package_dict['tags'] = []
|
||||
package_dict['tags'].extend([t for t in default_tags if t not in package_dict['tags']])
|
||||
package_dict['tags'].extend(
|
||||
[t for t in default_tags if t not in package_dict['tags']])
|
||||
|
||||
remote_groups = self.config.get('remote_groups', None)
|
||||
if not remote_groups in ('only_local', 'create'):
|
||||
|
@ -333,19 +342,19 @@ class CKANHarvester(HarvesterBase):
|
|||
else:
|
||||
validated_groups.append(group['id'])
|
||||
except NotFound, e:
|
||||
log.info('Group %s is not available' % group_name)
|
||||
log.info('Group %s is not available', group_name)
|
||||
if remote_groups == 'create':
|
||||
try:
|
||||
group = self._get_group(harvest_object.source.url, group_name)
|
||||
except RemoteResourceError:
|
||||
log.error('Could not get remote group %s' % group_name)
|
||||
log.error('Could not get remote group %s', group_name)
|
||||
continue
|
||||
|
||||
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']:
|
||||
group.pop(key, None)
|
||||
|
||||
get_action('group_create')(context, group)
|
||||
log.info('Group %s has been newly created' % group_name)
|
||||
log.info('Group %s has been newly created', group_name)
|
||||
if self.api_version == 1:
|
||||
validated_groups.append(group['name'])
|
||||
else:
|
||||
|
@ -377,7 +386,7 @@ class CKANHarvester(HarvesterBase):
|
|||
org = get_action('organization_show')(context, data_dict)
|
||||
validated_org = org['id']
|
||||
except NotFound, e:
|
||||
log.info('Organization %s is not available' % remote_org)
|
||||
log.info('Organization %s is not available', remote_org)
|
||||
if remote_orgs == 'create':
|
||||
try:
|
||||
try:
|
||||
|
@ -390,10 +399,10 @@ class CKANHarvester(HarvesterBase):
|
|||
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']:
|
||||
org.pop(key, None)
|
||||
get_action('organization_create')(context, org)
|
||||
log.info('Organization %s has been newly created' % remote_org)
|
||||
log.info('Organization %s has been newly created', remote_org)
|
||||
validated_org = org['id']
|
||||
except (RemoteResourceError, ValidationError):
|
||||
log.error('Could not get remote org %s' % remote_org)
|
||||
log.error('Could not get remote org %s', remote_org)
|
||||
|
||||
package_dict['owner_org'] = validated_org or local_org
|
||||
|
||||
|
@ -417,12 +426,12 @@ class CKANHarvester(HarvesterBase):
|
|||
del package_dict['extras'][key]
|
||||
|
||||
# Set default extras if needed
|
||||
default_extras = self.config.get('default_extras',{})
|
||||
default_extras = self.config.get('default_extras', {})
|
||||
if default_extras:
|
||||
override_extras = self.config.get('override_extras',False)
|
||||
override_extras = self.config.get('override_extras', False)
|
||||
if not 'extras' in package_dict:
|
||||
package_dict['extras'] = {}
|
||||
for key,value in default_extras.iteritems():
|
||||
for key, value in default_extras.iteritems():
|
||||
if not key in package_dict['extras'] or override_extras:
|
||||
# Look for replacement strings
|
||||
if isinstance(value,basestring):
|
||||
|
@ -466,8 +475,9 @@ class CKANHarvester(HarvesterBase):
|
|||
pkg_role = model.PackageRole(package=package, user=user, role=model.Role.READER)
|
||||
|
||||
return result
|
||||
except ValidationError,e:
|
||||
self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
|
||||
except ValidationError, e:
|
||||
self._save_object_error('Invalid package with GUID %s: %r' %
|
||||
(harvest_object.guid, e.error_dict),
|
||||
harvest_object, 'Import')
|
||||
except Exception, e:
|
||||
self._save_object_error('%r'%e,harvest_object,'Import')
|
||||
|
|
Loading…
Reference in New Issue