commit
b485aa97e5
10
README.rst
10
README.rst
|
@ -585,7 +585,8 @@ following steps with the one you are using.
|
||||||
describe the tasks that need to be monitored. These configuration files are
|
describe the tasks that need to be monitored. These configuration files are
|
||||||
stored in ``/etc/supervisor/conf.d``.
|
stored in ``/etc/supervisor/conf.d``.
|
||||||
|
|
||||||
Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and copy the following contents::
|
Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and
|
||||||
|
copy the following contents::
|
||||||
|
|
||||||
|
|
||||||
; ===============================
|
; ===============================
|
||||||
|
@ -676,10 +677,11 @@ following steps with the one you are using.
|
||||||
|
|
||||||
sudo crontab -e -u ckan
|
sudo crontab -e -u ckan
|
||||||
|
|
||||||
Note that we are running this command as the same user we configured the processes to be run with
|
Note that we are running this command as the same user we configured the
|
||||||
(`ckan` in our example).
|
processes to be run with (`ckan` in our example).
|
||||||
|
|
||||||
Paste this line into your crontab, again replacing the paths to paster and the ini file with yours::
|
Paste this line into your crontab, again replacing the paths to paster and
|
||||||
|
the ini file with yours::
|
||||||
|
|
||||||
# m h dom mon dow command
|
# m h dom mon dow command
|
||||||
*/15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini
|
*/15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini
|
||||||
|
|
|
@ -8,7 +8,6 @@ from pylons import config
|
||||||
from ckan import plugins as p
|
from ckan import plugins as p
|
||||||
from ckan import model
|
from ckan import model
|
||||||
from ckan.model import Session, Package, PACKAGE_NAME_MAX_LENGTH
|
from ckan.model import Session, Package, PACKAGE_NAME_MAX_LENGTH
|
||||||
from ckan.logic import ValidationError, NotFound, get_action
|
|
||||||
|
|
||||||
from ckan.logic.schema import default_create_package_schema
|
from ckan.logic.schema import default_create_package_schema
|
||||||
from ckan.lib.navl.validators import ignore_missing, ignore
|
from ckan.lib.navl.validators import ignore_missing, ignore
|
||||||
|
@ -288,7 +287,7 @@ class HarvesterBase(SingletonPlugin):
|
||||||
context.update({'id':package_dict['id']})
|
context.update({'id':package_dict['id']})
|
||||||
package_dict.setdefault('name',
|
package_dict.setdefault('name',
|
||||||
existing_package_dict['name'])
|
existing_package_dict['name'])
|
||||||
new_package = get_action('package_update_rest')(context, package_dict)
|
new_package = p.toolkit.get_action('package_update_rest')(context, package_dict)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid)
|
log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid)
|
||||||
|
@ -309,7 +308,7 @@ class HarvesterBase(SingletonPlugin):
|
||||||
harvest_object.current = True
|
harvest_object.current = True
|
||||||
harvest_object.save()
|
harvest_object.save()
|
||||||
|
|
||||||
except NotFound:
|
except p.toolkit.ObjectNotFound:
|
||||||
# Package needs to be created
|
# Package needs to be created
|
||||||
|
|
||||||
# Get rid of auth audit on the context otherwise we'll get an
|
# Get rid of auth audit on the context otherwise we'll get an
|
||||||
|
@ -333,13 +332,13 @@ class HarvesterBase(SingletonPlugin):
|
||||||
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
|
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
|
||||||
model.Session.flush()
|
model.Session.flush()
|
||||||
|
|
||||||
new_package = get_action('package_create_rest')(context, package_dict)
|
new_package = p.toolkit.get_action('package_create_rest')(context, package_dict)
|
||||||
|
|
||||||
Session.commit()
|
Session.commit()
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except ValidationError,e:
|
except p.toolkit.ValidationError, e:
|
||||||
log.exception(e)
|
log.exception(e)
|
||||||
self._save_object_error('Invalid package with GUID %s: %r'%(harvest_object.guid,e.error_dict),harvest_object,'Import')
|
self._save_object_error('Invalid package with GUID %s: %r'%(harvest_object.guid,e.error_dict),harvest_object,'Import')
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
@ -352,5 +351,5 @@ class HarvesterBase(SingletonPlugin):
|
||||||
data_dict = {'id': package_dict['id']}
|
data_dict = {'id': package_dict['id']}
|
||||||
package_show_context = {'model': model, 'session': Session,
|
package_show_context = {'model': model, 'session': Session,
|
||||||
'ignore_auth': True}
|
'ignore_auth': True}
|
||||||
return get_action('package_show')(
|
return p.toolkit.get_action('package_show')(
|
||||||
package_show_context, data_dict)
|
package_show_context, data_dict)
|
||||||
|
|
|
@ -17,6 +17,7 @@ log = logging.getLogger(__name__)
|
||||||
|
|
||||||
from base import HarvesterBase
|
from base import HarvesterBase
|
||||||
|
|
||||||
|
|
||||||
class CKANHarvester(HarvesterBase):
|
class CKANHarvester(HarvesterBase):
|
||||||
'''
|
'''
|
||||||
A Harvester for CKAN instances
|
A Harvester for CKAN instances
|
||||||
|
@ -61,20 +62,22 @@ class CKANHarvester(HarvesterBase):
|
||||||
content = self._get_content(url)
|
content = self._get_content(url)
|
||||||
return json.loads(content)
|
return json.loads(content)
|
||||||
except (ContentFetchError, ValueError):
|
except (ContentFetchError, ValueError):
|
||||||
log.debug('Could not fetch/decode remote group');
|
log.debug('Could not fetch/decode remote group')
|
||||||
raise RemoteResourceError('Could not fetch/decode remote group')
|
raise RemoteResourceError('Could not fetch/decode remote group')
|
||||||
|
|
||||||
def _get_organization(self, base_url, org_name):
|
def _get_organization(self, base_url, org_name):
|
||||||
url = base_url + self._get_action_api_offset() + '/organization_show?id=' + org_name
|
url = base_url + self._get_action_api_offset() + \
|
||||||
|
'/organization_show?id=' + org_name
|
||||||
try:
|
try:
|
||||||
content = self._get_content(url)
|
content = self._get_content(url)
|
||||||
content_dict = json.loads(content)
|
content_dict = json.loads(content)
|
||||||
return content_dict['result']
|
return content_dict['result']
|
||||||
except (ContentFetchError, ValueError, KeyError):
|
except (ContentFetchError, ValueError, KeyError):
|
||||||
log.debug('Could not fetch/decode remote group');
|
log.debug('Could not fetch/decode remote group')
|
||||||
raise RemoteResourceError('Could not fetch/decode remote organization')
|
raise RemoteResourceError(
|
||||||
|
'Could not fetch/decode remote organization')
|
||||||
|
|
||||||
def _set_config(self,config_str):
|
def _set_config(self, config_str):
|
||||||
if config_str:
|
if config_str:
|
||||||
self.config = json.loads(config_str)
|
self.config = json.loads(config_str)
|
||||||
if 'api_version' in self.config:
|
if 'api_version' in self.config:
|
||||||
|
@ -89,10 +92,10 @@ class CKANHarvester(HarvesterBase):
|
||||||
'name': 'ckan',
|
'name': 'ckan',
|
||||||
'title': 'CKAN',
|
'title': 'CKAN',
|
||||||
'description': 'Harvests remote CKAN instances',
|
'description': 'Harvests remote CKAN instances',
|
||||||
'form_config_interface':'Text'
|
'form_config_interface': 'Text'
|
||||||
}
|
}
|
||||||
|
|
||||||
def validate_config(self,config):
|
def validate_config(self, config):
|
||||||
if not config:
|
if not config:
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
@ -106,46 +109,48 @@ class CKANHarvester(HarvesterBase):
|
||||||
raise ValueError('api_version must be an integer')
|
raise ValueError('api_version must be an integer')
|
||||||
|
|
||||||
if 'default_tags' in config_obj:
|
if 'default_tags' in config_obj:
|
||||||
if not isinstance(config_obj['default_tags'],list):
|
if not isinstance(config_obj['default_tags'], list):
|
||||||
raise ValueError('default_tags must be a list')
|
raise ValueError('default_tags must be a list')
|
||||||
|
|
||||||
if 'default_groups' in config_obj:
|
if 'default_groups' in config_obj:
|
||||||
if not isinstance(config_obj['default_groups'],list):
|
if not isinstance(config_obj['default_groups'], list):
|
||||||
raise ValueError('default_groups must be a list')
|
raise ValueError('default_groups must be a list')
|
||||||
|
|
||||||
# Check if default groups exist
|
# Check if default groups exist
|
||||||
context = {'model':model,'user':c.user}
|
context = {'model': model, 'user': c.user}
|
||||||
for group_name in config_obj['default_groups']:
|
for group_name in config_obj['default_groups']:
|
||||||
try:
|
try:
|
||||||
group = get_action('group_show')(context,{'id':group_name})
|
group = get_action('group_show')(
|
||||||
except NotFound,e:
|
context, {'id': group_name})
|
||||||
|
except NotFound, e:
|
||||||
raise ValueError('Default group not found')
|
raise ValueError('Default group not found')
|
||||||
|
|
||||||
if 'default_extras' in config_obj:
|
if 'default_extras' in config_obj:
|
||||||
if not isinstance(config_obj['default_extras'],dict):
|
if not isinstance(config_obj['default_extras'], dict):
|
||||||
raise ValueError('default_extras must be a dictionary')
|
raise ValueError('default_extras must be a dictionary')
|
||||||
|
|
||||||
if 'user' in config_obj:
|
if 'user' in config_obj:
|
||||||
# Check if user exists
|
# Check if user exists
|
||||||
context = {'model':model,'user':c.user}
|
context = {'model': model, 'user': c.user}
|
||||||
try:
|
try:
|
||||||
user = get_action('user_show')(context,{'id':config_obj.get('user')})
|
user = get_action('user_show')(
|
||||||
except NotFound,e:
|
context, {'id': config_obj.get('user')})
|
||||||
|
except NotFound:
|
||||||
raise ValueError('User not found')
|
raise ValueError('User not found')
|
||||||
|
|
||||||
for key in ('read_only','force_all'):
|
for key in ('read_only', 'force_all'):
|
||||||
if key in config_obj:
|
if key in config_obj:
|
||||||
if not isinstance(config_obj[key],bool):
|
if not isinstance(config_obj[key], bool):
|
||||||
raise ValueError('%s must be boolean' % key)
|
raise ValueError('%s must be boolean' % key)
|
||||||
|
|
||||||
except ValueError,e:
|
except ValueError, e:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
def gather_stage(self, harvest_job):
|
||||||
def gather_stage(self,harvest_job):
|
log.debug('In CKANHarvester gather_stage (%s)',
|
||||||
log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url)
|
harvest_job.source.url)
|
||||||
get_all_packages = True
|
get_all_packages = True
|
||||||
package_ids = []
|
package_ids = []
|
||||||
|
|
||||||
|
@ -284,17 +289,20 @@ class CKANHarvester(HarvesterBase):
|
||||||
harvest_object.save()
|
harvest_object.save()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def import_stage(self,harvest_object):
|
|
||||||
|
def import_stage(self, harvest_object):
|
||||||
log.debug('In CKANHarvester import_stage')
|
log.debug('In CKANHarvester import_stage')
|
||||||
|
|
||||||
context = {'model': model, 'session': Session, 'user': self._get_user_name()}
|
context = {'model': model, 'session': Session,
|
||||||
|
'user': self._get_user_name()}
|
||||||
if not harvest_object:
|
if not harvest_object:
|
||||||
log.error('No harvest object received')
|
log.error('No harvest object received')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if harvest_object.content is None:
|
if harvest_object.content is None:
|
||||||
self._save_object_error('Empty content for object %s' % harvest_object.id,
|
self._save_object_error('Empty content for object %s' %
|
||||||
harvest_object, 'Import')
|
harvest_object.id,
|
||||||
|
harvest_object, 'Import')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self._set_config(harvest_object.job.source.config)
|
self._set_config(harvest_object.job.source.config)
|
||||||
|
@ -307,11 +315,12 @@ class CKANHarvester(HarvesterBase):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Set default tags if needed
|
# Set default tags if needed
|
||||||
default_tags = self.config.get('default_tags',[])
|
default_tags = self.config.get('default_tags', [])
|
||||||
if default_tags:
|
if default_tags:
|
||||||
if not 'tags' in package_dict:
|
if not 'tags' in package_dict:
|
||||||
package_dict['tags'] = []
|
package_dict['tags'] = []
|
||||||
package_dict['tags'].extend([t for t in default_tags if t not in package_dict['tags']])
|
package_dict['tags'].extend(
|
||||||
|
[t for t in default_tags if t not in package_dict['tags']])
|
||||||
|
|
||||||
remote_groups = self.config.get('remote_groups', None)
|
remote_groups = self.config.get('remote_groups', None)
|
||||||
if not remote_groups in ('only_local', 'create'):
|
if not remote_groups in ('only_local', 'create'):
|
||||||
|
@ -333,19 +342,19 @@ class CKANHarvester(HarvesterBase):
|
||||||
else:
|
else:
|
||||||
validated_groups.append(group['id'])
|
validated_groups.append(group['id'])
|
||||||
except NotFound, e:
|
except NotFound, e:
|
||||||
log.info('Group %s is not available' % group_name)
|
log.info('Group %s is not available', group_name)
|
||||||
if remote_groups == 'create':
|
if remote_groups == 'create':
|
||||||
try:
|
try:
|
||||||
group = self._get_group(harvest_object.source.url, group_name)
|
group = self._get_group(harvest_object.source.url, group_name)
|
||||||
except RemoteResourceError:
|
except RemoteResourceError:
|
||||||
log.error('Could not get remote group %s' % group_name)
|
log.error('Could not get remote group %s', group_name)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']:
|
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']:
|
||||||
group.pop(key, None)
|
group.pop(key, None)
|
||||||
|
|
||||||
get_action('group_create')(context, group)
|
get_action('group_create')(context, group)
|
||||||
log.info('Group %s has been newly created' % group_name)
|
log.info('Group %s has been newly created', group_name)
|
||||||
if self.api_version == 1:
|
if self.api_version == 1:
|
||||||
validated_groups.append(group['name'])
|
validated_groups.append(group['name'])
|
||||||
else:
|
else:
|
||||||
|
@ -377,7 +386,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
org = get_action('organization_show')(context, data_dict)
|
org = get_action('organization_show')(context, data_dict)
|
||||||
validated_org = org['id']
|
validated_org = org['id']
|
||||||
except NotFound, e:
|
except NotFound, e:
|
||||||
log.info('Organization %s is not available' % remote_org)
|
log.info('Organization %s is not available', remote_org)
|
||||||
if remote_orgs == 'create':
|
if remote_orgs == 'create':
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
|
@ -390,10 +399,10 @@ class CKANHarvester(HarvesterBase):
|
||||||
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']:
|
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']:
|
||||||
org.pop(key, None)
|
org.pop(key, None)
|
||||||
get_action('organization_create')(context, org)
|
get_action('organization_create')(context, org)
|
||||||
log.info('Organization %s has been newly created' % remote_org)
|
log.info('Organization %s has been newly created', remote_org)
|
||||||
validated_org = org['id']
|
validated_org = org['id']
|
||||||
except (RemoteResourceError, ValidationError):
|
except (RemoteResourceError, ValidationError):
|
||||||
log.error('Could not get remote org %s' % remote_org)
|
log.error('Could not get remote org %s', remote_org)
|
||||||
|
|
||||||
package_dict['owner_org'] = validated_org or local_org
|
package_dict['owner_org'] = validated_org or local_org
|
||||||
|
|
||||||
|
@ -417,12 +426,12 @@ class CKANHarvester(HarvesterBase):
|
||||||
del package_dict['extras'][key]
|
del package_dict['extras'][key]
|
||||||
|
|
||||||
# Set default extras if needed
|
# Set default extras if needed
|
||||||
default_extras = self.config.get('default_extras',{})
|
default_extras = self.config.get('default_extras', {})
|
||||||
if default_extras:
|
if default_extras:
|
||||||
override_extras = self.config.get('override_extras',False)
|
override_extras = self.config.get('override_extras', False)
|
||||||
if not 'extras' in package_dict:
|
if not 'extras' in package_dict:
|
||||||
package_dict['extras'] = {}
|
package_dict['extras'] = {}
|
||||||
for key,value in default_extras.iteritems():
|
for key, value in default_extras.iteritems():
|
||||||
if not key in package_dict['extras'] or override_extras:
|
if not key in package_dict['extras'] or override_extras:
|
||||||
# Look for replacement strings
|
# Look for replacement strings
|
||||||
if isinstance(value,basestring):
|
if isinstance(value,basestring):
|
||||||
|
@ -466,9 +475,10 @@ class CKANHarvester(HarvesterBase):
|
||||||
pkg_role = model.PackageRole(package=package, user=user, role=model.Role.READER)
|
pkg_role = model.PackageRole(package=package, user=user, role=model.Role.READER)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
except ValidationError,e:
|
except ValidationError, e:
|
||||||
self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
|
self._save_object_error('Invalid package with GUID %s: %r' %
|
||||||
harvest_object, 'Import')
|
(harvest_object.guid, e.error_dict),
|
||||||
|
harvest_object, 'Import')
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self._save_object_error('%r'%e,harvest_object,'Import')
|
self._save_object_error('%r'%e,harvest_object,'Import')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue