commit
b485aa97e5
10
README.rst
10
README.rst
|
@ -585,7 +585,8 @@ following steps with the one you are using.
|
||||||
describe the tasks that need to be monitored. These configuration files are
|
describe the tasks that need to be monitored. These configuration files are
|
||||||
stored in ``/etc/supervisor/conf.d``.
|
stored in ``/etc/supervisor/conf.d``.
|
||||||
|
|
||||||
Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and copy the following contents::
|
Create a file named ``/etc/supervisor/conf.d/ckan_harvesting.conf``, and
|
||||||
|
copy the following contents::
|
||||||
|
|
||||||
|
|
||||||
; ===============================
|
; ===============================
|
||||||
|
@ -676,10 +677,11 @@ following steps with the one you are using.
|
||||||
|
|
||||||
sudo crontab -e -u ckan
|
sudo crontab -e -u ckan
|
||||||
|
|
||||||
Note that we are running this command as the same user we configured the processes to be run with
|
Note that we are running this command as the same user we configured the
|
||||||
(``ckan`` in our example).
|
processes to be run with (``ckan`` in our example).
|
||||||
|
|
||||||
Paste this line into your crontab, again replacing the paths to paster and the ini file with yours::
|
Paste this line into your crontab, again replacing the paths to paster and
|
||||||
|
the ini file with yours::
|
||||||
|
|
||||||
# m h dom mon dow command
|
# m h dom mon dow command
|
||||||
*/15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini
|
*/15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/std/std.ini
|
||||||
|
|
|
@ -8,7 +8,6 @@ from pylons import config
|
||||||
from ckan import plugins as p
|
from ckan import plugins as p
|
||||||
from ckan import model
|
from ckan import model
|
||||||
from ckan.model import Session, Package, PACKAGE_NAME_MAX_LENGTH
|
from ckan.model import Session, Package, PACKAGE_NAME_MAX_LENGTH
|
||||||
from ckan.logic import ValidationError, NotFound, get_action
|
|
||||||
|
|
||||||
from ckan.logic.schema import default_create_package_schema
|
from ckan.logic.schema import default_create_package_schema
|
||||||
from ckan.lib.navl.validators import ignore_missing, ignore
|
from ckan.lib.navl.validators import ignore_missing, ignore
|
||||||
|
@ -288,7 +287,7 @@ class HarvesterBase(SingletonPlugin):
|
||||||
context.update({'id':package_dict['id']})
|
context.update({'id':package_dict['id']})
|
||||||
package_dict.setdefault('name',
|
package_dict.setdefault('name',
|
||||||
existing_package_dict['name'])
|
existing_package_dict['name'])
|
||||||
new_package = get_action('package_update_rest')(context, package_dict)
|
new_package = p.toolkit.get_action('package_update_rest')(context, package_dict)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid)
|
log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid)
|
||||||
|
@ -309,7 +308,7 @@ class HarvesterBase(SingletonPlugin):
|
||||||
harvest_object.current = True
|
harvest_object.current = True
|
||||||
harvest_object.save()
|
harvest_object.save()
|
||||||
|
|
||||||
except NotFound:
|
except p.toolkit.ObjectNotFound:
|
||||||
# Package needs to be created
|
# Package needs to be created
|
||||||
|
|
||||||
# Get rid of auth audit on the context otherwise we'll get an
|
# Get rid of auth audit on the context otherwise we'll get an
|
||||||
|
@ -333,13 +332,13 @@ class HarvesterBase(SingletonPlugin):
|
||||||
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
|
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
|
||||||
model.Session.flush()
|
model.Session.flush()
|
||||||
|
|
||||||
new_package = get_action('package_create_rest')(context, package_dict)
|
new_package = p.toolkit.get_action('package_create_rest')(context, package_dict)
|
||||||
|
|
||||||
Session.commit()
|
Session.commit()
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except ValidationError,e:
|
except p.toolkit.ValidationError, e:
|
||||||
log.exception(e)
|
log.exception(e)
|
||||||
self._save_object_error('Invalid package with GUID %s: %r'%(harvest_object.guid,e.error_dict),harvest_object,'Import')
|
self._save_object_error('Invalid package with GUID %s: %r'%(harvest_object.guid,e.error_dict),harvest_object,'Import')
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
@ -352,5 +351,5 @@ class HarvesterBase(SingletonPlugin):
|
||||||
data_dict = {'id': package_dict['id']}
|
data_dict = {'id': package_dict['id']}
|
||||||
package_show_context = {'model': model, 'session': Session,
|
package_show_context = {'model': model, 'session': Session,
|
||||||
'ignore_auth': True}
|
'ignore_auth': True}
|
||||||
return get_action('package_show')(
|
return p.toolkit.get_action('package_show')(
|
||||||
package_show_context, data_dict)
|
package_show_context, data_dict)
|
||||||
|
|
|
@ -17,6 +17,7 @@ log = logging.getLogger(__name__)
|
||||||
|
|
||||||
from base import HarvesterBase
|
from base import HarvesterBase
|
||||||
|
|
||||||
|
|
||||||
class CKANHarvester(HarvesterBase):
|
class CKANHarvester(HarvesterBase):
|
||||||
'''
|
'''
|
||||||
A Harvester for CKAN instances
|
A Harvester for CKAN instances
|
||||||
|
@ -61,18 +62,20 @@ class CKANHarvester(HarvesterBase):
|
||||||
content = self._get_content(url)
|
content = self._get_content(url)
|
||||||
return json.loads(content)
|
return json.loads(content)
|
||||||
except (ContentFetchError, ValueError):
|
except (ContentFetchError, ValueError):
|
||||||
log.debug('Could not fetch/decode remote group');
|
log.debug('Could not fetch/decode remote group')
|
||||||
raise RemoteResourceError('Could not fetch/decode remote group')
|
raise RemoteResourceError('Could not fetch/decode remote group')
|
||||||
|
|
||||||
def _get_organization(self, base_url, org_name):
|
def _get_organization(self, base_url, org_name):
|
||||||
url = base_url + self._get_action_api_offset() + '/organization_show?id=' + org_name
|
url = base_url + self._get_action_api_offset() + \
|
||||||
|
'/organization_show?id=' + org_name
|
||||||
try:
|
try:
|
||||||
content = self._get_content(url)
|
content = self._get_content(url)
|
||||||
content_dict = json.loads(content)
|
content_dict = json.loads(content)
|
||||||
return content_dict['result']
|
return content_dict['result']
|
||||||
except (ContentFetchError, ValueError, KeyError):
|
except (ContentFetchError, ValueError, KeyError):
|
||||||
log.debug('Could not fetch/decode remote group');
|
log.debug('Could not fetch/decode remote group')
|
||||||
raise RemoteResourceError('Could not fetch/decode remote organization')
|
raise RemoteResourceError(
|
||||||
|
'Could not fetch/decode remote organization')
|
||||||
|
|
||||||
def _set_config(self, config_str):
|
def _set_config(self, config_str):
|
||||||
if config_str:
|
if config_str:
|
||||||
|
@ -117,7 +120,8 @@ class CKANHarvester(HarvesterBase):
|
||||||
context = {'model': model, 'user': c.user}
|
context = {'model': model, 'user': c.user}
|
||||||
for group_name in config_obj['default_groups']:
|
for group_name in config_obj['default_groups']:
|
||||||
try:
|
try:
|
||||||
group = get_action('group_show')(context,{'id':group_name})
|
group = get_action('group_show')(
|
||||||
|
context, {'id': group_name})
|
||||||
except NotFound, e:
|
except NotFound, e:
|
||||||
raise ValueError('Default group not found')
|
raise ValueError('Default group not found')
|
||||||
|
|
||||||
|
@ -129,8 +133,9 @@ class CKANHarvester(HarvesterBase):
|
||||||
# Check if user exists
|
# Check if user exists
|
||||||
context = {'model': model, 'user': c.user}
|
context = {'model': model, 'user': c.user}
|
||||||
try:
|
try:
|
||||||
user = get_action('user_show')(context,{'id':config_obj.get('user')})
|
user = get_action('user_show')(
|
||||||
except NotFound,e:
|
context, {'id': config_obj.get('user')})
|
||||||
|
except NotFound:
|
||||||
raise ValueError('User not found')
|
raise ValueError('User not found')
|
||||||
|
|
||||||
for key in ('read_only', 'force_all'):
|
for key in ('read_only', 'force_all'):
|
||||||
|
@ -143,9 +148,9 @@ class CKANHarvester(HarvesterBase):
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
def gather_stage(self, harvest_job):
|
def gather_stage(self, harvest_job):
|
||||||
log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url)
|
log.debug('In CKANHarvester gather_stage (%s)',
|
||||||
|
harvest_job.source.url)
|
||||||
get_all_packages = True
|
get_all_packages = True
|
||||||
package_ids = []
|
package_ids = []
|
||||||
|
|
||||||
|
@ -284,16 +289,19 @@ class CKANHarvester(HarvesterBase):
|
||||||
harvest_object.save()
|
harvest_object.save()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def import_stage(self, harvest_object):
|
def import_stage(self, harvest_object):
|
||||||
log.debug('In CKANHarvester import_stage')
|
log.debug('In CKANHarvester import_stage')
|
||||||
|
|
||||||
context = {'model': model, 'session': Session, 'user': self._get_user_name()}
|
context = {'model': model, 'session': Session,
|
||||||
|
'user': self._get_user_name()}
|
||||||
if not harvest_object:
|
if not harvest_object:
|
||||||
log.error('No harvest object received')
|
log.error('No harvest object received')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if harvest_object.content is None:
|
if harvest_object.content is None:
|
||||||
self._save_object_error('Empty content for object %s' % harvest_object.id,
|
self._save_object_error('Empty content for object %s' %
|
||||||
|
harvest_object.id,
|
||||||
harvest_object, 'Import')
|
harvest_object, 'Import')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -311,7 +319,8 @@ class CKANHarvester(HarvesterBase):
|
||||||
if default_tags:
|
if default_tags:
|
||||||
if not 'tags' in package_dict:
|
if not 'tags' in package_dict:
|
||||||
package_dict['tags'] = []
|
package_dict['tags'] = []
|
||||||
package_dict['tags'].extend([t for t in default_tags if t not in package_dict['tags']])
|
package_dict['tags'].extend(
|
||||||
|
[t for t in default_tags if t not in package_dict['tags']])
|
||||||
|
|
||||||
remote_groups = self.config.get('remote_groups', None)
|
remote_groups = self.config.get('remote_groups', None)
|
||||||
if not remote_groups in ('only_local', 'create'):
|
if not remote_groups in ('only_local', 'create'):
|
||||||
|
@ -333,19 +342,19 @@ class CKANHarvester(HarvesterBase):
|
||||||
else:
|
else:
|
||||||
validated_groups.append(group['id'])
|
validated_groups.append(group['id'])
|
||||||
except NotFound, e:
|
except NotFound, e:
|
||||||
log.info('Group %s is not available' % group_name)
|
log.info('Group %s is not available', group_name)
|
||||||
if remote_groups == 'create':
|
if remote_groups == 'create':
|
||||||
try:
|
try:
|
||||||
group = self._get_group(harvest_object.source.url, group_name)
|
group = self._get_group(harvest_object.source.url, group_name)
|
||||||
except RemoteResourceError:
|
except RemoteResourceError:
|
||||||
log.error('Could not get remote group %s' % group_name)
|
log.error('Could not get remote group %s', group_name)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']:
|
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name']:
|
||||||
group.pop(key, None)
|
group.pop(key, None)
|
||||||
|
|
||||||
get_action('group_create')(context, group)
|
get_action('group_create')(context, group)
|
||||||
log.info('Group %s has been newly created' % group_name)
|
log.info('Group %s has been newly created', group_name)
|
||||||
if self.api_version == 1:
|
if self.api_version == 1:
|
||||||
validated_groups.append(group['name'])
|
validated_groups.append(group['name'])
|
||||||
else:
|
else:
|
||||||
|
@ -377,7 +386,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
org = get_action('organization_show')(context, data_dict)
|
org = get_action('organization_show')(context, data_dict)
|
||||||
validated_org = org['id']
|
validated_org = org['id']
|
||||||
except NotFound, e:
|
except NotFound, e:
|
||||||
log.info('Organization %s is not available' % remote_org)
|
log.info('Organization %s is not available', remote_org)
|
||||||
if remote_orgs == 'create':
|
if remote_orgs == 'create':
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
|
@ -390,10 +399,10 @@ class CKANHarvester(HarvesterBase):
|
||||||
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']:
|
for key in ['packages', 'created', 'users', 'groups', 'tags', 'extras', 'display_name', 'type']:
|
||||||
org.pop(key, None)
|
org.pop(key, None)
|
||||||
get_action('organization_create')(context, org)
|
get_action('organization_create')(context, org)
|
||||||
log.info('Organization %s has been newly created' % remote_org)
|
log.info('Organization %s has been newly created', remote_org)
|
||||||
validated_org = org['id']
|
validated_org = org['id']
|
||||||
except (RemoteResourceError, ValidationError):
|
except (RemoteResourceError, ValidationError):
|
||||||
log.error('Could not get remote org %s' % remote_org)
|
log.error('Could not get remote org %s', remote_org)
|
||||||
|
|
||||||
package_dict['owner_org'] = validated_org or local_org
|
package_dict['owner_org'] = validated_org or local_org
|
||||||
|
|
||||||
|
@ -467,7 +476,8 @@ class CKANHarvester(HarvesterBase):
|
||||||
|
|
||||||
return result
|
return result
|
||||||
except ValidationError, e:
|
except ValidationError, e:
|
||||||
self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
|
self._save_object_error('Invalid package with GUID %s: %r' %
|
||||||
|
(harvest_object.guid, e.error_dict),
|
||||||
harvest_object, 'Import')
|
harvest_object, 'Import')
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self._save_object_error('%r'%e,harvest_object,'Import')
|
self._save_object_error('%r'%e,harvest_object,'Import')
|
||||||
|
|
Loading…
Reference in New Issue