From a59ab4b5ff917b7b843d017db4fea3a972743881 Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 27 Jun 2014 16:39:02 +0100 Subject: [PATCH 1/2] [#91] Consolidate all harvest source reindex code in a single action Make it available to users with permissions on the harvest source --- ckanext/harvest/logic/action/update.py | 52 ++++++++++++++------------ ckanext/harvest/logic/auth/update.py | 8 ++++ 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index baf2c14..c7fd5c8 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -133,13 +133,7 @@ def harvest_source_clear(context,data_dict): model.Session.execute(sql) # Refresh the index for this source to update the status object - context.update({'validate': False, 'ignore_auth': True}) - package_dict = logic.get_action('package_show')(context, - {'id': harvest_source_id}) - - if package_dict: - package_index = PackageSearchIndex() - package_index.index_package(package_dict) + get_action('harvest_source_reindex')(context, {'id': harvest_source_id}) return {'id': harvest_source_id} @@ -326,14 +320,8 @@ def harvest_jobs_run(context,data_dict): job_obj.save() # Reindex the harvest source dataset so it has the latest # status - if 'extras_as_string'in context: - del context['extras_as_string'] - context.update({'validate': False, 'ignore_auth': True}) - package_dict = logic.get_action('package_show')(context, - {'id': job_obj.source.id}) - - if package_dict: - package_index.index_package(package_dict) + get_action('harvest_source_reindex')(reindex_context, + {'id': job_obj.source.id}) # resubmit old redis tasks resubmit_jobs() @@ -361,6 +349,8 @@ def harvest_jobs_run(context,data_dict): publisher.close() return sent_jobs + +@logic.side_effect_free def harvest_sources_reindex(context, data_dict): ''' Reindexes all harvest source datasets with the latest status @@ -376,14 +366,30 @@ def 
harvest_sources_reindex(context, data_dict): .all() package_index = PackageSearchIndex() + + reindex_context = {'defer_commit': True} for package in packages: - if 'extras_as_string'in context: - del context['extras_as_string'] - context.update({'ignore_auth': True}) - package_dict = logic.get_action('harvest_source_show')(context, - {'id': package.id}) - log.debug('Updating search index for harvest source {0}'.format(package.id)) - package_index.index_package(package_dict, defer_commit=True) + get_action('harvest_source_reindex')(reindex_context, {'id': package.id}) package_index.commit() - log.info('Updated search index for {0} harvest sources'.format(len(packages))) + + return True + +@logic.side_effect_free +def harvest_source_reindex(context, data_dict): + '''Reindex a single harvest source''' + + harvest_source_id = logic.get_or_bust(data_dict, 'id') + defer_commit = context.get('defer_commit', False) + + if 'extras_as_string'in context: + del context['extras_as_string'] + context.update({'ignore_auth': True}) + package_dict = logic.get_action('harvest_source_show')(context, + {'id': harvest_source_id}) + log.debug('Updating search index for harvest source {0}'.format(harvest_source_id)) + + package_index = PackageSearchIndex() + package_index.index_package(package_dict, defer_commit=defer_commit) + + return True diff --git a/ckanext/harvest/logic/auth/update.py b/ckanext/harvest/logic/auth/update.py index c8181a2..f394ba6 100644 --- a/ckanext/harvest/logic/auth/update.py +++ b/ckanext/harvest/logic/auth/update.py @@ -68,3 +68,11 @@ def harvest_sources_reindex(context, data_dict): return {'success': False, 'msg': pt._('Only sysadmins can reindex all harvest sources')} else: return {'success': True} + +def harvest_source_reindex(context, data_dict): + ''' + Authorization check for reindexing a harvest source + + It forwards to harvest_source_update + ''' + return harvest_source_update(context, data_dict) From 58a873ac7a31e7bfc956967d4e6f49368e3799e7 Mon Sep 17 
00:00:00 2001
From: amercader
Date: Fri, 27 Jun 2014 16:54:39 +0100
Subject: [PATCH 2/2] [#91] Remove config fields from source dict before
 indexing

We don't need them and will avoid indexing errors
---
 ckanext/harvest/logic/action/update.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py
index c7fd5c8..ffdacf2 100644
--- a/ckanext/harvest/logic/action/update.py
+++ b/ckanext/harvest/logic/action/update.py
@@ -1,4 +1,5 @@
 import hashlib
+import json
 import logging
 import datetime
 
@@ -389,7 +390,14 @@ def harvest_source_reindex(context, data_dict):
                             {'id': harvest_source_id})
     log.debug('Updating search index for harvest source {0}'.format(harvest_source_id))
 
+    # Remove configuration values; a source without config is indexed as is
+    config = {}
+    if package_dict.get('config'):
+        config = json.loads(package_dict['config'])
+    new_dict = dict((key, value)
+                    for key, value in package_dict.iteritems()
+                    if key not in config)
     package_index = PackageSearchIndex()
-    package_index.index_package(package_dict, defer_commit=defer_commit)
+    package_index.index_package(new_dict, defer_commit=defer_commit)
 
     return True