From 0d79252a09b70e96685a8a1d2a0f3f34802d3db3 Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 22 Jan 2013 16:43:25 +0000 Subject: [PATCH 1/2] Add command for reindex all harvest sources --- ckanext/harvest/commands/harvester.py | 10 +++++++++ ckanext/harvest/logic/action/update.py | 31 ++++++++++++++++++++++++-- ckanext/harvest/logic/auth/update.py | 11 +++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 654a880..41d5a8d 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -57,6 +57,9 @@ class Harvester(CkanCommand): harvester job-all - create new harvest jobs for all active sources. + harvester reindex + - reindexes the harvest source datasets + The commands should be run from the ckanext-harvest directory and expect a development.ini file to be present. Most of the time you will specify the config explicitly though:: @@ -136,6 +139,8 @@ class Harvester(CkanCommand): elif cmd == 'harvesters-info': harvesters_info = get_action('harvesters_info_show')() pprint(harvesters_info) + elif cmd == 'reindex': + self.reindex() else: print 'Command %s not recognized' % cmd @@ -282,6 +287,11 @@ class Harvester(CkanCommand): jobs = get_action('harvest_job_create_all')(context,{}) print 'Created %s new harvest jobs' % len(jobs) + def reindex(self): + context = {'model': model, 'user': self.admin_user['name']} + get_action('harvest_sources_reindex')(context,{}) + + def print_harvest_sources(self, sources): if sources: print '' diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index 4d66ea7..c2690a3 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -5,6 +5,7 @@ import datetime from sqlalchemy import and_ +from ckan.lib.search.index import PackageSearchIndex from ckan.plugins import PluginImplementations from ckan.logic import get_action from ckanext.harvest.interfaces import IHarvester @@ -201,6 +202,7 @@ def harvest_jobs_run(context,data_dict): # Flag finished jobs as such jobs = harvest_job_list(context,{'source_id':source_id,'status':u'Running'}) if len(jobs): + package_index = PackageSearchIndex() for job in jobs: if job['gather_finished']: objects = session.query(HarvestObject.id) \ @@ -220,8 +222,7 @@ def harvest_jobs_run(context,data_dict): {'id': job_obj.source.id}) if package_dict: - from ckan.lib.search.index import PackageSearchIndex - PackageSearchIndex().index_package(package_dict) + package_index.index_package(package_dict) # Check if there are pending harvest jobs @@ -247,3 +248,29 @@ def harvest_jobs_run(context,data_dict): publisher.close() return sent_jobs +def harvest_sources_reindex(context, data_dict): + ''' + Reindexes all harvest source datasets with the latest status + ''' + log.info('Reindexing all harvest sources') + check_access('harvest_sources_reindex', context, data_dict) + + model = context['model'] + + packages = model.Session.query(model.Package) \ + .filter(model.Package.type==DATASET_TYPE_NAME) \ + .filter(model.Package.state==u'active') \ + .all() + + package_index = PackageSearchIndex() + for package in packages: + if 'extras_as_string'in context: + del context['extras_as_string'] + context.update({'validate': False, 'ignore_auth': True}) + package_dict = logic.get_action('package_show')(context, + {'id': package.id}) + log.debug('Updating search index for harvest source {0}'.format(package.id)) + package_index.index_package(package_dict, defer_commit=True) + + package_index.commit() + log.info('Updated search index for {0} harvest sources'.format(len(packages))) diff --git a/ckanext/harvest/logic/auth/update.py b/ckanext/harvest/logic/auth/update.py index a5bf31b..45d13d1 100644 --- a/ckanext/harvest/logic/auth/update.py +++ b/ckanext/harvest/logic/auth/update.py @@ -50,3 +50,14 @@ def harvest_jobs_run(context, data_dict): return {'success': False, 'msg': pt._('Only sysadmins can run the pending harvest jobs')} else: return {'success': True} + +def harvest_sources_reindex(context, data_dict): + ''' + Authorization check for reindexing all harvest sources + + Only sysadmins can do it + ''' + if not user_is_sysadmin(context): + return {'success': False, 'msg': pt._('Only sysadmins can reindex all harvest sources')} + else: + return {'success': True} From cca554c5ec59b7ef0505b234f7395ccdfb3db6ec Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 5 Feb 2013 12:33:56 +0000 Subject: [PATCH 2/2] Fix typo and add missing column on v3 migration script --- ckanext/harvest/model/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ckanext/harvest/model/__init__.py b/ckanext/harvest/model/__init__.py index e62c68d..0133cf7 100644 --- a/ckanext/harvest/model/__init__.py +++ b/ckanext/harvest/model/__init__.py @@ -364,7 +364,7 @@ def migrate_v3(): ALTER TABLE harvest_object ADD COLUMN import_started timestamp without time zone, ADD COLUMN import_finished timestamp without time zone, - ADD COLUMN "state" text; + ADD COLUMN "state" text, ADD COLUMN "report_status" text; ALTER TABLE harvest_source @@ -385,6 +385,10 @@ UPDATE harvest_source set frequency = 'MANUAL'; ALTER TABLE harvest_object DROP CONSTRAINT harvest_object_package_id_fkey; ALTER TABLE harvest_object ADD CONSTRAINT harvest_object_package_id_fkey FOREIGN KEY (package_id) REFERENCES package(id) DEFERRABLE; + +ALTER TABLE harvest_object_error + ADD COLUMN line integer; + """ conn.execute(statement) Session.commit()