From ba486a9482894e6216f7fe9279195beb64e07606 Mon Sep 17 00:00:00 2001 From: joetsoi Date: Wed, 27 Feb 2013 11:34:09 +0000 Subject: [PATCH] add indexing of datasets whilst harvesting --- ckanext/harvest/commands/harvester.py | 4 ++-- ckanext/harvest/harvesters/base.py | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 1088e8d..6aeeaa0 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -251,7 +251,7 @@ class Harvester(CkanCommand): self.print_harvest_job(job) jobs = get_action('harvest_job_list')(context,{'status':u'New'}) - self.print_there_are('harvest jobs', jobs, condition=u'New') + self.print_there_are('harvest job', jobs, condition=u'New') def list_harvest_jobs(self): context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} @@ -319,7 +319,7 @@ class Harvester(CkanCommand): print ' Job id: %s' % job['id'] print ' status: %s' % job['status'] print ' source: %s' % job['source_id'] - print ' objects: %s' % len(job['objects']) + print ' objects: %s' % len(job.get('objects', [])) print 'gather_errors: %s' % len(job['gather_errors']) if (len(job['gather_errors']) > 0): diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index a7876ac..913442d 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -5,6 +5,7 @@ import uuid from sqlalchemy.sql import update,and_, bindparam from sqlalchemy.exc import InvalidRequestError +from ckan import plugins as p from ckan import model from ckan.model import Session, Package from ckan.logic import ValidationError, NotFound, get_action @@ -173,8 +174,17 @@ class HarvesterBase(SingletonPlugin): package_dict['name'] = self._gen_new_name(package_dict['title']) log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid) + harvest_object.current = True + harvest_object.package_id = package_dict['id'] + # Defer constraints and flush so the dataset can be indexed with + # the harvest object id (on the after_show hook from the harvester + # plugin) + harvest_object.add() + + model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + model.Session.flush() + new_package = get_action('package_create_rest')(context, package_dict) - harvest_object.package_id = new_package['id'] # Flag the other objects linking to this package as not current anymore from ckanext.harvest.model import harvest_object_table