add indexing of datasets whilst harvesting

This commit is contained in:
joetsoi 2013-02-27 11:34:09 +00:00
parent 348f936601
commit ba486a9482
2 changed files with 13 additions and 3 deletions

View File

@ -251,7 +251,7 @@ class Harvester(CkanCommand):
self.print_harvest_job(job)
jobs = get_action('harvest_job_list')(context,{'status':u'New'})
self.print_there_are('harvest jobs', jobs, condition=u'New')
self.print_there_are('harvest job', jobs, condition=u'New')
def list_harvest_jobs(self):
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
@ -319,7 +319,7 @@ class Harvester(CkanCommand):
print ' Job id: %s' % job['id']
print ' status: %s' % job['status']
print ' source: %s' % job['source_id']
print ' objects: %s' % len(job['objects'])
print ' objects: %s' % len(job.get('objects', []))
print 'gather_errors: %s' % len(job['gather_errors'])
if (len(job['gather_errors']) > 0):

View File

@ -5,6 +5,7 @@ import uuid
from sqlalchemy.sql import update,and_, bindparam
from sqlalchemy.exc import InvalidRequestError
from ckan import plugins as p
from ckan import model
from ckan.model import Session, Package
from ckan.logic import ValidationError, NotFound, get_action
@ -173,8 +174,17 @@ class HarvesterBase(SingletonPlugin):
package_dict['name'] = self._gen_new_name(package_dict['title'])
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
harvest_object.current = True
harvest_object.package_id = package_dict['id']
# Defer the harvest object's package foreign-key constraint and flush the
# session, so that the dataset can be indexed with the harvest object id
# (this happens on the after_show hook from the harvester plugin)
harvest_object.add()
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
model.Session.flush()
new_package = get_action('package_create_rest')(context, package_dict)
harvest_object.package_id = new_package['id']
# Flag the other objects linking to this package as not current anymore
from ckanext.harvest.model import harvest_object_table