diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 1088e8d..efd32b3 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -251,7 +251,7 @@ class Harvester(CkanCommand): self.print_harvest_job(job) jobs = get_action('harvest_job_list')(context,{'status':u'New'}) - self.print_there_are('harvest jobs', jobs, condition=u'New') + self.print_there_are('harvest job', jobs, condition=u'New') def list_harvest_jobs(self): context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} @@ -319,11 +319,11 @@ class Harvester(CkanCommand): print ' Job id: %s' % job['id'] print ' status: %s' % job['status'] print ' source: %s' % job['source_id'] - print ' objects: %s' % len(job['objects']) + print ' objects: %s' % len(job.get('objects', [])) - print 'gather_errors: %s' % len(job['gather_errors']) + print 'gather_errors: %s' % len(job.get('gather_errors', [])) if (len(job['gather_errors']) > 0): - for error in job['gather_errors']: + for error in job.get('gather_errors', []): print ' %s' % error['message'] print '' diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index a7876ac..913442d 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -5,6 +5,7 @@ import uuid from sqlalchemy.sql import update,and_, bindparam from sqlalchemy.exc import InvalidRequestError +from ckan import plugins as p from ckan import model from ckan.model import Session, Package from ckan.logic import ValidationError, NotFound, get_action @@ -173,8 +174,17 @@ class HarvesterBase(SingletonPlugin): package_dict['name'] = self._gen_new_name(package_dict['title']) log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid) + harvest_object.current = True + harvest_object.package_id = package_dict['id'] + # Defer constraints and flush so the dataset can be indexed with + # the harvest object id (on the after_show hook from the harvester + # plugin) + harvest_object.add() + + model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + model.Session.flush() + new_package = get_action('package_create_rest')(context, package_dict) - harvest_object.package_id = new_package['id'] # Flag the other objects linking to this package as not current anymore from ckanext.harvest.model import harvest_object_table diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 4919098..ec13a90 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -35,15 +35,12 @@ class CKANHarvester(HarvesterBase): url = url, ) - try: - api_key = self.config.get('api_key',None) - if api_key: - http_request.add_header('Authorization',api_key) - http_response = urllib2.urlopen(http_request) + api_key = self.config.get('api_key',None) + if api_key: + http_request.add_header('Authorization',api_key) + http_response = urllib2.urlopen(http_request) - return http_response.read() - except Exception, e: - raise e + return http_response.read() def _set_config(self,config_str): if config_str: @@ -135,7 +132,7 @@ class CKANHarvester(HarvesterBase): get_all_packages = False # Request only the packages modified since last harvest job - last_time = harvest_job.gather_started.isoformat() + last_time = previous_job.gather_finished.isoformat() url = base_search_url + '/revision?since_time=%s' % last_time try: @@ -152,7 +149,7 @@ class CKANHarvester(HarvesterBase): continue revision = json.loads(content) - for package_id in revision.packages: + for package_id in revision['packages']: if not package_id in package_ids: package_ids.append(package_id) else: