From 7459358fa1527c037f16083dfc09ca97fc7e4c4b Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 15 May 2014 16:30:30 +0100 Subject: [PATCH] Support for single import commands We are now able to run `paster harvester import` for a single harvest object or for a single dataset, providing ids or name. --- ckanext/harvest/commands/harvester.py | 25 +++++++++++++++++++------ ckanext/harvest/logic/action/update.py | 17 +++++++++++++++-- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 947e3c6..fadd669 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -42,10 +42,13 @@ class Harvester(CkanCommand): harvester purge_queues - removes all jobs from fetch and gather queue - harvester [-j] [--segments={segments}] import [{source-id}] - - perform the import stage with the last fetched objects, optionally belonging to a certain source. - Please note that no objects will be fetched from the remote server. It will only affect - the last fetched objects already present in the database. + harvester [-j] [-o] [--segments={segments}] import [{source-id}] + - perform the import stage with the last fetched objects, for a certain + source or a single harvest object. Please note that no objects will + be fetched from the remote server. It will only affect the objects + already present in the database. + + To perform it on a particular object use the -o flag. If the -j flag is provided, the objects are not joined to existing datasets. This may be useful when importing objects for the first time. @@ -79,6 +82,12 @@ class Harvester(CkanCommand): self.parser.add_option('-j', '--no-join-datasets', dest='no_join_datasets', action='store_true', default=False, help='Do not join harvest objects to existing datasets') + self.parser.add_option('-o', '--harvest-object-id', dest='harvest_object_id', + default=False, help='Id of the harvest object to which perfom the import stage') + + self.parser.add_option('-p', '--package-id', dest='package_id', + default=False, help='Id of the package whose harvest object to perfom the import stage for') + self.parser.add_option('--segments', dest='segments', default=False, help= '''A string containing hex digits that represent which of @@ -291,9 +300,13 @@ class Harvester(CkanCommand): 'segments': self.options.segments} - objs = get_action('harvest_objects_import')(context,{'source_id':source_id}) + objs_count = get_action('harvest_objects_import')(context,{ + 'source_id': source_id, + 'harvest_object_id': self.options.harvest_object_id, + 'package_id': self.options.package_id, + }) - print '%s objects reimported' % len(objs) + print '%s objects reimported' % objs_count def create_harvest_job_all(self): context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index 206cf82..baf2c14 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -5,7 +5,7 @@ import datetime from pylons import config from paste.deploy.converters import asbool -from sqlalchemy import and_ +from sqlalchemy import and_, or_ from ckan.lib.search.index import PackageSearchIndex from ckan.plugins import PluginImplementations @@ -185,6 +185,8 @@ def harvest_objects_import(context,data_dict): model = context['model'] session = context['session'] source_id = data_dict.get('source_id',None) + harvest_object_id = data_dict.get('harvest_object_id',None) + package_id_or_name = data_dict.get('package_id',None) segments = context.get('segments',None) @@ -205,9 +207,20 @@ def harvest_objects_import(context,data_dict): .filter(HarvestObject.source==source) \ .filter(HarvestObject.current==True) + elif harvest_object_id: + last_objects_ids = session.query(HarvestObject.id) \ + .filter(HarvestObject.id==harvest_object_id) + elif package_id_or_name: + last_objects_ids = session.query(HarvestObject.id) \ + .join(Package) \ + .filter(HarvestObject.current==True) \ + .filter(Package.state==u'active') \ + .filter(or_(Package.id==package_id_or_name, + Package.name==package_id_or_name)) + join_datasets = False else: last_objects_ids = session.query(HarvestObject.id) \ - .filter(HarvestObject.current==True) \ + .filter(HarvestObject.current==True) if join_datasets: last_objects_ids = last_objects_ids.join(Package) \