From 031e680b6c1a5503b416632625a44336dabb71e0 Mon Sep 17 00:00:00 2001 From: David Read Date: Tue, 8 Dec 2015 16:17:39 +0000 Subject: [PATCH] Add option to re-import based on guid. --- ckanext/harvest/commands/harvester.py | 11 ++++++++--- ckanext/harvest/logic/action/update.py | 11 ++++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 1466c66..de2e73d 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -67,7 +67,7 @@ class Harvester(CkanCommand): WARNING: if using Redis, this command purges all data in the current Redis database - harvester [-j] [-o] [--segments={segments}] import [{source-id}] + harvester [-j] [-o|-g|-p {id/guid}] [--segments={segments}] import [{source-id}] - perform the import stage with the last fetched objects, for a certain source or a single harvest object. Please note that no objects will be fetched from the remote server. It will only affect the objects @@ -75,6 +75,7 @@ class Harvester(CkanCommand): To import a particular harvest source, specify its id as an argument. To import a particular harvest object use the -o option. + To import a particular guid use the -g option. To import a particular package use the -p option. You will need to specify the -j flag in cases where the datasets are @@ -111,10 +112,13 @@ class Harvester(CkanCommand): action='store_true', default=False, help='Do not join harvest objects to existing datasets') self.parser.add_option('-o', '--harvest-object-id', dest='harvest_object_id', - default=False, help='Id of the harvest object to which perfom the import stage') + default=False, help='Id of the harvest object to which perform the import stage') self.parser.add_option('-p', '--package-id', dest='package_id', - default=False, help='Id of the package whose harvest object to perfom the import stage for') + default=False, help='Id of the package whose harvest object to perform the import stage for') + + self.parser.add_option('-g', '--guid', dest='guid', + default=False, help='Guid of the harvest object to which perform the import stage for') self.parser.add_option('--segments', dest='segments', default=False, help= @@ -445,6 +449,7 @@ class Harvester(CkanCommand): 'source_id': source_id, 'harvest_object_id': self.options.harvest_object_id, 'package_id': self.options.package_id, + 'guid': self.options.guid, }) print '%s objects reimported' % objs_count diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index afeb131..5bb380d 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -262,6 +262,8 @@ def harvest_objects_import(context, data_dict): :param source_id: the id of the harvest source to import :type source_id: string + :param guid: the guid of the harvest object to import + :type guid: string :param harvest_object_id: the id of the harvest object to import :type harvest_object_id: string :param package_id: the id or name of the package to import @@ -273,6 +275,7 @@ def harvest_objects_import(context, data_dict): model = context['model'] session = context['session'] source_id = data_dict.get('source_id') + guid = data_dict.get('guid') harvest_object_id = data_dict.get('harvest_object_id') package_id_or_name = data_dict.get('package_id') @@ -280,7 +283,13 @@ def harvest_objects_import(context, data_dict): join_datasets = context.get('join_datasets', True) - if source_id: + if guid: + last_objects_ids = \ + session.query(HarvestObject.id) \ + .filter(HarvestObject.guid == guid) \ + .filter(HarvestObject.current == True) + + elif source_id: source = HarvestSource.get(source_id) if not source: log.error('Harvest source %s does not exist', source_id)