Add option to re-import based on guid.

This commit is contained in:
David Read 2015-12-08 16:17:39 +00:00
parent 121e8bd918
commit 031e680b6c
2 changed files with 18 additions and 4 deletions

View File

@ -67,7 +67,7 @@ class Harvester(CkanCommand):
WARNING: if using Redis, this command purges all data in the current WARNING: if using Redis, this command purges all data in the current
Redis database Redis database
harvester [-j] [-o] [--segments={segments}] import [{source-id}] harvester [-j] [-o|-g|-p {id/guid}] [--segments={segments}] import [{source-id}]
- perform the import stage with the last fetched objects, for a certain - perform the import stage with the last fetched objects, for a certain
source or a single harvest object. Please note that no objects will source or a single harvest object. Please note that no objects will
be fetched from the remote server. It will only affect the objects be fetched from the remote server. It will only affect the objects
@ -75,6 +75,7 @@ class Harvester(CkanCommand):
To import a particular harvest source, specify its id as an argument. To import a particular harvest source, specify its id as an argument.
To import a particular harvest object use the -o option. To import a particular harvest object use the -o option.
To import the harvest object with a particular guid use the -g option.
To import a particular package use the -p option. To import a particular package use the -p option.
You will need to specify the -j flag in cases where the datasets are You will need to specify the -j flag in cases where the datasets are
@ -111,10 +112,13 @@ class Harvester(CkanCommand):
action='store_true', default=False, help='Do not join harvest objects to existing datasets') action='store_true', default=False, help='Do not join harvest objects to existing datasets')
self.parser.add_option('-o', '--harvest-object-id', dest='harvest_object_id', self.parser.add_option('-o', '--harvest-object-id', dest='harvest_object_id',
default=False, help='Id of the harvest object to which perfom the import stage') default=False, help='Id of the harvest object to which perform the import stage')
self.parser.add_option('-p', '--package-id', dest='package_id', self.parser.add_option('-p', '--package-id', dest='package_id',
default=False, help='Id of the package whose harvest object to perfom the import stage for') default=False, help='Id of the package whose harvest object to perform the import stage for')
self.parser.add_option('-g', '--guid', dest='guid',
default=False, help='Guid of the harvest object to which perform the import stage for')
self.parser.add_option('--segments', dest='segments', self.parser.add_option('--segments', dest='segments',
default=False, help= default=False, help=
@ -445,6 +449,7 @@ class Harvester(CkanCommand):
'source_id': source_id, 'source_id': source_id,
'harvest_object_id': self.options.harvest_object_id, 'harvest_object_id': self.options.harvest_object_id,
'package_id': self.options.package_id, 'package_id': self.options.package_id,
'guid': self.options.guid,
}) })
print '%s objects reimported' % objs_count print '%s objects reimported' % objs_count

View File

@ -262,6 +262,8 @@ def harvest_objects_import(context, data_dict):
:param source_id: the id of the harvest source to import :param source_id: the id of the harvest source to import
:type source_id: string :type source_id: string
:param guid: the guid of the harvest object to import
:type guid: string
:param harvest_object_id: the id of the harvest object to import :param harvest_object_id: the id of the harvest object to import
:type harvest_object_id: string :type harvest_object_id: string
:param package_id: the id or name of the package to import :param package_id: the id or name of the package to import
@ -273,6 +275,7 @@ def harvest_objects_import(context, data_dict):
model = context['model'] model = context['model']
session = context['session'] session = context['session']
source_id = data_dict.get('source_id') source_id = data_dict.get('source_id')
guid = data_dict.get('guid')
harvest_object_id = data_dict.get('harvest_object_id') harvest_object_id = data_dict.get('harvest_object_id')
package_id_or_name = data_dict.get('package_id') package_id_or_name = data_dict.get('package_id')
@ -280,7 +283,13 @@ def harvest_objects_import(context, data_dict):
join_datasets = context.get('join_datasets', True) join_datasets = context.get('join_datasets', True)
if source_id: if guid:
last_objects_ids = \
session.query(HarvestObject.id) \
.filter(HarvestObject.guid == guid) \
.filter(HarvestObject.current == True)
elif source_id:
source = HarvestSource.get(source_id) source = HarvestSource.get(source_id)
if not source: if not source:
log.error('Harvest source %s does not exist', source_id) log.error('Harvest source %s does not exist', source_id)