Allow importing records without linking them to datasets

With the -j flag, harvest objects are not linked to existing datasets
during the import stage. This is sometimes useful when importing records
for the first time.
This commit is contained in:
amercader 2012-07-30 12:11:55 +01:00
parent 203bcb053b
commit 7011efe5dc
2 changed files with 28 additions and 10 deletions

View File

@ -40,11 +40,14 @@ class Harvester(CkanCommand):
harvester fetch_consumer
- starts the consumer for the fetching queue
harvester import [{source-id}]
harvester [-j] import [{source-id}]
- perform the import stage with the last fetched objects, optionally belonging to a certain source.
Please note that no objects will be fetched from the remote server. It will only affect
the last fetched objects already present in the database.
If the -j flag is provided, the objects are not joined to existing datasets. This may be useful
when importing objects for the first time.
harvester job-all
- create new harvest jobs for all active sources.
@ -61,6 +64,13 @@ class Harvester(CkanCommand):
max_args = 6
min_args = 0
def __init__(self, name):
    """Set up the command and register the ``-j`` command-line flag.

    :param name: command name, forwarded unchanged to the parent
        constructor.

    The ``-j`` / ``--no-join-datasets`` switch is a boolean flag that is
    off by default; its value is exposed under the ``no_join_datasets``
    option name.
    """
    super(Harvester, self).__init__(name)

    # Keyword arguments for the flag, gathered in one place for readability.
    flag_settings = dict(
        dest='no_join_datasets',
        action='store_true',
        default=False,
        help='Do not join harvest objects to existing datasets'
    )
    self.parser.add_option('-j', '--no-join-datasets', **flag_settings)
def command(self):
self._load_config()
@ -231,7 +241,11 @@ class Harvester(CkanCommand):
source_id = unicode(self.args[1])
else:
source_id = None
context = {'model': model, 'session':model.Session, 'user': self.admin_user['name']}
context = {'model': model, 'session':model.Session, 'user': self.admin_user['name'],
'join_datasets': not self.options.no_join_datasets}
objs = get_action('harvest_objects_import')(context,{'source_id':source_id})
print '%s objects reimported' % len(objs)

View File

@ -81,6 +81,8 @@ def harvest_objects_import(context,data_dict):
session = context['session']
source_id = data_dict.get('source_id',None)
join_datasets = context.get('join_datasets',True)
if source_id:
source = HarvestSource.get(source_id)
if not source:
@ -92,17 +94,19 @@ def harvest_objects_import(context,data_dict):
raise Exception('This harvest source is not active')
last_objects_ids = session.query(HarvestObject.id) \
.join(HarvestSource).join(Package) \
.join(HarvestSource) \
.filter(HarvestObject.source==source) \
.filter(HarvestObject.current==True) \
.filter(Package.state==u'active') \
.all()
.filter(HarvestObject.current==True)
else:
last_objects_ids = session.query(HarvestObject.id) \
.join(Package) \
.filter(HarvestObject.current==True) \
.filter(Package.state==u'active') \
.all()
if join_datasets:
last_objects_ids = last_objects_ids.join(Package) \
.filter(Package.state==u'active')
last_objects_ids = last_objects_ids.all()
last_objects = []
for obj_id in last_objects_ids:
@ -114,7 +118,7 @@ def harvest_objects_import(context,data_dict):
harvester.import_stage(obj)
break
last_objects.append(harvest_object_dictize(obj,context))
log.info('Harvest objects imported: %r', last_objects)
log.info('Harvest objects imported: %s', len(last_objects))
return last_objects
def harvest_jobs_run(context,data_dict):