Allow not linking to datasets when importing records
With the -j flag, harvest objects are not linked to datasets when importing. This is sometimes useful when importing records for the first time.
This commit is contained in:
parent
203bcb053b
commit
7011efe5dc
|
@ -40,11 +40,14 @@ class Harvester(CkanCommand):
|
|||
harvester fetch_consumer
|
||||
- starts the consumer for the fetching queue
|
||||
|
||||
harvester import [{source-id}]
|
||||
harvester [-j] import [{source-id}]
|
||||
- perform the import stage with the last fetched objects, optionally belonging to a certain source.
|
||||
Please note that no objects will be fetched from the remote server. It will only affect
|
||||
the last fetched objects already present in the database.
|
||||
|
||||
If the -j flag is provided, the objects are not joined to existing datasets. This may be useful
|
||||
when importing objects for the first time.
|
||||
|
||||
harvester job-all
|
||||
- create new harvest jobs for all active sources.
|
||||
|
||||
|
@ -61,6 +64,13 @@ class Harvester(CkanCommand):
|
|||
max_args = 6
|
||||
min_args = 0
|
||||
|
||||
def __init__(self,name):
|
||||
|
||||
super(Harvester,self).__init__(name)
|
||||
|
||||
self.parser.add_option('-j', '--no-join-datasets', dest='no_join_datasets',
|
||||
action='store_true', default=False, help='Do not join harvest objects to existing datasets')
|
||||
|
||||
def command(self):
|
||||
self._load_config()
|
||||
|
||||
|
@ -231,7 +241,11 @@ class Harvester(CkanCommand):
|
|||
source_id = unicode(self.args[1])
|
||||
else:
|
||||
source_id = None
|
||||
context = {'model': model, 'session':model.Session, 'user': self.admin_user['name']}
|
||||
|
||||
context = {'model': model, 'session':model.Session, 'user': self.admin_user['name'],
|
||||
'join_datasets': not self.options.no_join_datasets}
|
||||
|
||||
|
||||
objs = get_action('harvest_objects_import')(context,{'source_id':source_id})
|
||||
|
||||
print '%s objects reimported' % len(objs)
|
||||
|
|
|
@ -81,6 +81,8 @@ def harvest_objects_import(context,data_dict):
|
|||
session = context['session']
|
||||
source_id = data_dict.get('source_id',None)
|
||||
|
||||
join_datasets = context.get('join_datasets',True)
|
||||
|
||||
if source_id:
|
||||
source = HarvestSource.get(source_id)
|
||||
if not source:
|
||||
|
@ -92,17 +94,19 @@ def harvest_objects_import(context,data_dict):
|
|||
raise Exception('This harvest source is not active')
|
||||
|
||||
last_objects_ids = session.query(HarvestObject.id) \
|
||||
.join(HarvestSource).join(Package) \
|
||||
.join(HarvestSource) \
|
||||
.filter(HarvestObject.source==source) \
|
||||
.filter(HarvestObject.current==True) \
|
||||
.filter(Package.state==u'active') \
|
||||
.all()
|
||||
.filter(HarvestObject.current==True)
|
||||
|
||||
else:
|
||||
last_objects_ids = session.query(HarvestObject.id) \
|
||||
.join(Package) \
|
||||
.filter(HarvestObject.current==True) \
|
||||
.filter(Package.state==u'active') \
|
||||
.all()
|
||||
|
||||
if join_datasets:
|
||||
last_objects_ids = last_objects_ids.join(Package) \
|
||||
.filter(Package.state==u'active')
|
||||
|
||||
last_objects_ids = last_objects_ids.all()
|
||||
|
||||
last_objects = []
|
||||
for obj_id in last_objects_ids:
|
||||
|
@ -114,7 +118,7 @@ def harvest_objects_import(context,data_dict):
|
|||
harvester.import_stage(obj)
|
||||
break
|
||||
last_objects.append(harvest_object_dictize(obj,context))
|
||||
log.info('Harvest objects imported: %r', last_objects)
|
||||
log.info('Harvest objects imported: %s', len(last_objects))
|
||||
return last_objects
|
||||
|
||||
def harvest_jobs_run(context,data_dict):
|
||||
|
|
Loading…
Reference in New Issue