Allow not linking to datasets when importing records
With the -j flag, harvest objects are not linked to existing datasets during import. This is sometimes useful when importing records for the first time.
This commit is contained in:
parent
203bcb053b
commit
7011efe5dc
|
@ -40,11 +40,14 @@ class Harvester(CkanCommand):
|
||||||
harvester fetch_consumer
|
harvester fetch_consumer
|
||||||
- starts the consumer for the fetching queue
|
- starts the consumer for the fetching queue
|
||||||
|
|
||||||
harvester import [{source-id}]
|
harvester [-j] import [{source-id}]
|
||||||
- perform the import stage with the last fetched objects, optionally belonging to a certain source.
|
- perform the import stage with the last fetched objects, optionally belonging to a certain source.
|
||||||
Please note that no objects will be fetched from the remote server. It will only affect
|
Please note that no objects will be fetched from the remote server. It will only affect
|
||||||
the last fetched objects already present in the database.
|
the last fetched objects already present in the database.
|
||||||
|
|
||||||
|
If the -j flag is provided, the objects are not joined to existing datasets. This may be useful
|
||||||
|
when importing objects for the first time.
|
||||||
|
|
||||||
harvester job-all
|
harvester job-all
|
||||||
- create new harvest jobs for all active sources.
|
- create new harvest jobs for all active sources.
|
||||||
|
|
||||||
|
@ -61,6 +64,13 @@ class Harvester(CkanCommand):
|
||||||
max_args = 6
|
max_args = 6
|
||||||
min_args = 0
|
min_args = 0
|
||||||
|
|
||||||
|
def __init__(self,name):
|
||||||
|
|
||||||
|
super(Harvester,self).__init__(name)
|
||||||
|
|
||||||
|
self.parser.add_option('-j', '--no-join-datasets', dest='no_join_datasets',
|
||||||
|
action='store_true', default=False, help='Do not join harvest objects to existing datasets')
|
||||||
|
|
||||||
def command(self):
|
def command(self):
|
||||||
self._load_config()
|
self._load_config()
|
||||||
|
|
||||||
|
@ -231,7 +241,11 @@ class Harvester(CkanCommand):
|
||||||
source_id = unicode(self.args[1])
|
source_id = unicode(self.args[1])
|
||||||
else:
|
else:
|
||||||
source_id = None
|
source_id = None
|
||||||
context = {'model': model, 'session':model.Session, 'user': self.admin_user['name']}
|
|
||||||
|
context = {'model': model, 'session':model.Session, 'user': self.admin_user['name'],
|
||||||
|
'join_datasets': not self.options.no_join_datasets}
|
||||||
|
|
||||||
|
|
||||||
objs = get_action('harvest_objects_import')(context,{'source_id':source_id})
|
objs = get_action('harvest_objects_import')(context,{'source_id':source_id})
|
||||||
|
|
||||||
print '%s objects reimported' % len(objs)
|
print '%s objects reimported' % len(objs)
|
||||||
|
|
|
@ -81,6 +81,8 @@ def harvest_objects_import(context,data_dict):
|
||||||
session = context['session']
|
session = context['session']
|
||||||
source_id = data_dict.get('source_id',None)
|
source_id = data_dict.get('source_id',None)
|
||||||
|
|
||||||
|
join_datasets = context.get('join_datasets',True)
|
||||||
|
|
||||||
if source_id:
|
if source_id:
|
||||||
source = HarvestSource.get(source_id)
|
source = HarvestSource.get(source_id)
|
||||||
if not source:
|
if not source:
|
||||||
|
@ -92,17 +94,19 @@ def harvest_objects_import(context,data_dict):
|
||||||
raise Exception('This harvest source is not active')
|
raise Exception('This harvest source is not active')
|
||||||
|
|
||||||
last_objects_ids = session.query(HarvestObject.id) \
|
last_objects_ids = session.query(HarvestObject.id) \
|
||||||
.join(HarvestSource).join(Package) \
|
.join(HarvestSource) \
|
||||||
.filter(HarvestObject.source==source) \
|
.filter(HarvestObject.source==source) \
|
||||||
.filter(HarvestObject.current==True) \
|
.filter(HarvestObject.current==True)
|
||||||
.filter(Package.state==u'active') \
|
|
||||||
.all()
|
|
||||||
else:
|
else:
|
||||||
last_objects_ids = session.query(HarvestObject.id) \
|
last_objects_ids = session.query(HarvestObject.id) \
|
||||||
.join(Package) \
|
|
||||||
.filter(HarvestObject.current==True) \
|
.filter(HarvestObject.current==True) \
|
||||||
.filter(Package.state==u'active') \
|
|
||||||
.all()
|
if join_datasets:
|
||||||
|
last_objects_ids = last_objects_ids.join(Package) \
|
||||||
|
.filter(Package.state==u'active')
|
||||||
|
|
||||||
|
last_objects_ids = last_objects_ids.all()
|
||||||
|
|
||||||
last_objects = []
|
last_objects = []
|
||||||
for obj_id in last_objects_ids:
|
for obj_id in last_objects_ids:
|
||||||
|
@ -114,7 +118,7 @@ def harvest_objects_import(context,data_dict):
|
||||||
harvester.import_stage(obj)
|
harvester.import_stage(obj)
|
||||||
break
|
break
|
||||||
last_objects.append(harvest_object_dictize(obj,context))
|
last_objects.append(harvest_object_dictize(obj,context))
|
||||||
log.info('Harvest objects imported: %r', last_objects)
|
log.info('Harvest objects imported: %s', len(last_objects))
|
||||||
return last_objects
|
return last_objects
|
||||||
|
|
||||||
def harvest_jobs_run(context,data_dict):
|
def harvest_jobs_run(context,data_dict):
|
||||||
|
|
Loading…
Reference in New Issue