Support for single import commands
We are now able to run `paster harvester import` for a single harvest object or for a single dataset, providing ids or name.
This commit is contained in:
parent
d6998d8d04
commit
7459358fa1
|
@ -42,10 +42,13 @@ class Harvester(CkanCommand):
|
||||||
harvester purge_queues
|
harvester purge_queues
|
||||||
- removes all jobs from fetch and gather queue
|
- removes all jobs from fetch and gather queue
|
||||||
|
|
||||||
harvester [-j] [--segments={segments}] import [{source-id}]
|
harvester [-j] [-o] [--segments={segments}] import [{source-id}]
|
||||||
- perform the import stage with the last fetched objects, optionally belonging to a certain source.
|
- perform the import stage with the last fetched objects, for a certain
|
||||||
Please note that no objects will be fetched from the remote server. It will only affect
|
source or a single harvest object. Please note that no objects will
|
||||||
the last fetched objects already present in the database.
|
be fetched from the remote server. It will only affect the objects
|
||||||
|
already present in the database.
|
||||||
|
|
||||||
|
To perform it on a particular object use the -o flag.
|
||||||
|
|
||||||
If the -j flag is provided, the objects are not joined to existing datasets. This may be useful
|
If the -j flag is provided, the objects are not joined to existing datasets. This may be useful
|
||||||
when importing objects for the first time.
|
when importing objects for the first time.
|
||||||
|
@ -79,6 +82,12 @@ class Harvester(CkanCommand):
|
||||||
self.parser.add_option('-j', '--no-join-datasets', dest='no_join_datasets',
|
self.parser.add_option('-j', '--no-join-datasets', dest='no_join_datasets',
|
||||||
action='store_true', default=False, help='Do not join harvest objects to existing datasets')
|
action='store_true', default=False, help='Do not join harvest objects to existing datasets')
|
||||||
|
|
||||||
|
self.parser.add_option('-o', '--harvest-object-id', dest='harvest_object_id',
|
||||||
|
default=False, help='Id of the harvest object to which perfom the import stage')
|
||||||
|
|
||||||
|
self.parser.add_option('-p', '--package-id', dest='package_id',
|
||||||
|
default=False, help='Id of the package whose harvest object to perfom the import stage for')
|
||||||
|
|
||||||
self.parser.add_option('--segments', dest='segments',
|
self.parser.add_option('--segments', dest='segments',
|
||||||
default=False, help=
|
default=False, help=
|
||||||
'''A string containing hex digits that represent which of
|
'''A string containing hex digits that represent which of
|
||||||
|
@ -291,9 +300,13 @@ class Harvester(CkanCommand):
|
||||||
'segments': self.options.segments}
|
'segments': self.options.segments}
|
||||||
|
|
||||||
|
|
||||||
objs = get_action('harvest_objects_import')(context,{'source_id':source_id})
|
objs_count = get_action('harvest_objects_import')(context,{
|
||||||
|
'source_id': source_id,
|
||||||
|
'harvest_object_id': self.options.harvest_object_id,
|
||||||
|
'package_id': self.options.package_id,
|
||||||
|
})
|
||||||
|
|
||||||
print '%s objects reimported' % len(objs)
|
print '%s objects reimported' % objs_count
|
||||||
|
|
||||||
def create_harvest_job_all(self):
|
def create_harvest_job_all(self):
|
||||||
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
|
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
|
||||||
|
|
|
@ -5,7 +5,7 @@ import datetime
|
||||||
|
|
||||||
from pylons import config
|
from pylons import config
|
||||||
from paste.deploy.converters import asbool
|
from paste.deploy.converters import asbool
|
||||||
from sqlalchemy import and_
|
from sqlalchemy import and_, or_
|
||||||
|
|
||||||
from ckan.lib.search.index import PackageSearchIndex
|
from ckan.lib.search.index import PackageSearchIndex
|
||||||
from ckan.plugins import PluginImplementations
|
from ckan.plugins import PluginImplementations
|
||||||
|
@ -185,6 +185,8 @@ def harvest_objects_import(context,data_dict):
|
||||||
model = context['model']
|
model = context['model']
|
||||||
session = context['session']
|
session = context['session']
|
||||||
source_id = data_dict.get('source_id',None)
|
source_id = data_dict.get('source_id',None)
|
||||||
|
harvest_object_id = data_dict.get('harvest_object_id',None)
|
||||||
|
package_id_or_name = data_dict.get('package_id',None)
|
||||||
|
|
||||||
segments = context.get('segments',None)
|
segments = context.get('segments',None)
|
||||||
|
|
||||||
|
@ -205,9 +207,20 @@ def harvest_objects_import(context,data_dict):
|
||||||
.filter(HarvestObject.source==source) \
|
.filter(HarvestObject.source==source) \
|
||||||
.filter(HarvestObject.current==True)
|
.filter(HarvestObject.current==True)
|
||||||
|
|
||||||
|
elif harvest_object_id:
|
||||||
|
last_objects_ids = session.query(HarvestObject.id) \
|
||||||
|
.filter(HarvestObject.id==harvest_object_id)
|
||||||
|
elif package_id_or_name:
|
||||||
|
last_objects_ids = session.query(HarvestObject.id) \
|
||||||
|
.join(Package) \
|
||||||
|
.filter(HarvestObject.current==True) \
|
||||||
|
.filter(Package.state==u'active') \
|
||||||
|
.filter(or_(Package.id==package_id_or_name,
|
||||||
|
Package.name==package_id_or_name))
|
||||||
|
join_datasets = False
|
||||||
else:
|
else:
|
||||||
last_objects_ids = session.query(HarvestObject.id) \
|
last_objects_ids = session.query(HarvestObject.id) \
|
||||||
.filter(HarvestObject.current==True) \
|
.filter(HarvestObject.current==True)
|
||||||
|
|
||||||
if join_datasets:
|
if join_datasets:
|
||||||
last_objects_ids = last_objects_ids.join(Package) \
|
last_objects_ids = last_objects_ids.join(Package) \
|
||||||
|
|
Loading…
Reference in New Issue