Support for single import commands

We are now able to run `paster harvester import` for a single harvest
object or for a single dataset, providing its id or name.
This commit is contained in:
amercader 2014-05-15 16:30:30 +01:00
parent d6998d8d04
commit 7459358fa1
2 changed files with 34 additions and 8 deletions

View File

@ -42,10 +42,13 @@ class Harvester(CkanCommand):
harvester purge_queues harvester purge_queues
- removes all jobs from fetch and gather queue - removes all jobs from fetch and gather queue
harvester [-j] [--segments={segments}] import [{source-id}] harvester [-j] [-o] [--segments={segments}] import [{source-id}]
- perform the import stage with the last fetched objects, optionally belonging to a certain source. - perform the import stage with the last fetched objects, for a certain
Please note that no objects will be fetched from the remote server. It will only affect source or a single harvest object. Please note that no objects will
the last fetched objects already present in the database. be fetched from the remote server. It will only affect the objects
already present in the database.
To perform it on a particular object use the -o flag.
If the -j flag is provided, the objects are not joined to existing datasets. This may be useful If the -j flag is provided, the objects are not joined to existing datasets. This may be useful
when importing objects for the first time. when importing objects for the first time.
@ -79,6 +82,12 @@ class Harvester(CkanCommand):
self.parser.add_option('-j', '--no-join-datasets', dest='no_join_datasets', self.parser.add_option('-j', '--no-join-datasets', dest='no_join_datasets',
action='store_true', default=False, help='Do not join harvest objects to existing datasets') action='store_true', default=False, help='Do not join harvest objects to existing datasets')
self.parser.add_option('-o', '--harvest-object-id', dest='harvest_object_id',
default=False, help='Id of the harvest object to which perfom the import stage')
self.parser.add_option('-p', '--package-id', dest='package_id',
default=False, help='Id of the package whose harvest object to perfom the import stage for')
self.parser.add_option('--segments', dest='segments', self.parser.add_option('--segments', dest='segments',
default=False, help= default=False, help=
'''A string containing hex digits that represent which of '''A string containing hex digits that represent which of
@ -291,9 +300,13 @@ class Harvester(CkanCommand):
'segments': self.options.segments} 'segments': self.options.segments}
objs = get_action('harvest_objects_import')(context,{'source_id':source_id}) objs_count = get_action('harvest_objects_import')(context,{
'source_id': source_id,
'harvest_object_id': self.options.harvest_object_id,
'package_id': self.options.package_id,
})
print '%s objects reimported' % len(objs) print '%s objects reimported' % objs_count
def create_harvest_job_all(self): def create_harvest_job_all(self):
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}

View File

@ -5,7 +5,7 @@ import datetime
from pylons import config from pylons import config
from paste.deploy.converters import asbool from paste.deploy.converters import asbool
from sqlalchemy import and_ from sqlalchemy import and_, or_
from ckan.lib.search.index import PackageSearchIndex from ckan.lib.search.index import PackageSearchIndex
from ckan.plugins import PluginImplementations from ckan.plugins import PluginImplementations
@ -185,6 +185,8 @@ def harvest_objects_import(context,data_dict):
model = context['model'] model = context['model']
session = context['session'] session = context['session']
source_id = data_dict.get('source_id',None) source_id = data_dict.get('source_id',None)
harvest_object_id = data_dict.get('harvest_object_id',None)
package_id_or_name = data_dict.get('package_id',None)
segments = context.get('segments',None) segments = context.get('segments',None)
@ -205,9 +207,20 @@ def harvest_objects_import(context,data_dict):
.filter(HarvestObject.source==source) \ .filter(HarvestObject.source==source) \
.filter(HarvestObject.current==True) .filter(HarvestObject.current==True)
elif harvest_object_id:
last_objects_ids = session.query(HarvestObject.id) \
.filter(HarvestObject.id==harvest_object_id)
elif package_id_or_name:
last_objects_ids = session.query(HarvestObject.id) \
.join(Package) \
.filter(HarvestObject.current==True) \
.filter(Package.state==u'active') \
.filter(or_(Package.id==package_id_or_name,
Package.name==package_id_or_name))
join_datasets = False
else: else:
last_objects_ids = session.query(HarvestObject.id) \ last_objects_ids = session.query(HarvestObject.id) \
.filter(HarvestObject.current==True) \ .filter(HarvestObject.current==True)
if join_datasets: if join_datasets:
last_objects_ids = last_objects_ids.join(Package) \ last_objects_ids = last_objects_ids.join(Package) \