From 7459358fa1527c037f16083dfc09ca97fc7e4c4b Mon Sep 17 00:00:00 2001
From: amercader <amercadero@gmail.com>
Date: Thu, 15 May 2014 16:30:30 +0100
Subject: [PATCH] Support for single import commands

We are now able to run `paster harvester import` for a single harvest
object or for a single dataset, providing ids or name.
---
 ckanext/harvest/commands/harvester.py  | 25 +++++++++++++++++++------
 ckanext/harvest/logic/action/update.py | 17 +++++++++++++++--
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py
index 947e3c6..fadd669 100644
--- a/ckanext/harvest/commands/harvester.py
+++ b/ckanext/harvest/commands/harvester.py
@@ -42,10 +42,13 @@ class Harvester(CkanCommand):
       harvester purge_queues
         - removes all jobs from fetch and gather queue
 
-      harvester [-j] [--segments={segments}] import [{source-id}]
-        - perform the import stage with the last fetched objects, optionally belonging to a certain source.
-          Please note that no objects will be fetched from the remote server. It will only affect
-          the last fetched objects already present in the database.
+      harvester [-j] [-o] [--segments={segments}] import [{source-id}]
+        - perform the import stage with the last fetched objects, for a certain
+          source or a single harvest object. Please note that no objects will
+          be fetched from the remote server. It will only affect the objects
+          already present in the database.
+
+          To perform it on a particular object use the -o flag.
 
           If the -j flag is provided, the objects are not joined to existing datasets. This may be useful
           when importing objects for the first time.
@@ -79,6 +82,12 @@ class Harvester(CkanCommand):
         self.parser.add_option('-j', '--no-join-datasets', dest='no_join_datasets',
             action='store_true', default=False, help='Do not join harvest objects to existing datasets')
 
+        self.parser.add_option('-o', '--harvest-object-id', dest='harvest_object_id',
+            default=False, help='Id of the harvest object to which perfom the import stage')
+
+        self.parser.add_option('-p', '--package-id', dest='package_id',
+            default=False, help='Id of the package whose harvest object to perfom the import stage for')
+
         self.parser.add_option('--segments', dest='segments',
             default=False, help=
 '''A string containing hex digits that represent which of
@@ -291,9 +300,13 @@ class Harvester(CkanCommand):
                    'segments': self.options.segments}
 
 
-        objs = get_action('harvest_objects_import')(context,{'source_id':source_id})
+        objs_count = get_action('harvest_objects_import')(context,{
+                'source_id': source_id,
+                'harvest_object_id': self.options.harvest_object_id,
+                'package_id': self.options.package_id,
+                })
 
-        print '%s objects reimported' % len(objs)
+        print '%s objects reimported' % objs_count
 
     def create_harvest_job_all(self):
         context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py
index 206cf82..baf2c14 100644
--- a/ckanext/harvest/logic/action/update.py
+++ b/ckanext/harvest/logic/action/update.py
@@ -5,7 +5,7 @@ import datetime
 
 from pylons import config
 from paste.deploy.converters import asbool
-from sqlalchemy import and_
+from sqlalchemy import and_, or_
 
 from ckan.lib.search.index import PackageSearchIndex
 from ckan.plugins import PluginImplementations
@@ -185,6 +185,8 @@ def harvest_objects_import(context,data_dict):
     model = context['model']
     session = context['session']
     source_id = data_dict.get('source_id',None)
+    harvest_object_id = data_dict.get('harvest_object_id',None)
+    package_id_or_name = data_dict.get('package_id',None)
 
     segments = context.get('segments',None)
 
@@ -205,9 +207,20 @@ def harvest_objects_import(context,data_dict):
                 .filter(HarvestObject.source==source) \
                 .filter(HarvestObject.current==True)
 
+    elif harvest_object_id:
+        last_objects_ids = session.query(HarvestObject.id) \
+                .filter(HarvestObject.id==harvest_object_id)
+    elif package_id_or_name:
+        last_objects_ids = session.query(HarvestObject.id) \
+            .join(Package) \
+            .filter(HarvestObject.current==True) \
+            .filter(Package.state==u'active') \
+            .filter(or_(Package.id==package_id_or_name,
+                        Package.name==package_id_or_name))
+        join_datasets = False
     else:
         last_objects_ids = session.query(HarvestObject.id) \
-                .filter(HarvestObject.current==True) \
+                .filter(HarvestObject.current==True)
 
     if join_datasets:
         last_objects_ids = last_objects_ids.join(Package) \