Fixed bug with deleting harvest sources which have a custom

configuration. Added PEP-8 compliance.
2015-10-30 15:15:41 +00:00 · 2015-10-30 15:15:41 +00:00 · 02b81187df
parent 55325f5940
commit 02b81187df
2 changed files with 98 additions and 74 deletions
--- a/ckanext/harvest/logic/action/delete.py
+++ b/ckanext/harvest/logic/action/delete.py
@@ -5,8 +5,9 @@ from ckan import plugins as p

 log = logging.getLogger(__name__)

+
 def harvest_source_delete(context, data_dict):
-    '''
+    """
    Deletes an existing harvest source

    This method just proxies the request to package_delete,
@@ -19,16 +20,14 @@ def harvest_source_delete(context, data_dict):
    :returns: the newly created harvest source
    :rtype: dictionary

-    '''
+    """
+
    log.info('Deleting harvest source: %r', data_dict)

    p.toolkit.check_access('harvest_source_delete', context, data_dict)
-
    p.toolkit.get_action('package_delete')(context, data_dict)
-
    if context.get('clear_source', False):
-
-        # We need the id, the name won't work
+        # We need the id, the name won't work.
        package_dict = p.toolkit.get_action('package_show')(context, data_dict)
-
-        p.toolkit.get_action('harvest_source_clear')(context, {'id': package_dict['id']})
+        p.toolkit.get_action('harvest_source_clear')(
+            context, {'id': package_dict['id']})
--- a/ckanext/harvest/logic/action/update.py
+++ b/ckanext/harvest/logic/action/update.py
@@ -29,11 +29,13 @@ from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject
 from ckanext.harvest.logic import HarvestJobExists, NoNewHarvestJobError
 from ckanext.harvest.logic.schema import harvest_source_show_package_schema

-from ckanext.harvest.logic.action.get import harvest_source_show, harvest_job_list, _get_sources_for_user
+from ckanext.harvest.logic.action.get import harvest_source_show, \
+    harvest_job_list, _get_sources_for_user


 log = logging.getLogger(__name__)

+
 def harvest_source_update(context, data_dict):
    '''
    Updates an existing harvest source
@@ -83,15 +85,19 @@ def harvest_source_update(context,data_dict):

    return source

+
 def harvest_source_clear(context, data_dict):
    '''
-    Clears all datasets, jobs and objects related to a harvest source, but keeps the source itself.
-    This is useful to clean history of long running harvest sources to start again fresh.
+    Clears all datasets, jobs and objects related to a harvest source, but
+    keeps the source itself.
+    This is useful to clean history of long running harvest sources to start
+    again fresh.

    :param id: the id of the harvest source to clear
    :type id: string

    '''
+
    check_access('harvest_source_clear', context, data_dict)

    harvest_source_id = data_dict.get('id', None)
@@ -108,7 +114,11 @@ def harvest_source_clear(context,data_dict):

    model = context['model']

-    sql = "select id from related where id in (select related_id from related_dataset where dataset_id in (select package_id from harvest_object where harvest_source_id = '{harvest_source_id}'));".format(harvest_source_id=harvest_source_id)
+    sql = "select id from related where id in (select related_id from " \
+        "related_dataset where dataset_id in (select package_id from " \
+        "harvest_object where harvest_source_id = " \
+        "'{harvest_source_id}'));".format(
+            harvest_source_id=harvest_source_id)
    result = model.Session.execute(sql)
    ids = []
    for row in result:
@@ -181,6 +191,7 @@ def harvest_source_clear(context,data_dict):

    return {'id': harvest_source_id}

+
 def harvest_source_index_clear(context, data_dict):

    check_access('harvest_source_clear', context, data_dict)
@@ -194,8 +205,8 @@ def harvest_source_index_clear(context,data_dict):
    harvest_source_id = source.id

    conn = make_connection()
-    query = ''' +%s:"%s" +site_id:"%s" ''' % ('harvest_source_id', harvest_source_id,
-                                            config.get('ckan.site_id'))
+    query = ''' +%s:"%s" +site_id:"%s" ''' % (
+        'harvest_source_id', harvest_source_id, config.get('ckan.site_id'))
    try:
        conn.delete_query(query)
        if asbool(config.get('ckan.search.solr_commit', 'true')):
@@ -208,6 +219,7 @@ def harvest_source_index_clear(context,data_dict):

    return {'id': harvest_source_id}

+
 def harvest_objects_import(context, data_dict):
    '''
        Reimports the current harvest objects
@@ -217,6 +229,7 @@ def harvest_objects_import(context,data_dict):
        It will only affect the last fetched objects already present in the
        database.
    '''
+
    log.info('Harvest objects import: %r', data_dict)
    check_access('harvest_objects_import', context, data_dict)

@@ -228,7 +241,7 @@ def harvest_objects_import(context,data_dict):

    segments = context.get('segments', None)

-    join_datasets = context.get('join_datasets',True)
+    join_datasets = context.get('join_datasets', True)

    if source_id:
        source = HarvestSource.get(source_id)
@@ -269,7 +282,8 @@ def harvest_objects_import(context,data_dict):
    last_objects_count = 0

    for obj_id in last_objects_ids:
-        if segments and str(hashlib.md5(obj_id[0]).hexdigest())[0] not in segments:
+        if segments and \
+            str(hashlib.md5(obj_id[0]).hexdigest())[0] not in segments:
            continue

        obj = session.query(HarvestObject).get(obj_id)
@@ -284,6 +298,7 @@ def harvest_objects_import(context,data_dict):
    log.info('Harvest objects imported: %s', last_objects_count)
    return last_objects_count

+
 def _caluclate_next_run(frequency):

    now = datetime.datetime.utcnow()
@@ -325,6 +340,7 @@ def _make_scheduled_jobs(context, data_dict):
        source.next_run = _caluclate_next_run(source.frequency)
        source.save()

+
 def harvest_jobs_run(context, data_dict):
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)
@@ -339,7 +355,8 @@ def harvest_jobs_run(context,data_dict):
    context['return_objects'] = False

    # Flag finished jobs as such
-    jobs = harvest_job_list(context,{'source_id':source_id,'status':u'Running'})
+    jobs = harvest_job_list(
+        context, {'source_id': source_id, 'status': u'Running'})
    if len(jobs):
        for job in jobs:
            if job['gather_finished']:
@@ -363,14 +380,15 @@ def harvest_jobs_run(context,data_dict):
                    job_obj.save()
                    # Reindex the harvest source dataset so it has the latest
                    # status
-                    get_action('harvest_source_reindex')(context,
-                        {'id': job_obj.source.id})
+                    get_action('harvest_source_reindex')(
+                        context, {'id': job_obj.source.id})

    # resubmit old redis tasks
    resubmit_jobs()

    # Check if there are pending harvest jobs
-    jobs = harvest_job_list(context,{'source_id':source_id,'status':u'New'})
+    jobs = harvest_job_list(
+        context, {'source_id': source_id, 'status': u'New'})
    if len(jobs) == 0:
        log.info('No new harvest jobs.')
        raise NoNewHarvestJobError('There are no new harvesting jobs')
@@ -412,15 +430,17 @@ def harvest_sources_reindex(context, data_dict):

    reindex_context = {'defer_commit': True}
    for package in packages:
-        get_action('harvest_source_reindex')(reindex_context, {'id': package.id})
+        get_action('harvest_source_reindex')(
+            reindex_context, {'id': package.id})

    package_index.commit()

    return True

+
@logic.side_effect_free
 def harvest_source_reindex(context, data_dict):
-    '''Reindex a single harvest source'''
+    """Reindex a single harvest source."""

    harvest_source_id = logic.get_or_bust(data_dict, 'id')
    defer_commit = context.get('defer_commit', False)
@@ -428,18 +448,23 @@ def harvest_source_reindex(context, data_dict):
    if 'extras_as_string'in context:
        del context['extras_as_string']
    context.update({'ignore_auth': True})
-    package_dict = logic.get_action('harvest_source_show')(context,
-        {'id': harvest_source_id})
-    log.debug('Updating search index for harvest source {0}'.format(harvest_source_id))
+    package_dict = logic.get_action('harvest_source_show')(
+        context, {'id': harvest_source_id})
+    log.debug('Updating search index for harvest source {0}'.format(
+        harvest_source_id))

    # Remove configuration values
    new_dict = {}
-    if package_dict.get('config'):
+    if package_dict.get('config', None):
        config = json.loads(package_dict['config'])
        for key, value in package_dict.iteritems():
-            if key not in config:
+            if value:
+                if value and key not in config:
                    new_dict[key] = value
+
    package_index = PackageSearchIndex()
-    package_index.index_package(new_dict, defer_commit=defer_commit)
+    package_index.index_package(
+        new_dict,
+        defer_commit=defer_commit)

    return True