Fixed a bug with deleting harvest sources that have a custom
configuration. Added PEP 8 compliance.
This commit is contained in:
parent
55325f5940
commit
02b81187df
|
@ -5,8 +5,9 @@ from ckan import plugins as p
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def harvest_source_delete(context, data_dict):
|
def harvest_source_delete(context, data_dict):
|
||||||
'''
|
"""
|
||||||
Deletes an existing harvest source
|
Deletes an existing harvest source
|
||||||
|
|
||||||
This method just proxies the request to package_delete,
|
This method just proxies the request to package_delete,
|
||||||
|
@ -19,16 +20,14 @@ def harvest_source_delete(context, data_dict):
|
||||||
:returns: the newly created harvest source
|
:returns: the newly created harvest source
|
||||||
:rtype: dictionary
|
:rtype: dictionary
|
||||||
|
|
||||||
'''
|
"""
|
||||||
|
|
||||||
log.info('Deleting harvest source: %r', data_dict)
|
log.info('Deleting harvest source: %r', data_dict)
|
||||||
|
|
||||||
p.toolkit.check_access('harvest_source_delete', context, data_dict)
|
p.toolkit.check_access('harvest_source_delete', context, data_dict)
|
||||||
|
|
||||||
p.toolkit.get_action('package_delete')(context, data_dict)
|
p.toolkit.get_action('package_delete')(context, data_dict)
|
||||||
|
|
||||||
if context.get('clear_source', False):
|
if context.get('clear_source', False):
|
||||||
|
# We need the id, the name won't work.
|
||||||
# We need the id, the name won't work
|
|
||||||
package_dict = p.toolkit.get_action('package_show')(context, data_dict)
|
package_dict = p.toolkit.get_action('package_show')(context, data_dict)
|
||||||
|
p.toolkit.get_action('harvest_source_clear')(
|
||||||
p.toolkit.get_action('harvest_source_clear')(context, {'id': package_dict['id']})
|
context, {'id': package_dict['id']})
|
||||||
|
|
|
@ -29,11 +29,13 @@ from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject
|
||||||
from ckanext.harvest.logic import HarvestJobExists, NoNewHarvestJobError
|
from ckanext.harvest.logic import HarvestJobExists, NoNewHarvestJobError
|
||||||
from ckanext.harvest.logic.schema import harvest_source_show_package_schema
|
from ckanext.harvest.logic.schema import harvest_source_show_package_schema
|
||||||
|
|
||||||
from ckanext.harvest.logic.action.get import harvest_source_show, harvest_job_list, _get_sources_for_user
|
from ckanext.harvest.logic.action.get import harvest_source_show, \
|
||||||
|
harvest_job_list, _get_sources_for_user
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def harvest_source_update(context, data_dict):
|
def harvest_source_update(context, data_dict):
|
||||||
'''
|
'''
|
||||||
Updates an existing harvest source
|
Updates an existing harvest source
|
||||||
|
@ -83,15 +85,19 @@ def harvest_source_update(context,data_dict):
|
||||||
|
|
||||||
return source
|
return source
|
||||||
|
|
||||||
|
|
||||||
def harvest_source_clear(context, data_dict):
|
def harvest_source_clear(context, data_dict):
|
||||||
'''
|
'''
|
||||||
Clears all datasets, jobs and objects related to a harvest source, but keeps the source itself.
|
Clears all datasets, jobs and objects related to a harvest source, but
|
||||||
This is useful to clean history of long running harvest sources to start again fresh.
|
keeps the source itself.
|
||||||
|
This is useful to clean history of long running harvest sources to start
|
||||||
|
again fresh.
|
||||||
|
|
||||||
:param id: the id of the harvest source to clear
|
:param id: the id of the harvest source to clear
|
||||||
:type id: string
|
:type id: string
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
|
||||||
check_access('harvest_source_clear', context, data_dict)
|
check_access('harvest_source_clear', context, data_dict)
|
||||||
|
|
||||||
harvest_source_id = data_dict.get('id', None)
|
harvest_source_id = data_dict.get('id', None)
|
||||||
|
@ -108,7 +114,11 @@ def harvest_source_clear(context,data_dict):
|
||||||
|
|
||||||
model = context['model']
|
model = context['model']
|
||||||
|
|
||||||
sql = "select id from related where id in (select related_id from related_dataset where dataset_id in (select package_id from harvest_object where harvest_source_id = '{harvest_source_id}'));".format(harvest_source_id=harvest_source_id)
|
sql = "select id from related where id in (select related_id from " \
|
||||||
|
"related_dataset where dataset_id in (select package_id from " \
|
||||||
|
"harvest_object where harvest_source_id = " \
|
||||||
|
"'{harvest_source_id}'));".format(
|
||||||
|
harvest_source_id=harvest_source_id)
|
||||||
result = model.Session.execute(sql)
|
result = model.Session.execute(sql)
|
||||||
ids = []
|
ids = []
|
||||||
for row in result:
|
for row in result:
|
||||||
|
@ -181,6 +191,7 @@ def harvest_source_clear(context,data_dict):
|
||||||
|
|
||||||
return {'id': harvest_source_id}
|
return {'id': harvest_source_id}
|
||||||
|
|
||||||
|
|
||||||
def harvest_source_index_clear(context, data_dict):
|
def harvest_source_index_clear(context, data_dict):
|
||||||
|
|
||||||
check_access('harvest_source_clear', context, data_dict)
|
check_access('harvest_source_clear', context, data_dict)
|
||||||
|
@ -194,8 +205,8 @@ def harvest_source_index_clear(context,data_dict):
|
||||||
harvest_source_id = source.id
|
harvest_source_id = source.id
|
||||||
|
|
||||||
conn = make_connection()
|
conn = make_connection()
|
||||||
query = ''' +%s:"%s" +site_id:"%s" ''' % ('harvest_source_id', harvest_source_id,
|
query = ''' +%s:"%s" +site_id:"%s" ''' % (
|
||||||
config.get('ckan.site_id'))
|
'harvest_source_id', harvest_source_id, config.get('ckan.site_id'))
|
||||||
try:
|
try:
|
||||||
conn.delete_query(query)
|
conn.delete_query(query)
|
||||||
if asbool(config.get('ckan.search.solr_commit', 'true')):
|
if asbool(config.get('ckan.search.solr_commit', 'true')):
|
||||||
|
@ -208,6 +219,7 @@ def harvest_source_index_clear(context,data_dict):
|
||||||
|
|
||||||
return {'id': harvest_source_id}
|
return {'id': harvest_source_id}
|
||||||
|
|
||||||
|
|
||||||
def harvest_objects_import(context, data_dict):
|
def harvest_objects_import(context, data_dict):
|
||||||
'''
|
'''
|
||||||
Reimports the current harvest objects
|
Reimports the current harvest objects
|
||||||
|
@ -217,6 +229,7 @@ def harvest_objects_import(context,data_dict):
|
||||||
It will only affect the last fetched objects already present in the
|
It will only affect the last fetched objects already present in the
|
||||||
database.
|
database.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
log.info('Harvest objects import: %r', data_dict)
|
log.info('Harvest objects import: %r', data_dict)
|
||||||
check_access('harvest_objects_import', context, data_dict)
|
check_access('harvest_objects_import', context, data_dict)
|
||||||
|
|
||||||
|
@ -228,7 +241,7 @@ def harvest_objects_import(context,data_dict):
|
||||||
|
|
||||||
segments = context.get('segments', None)
|
segments = context.get('segments', None)
|
||||||
|
|
||||||
join_datasets = context.get('join_datasets',True)
|
join_datasets = context.get('join_datasets', True)
|
||||||
|
|
||||||
if source_id:
|
if source_id:
|
||||||
source = HarvestSource.get(source_id)
|
source = HarvestSource.get(source_id)
|
||||||
|
@ -269,7 +282,8 @@ def harvest_objects_import(context,data_dict):
|
||||||
last_objects_count = 0
|
last_objects_count = 0
|
||||||
|
|
||||||
for obj_id in last_objects_ids:
|
for obj_id in last_objects_ids:
|
||||||
if segments and str(hashlib.md5(obj_id[0]).hexdigest())[0] not in segments:
|
if segments and \
|
||||||
|
str(hashlib.md5(obj_id[0]).hexdigest())[0] not in segments:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
obj = session.query(HarvestObject).get(obj_id)
|
obj = session.query(HarvestObject).get(obj_id)
|
||||||
|
@ -284,6 +298,7 @@ def harvest_objects_import(context,data_dict):
|
||||||
log.info('Harvest objects imported: %s', last_objects_count)
|
log.info('Harvest objects imported: %s', last_objects_count)
|
||||||
return last_objects_count
|
return last_objects_count
|
||||||
|
|
||||||
|
|
||||||
def _caluclate_next_run(frequency):
|
def _caluclate_next_run(frequency):
|
||||||
|
|
||||||
now = datetime.datetime.utcnow()
|
now = datetime.datetime.utcnow()
|
||||||
|
@ -325,6 +340,7 @@ def _make_scheduled_jobs(context, data_dict):
|
||||||
source.next_run = _caluclate_next_run(source.frequency)
|
source.next_run = _caluclate_next_run(source.frequency)
|
||||||
source.save()
|
source.save()
|
||||||
|
|
||||||
|
|
||||||
def harvest_jobs_run(context, data_dict):
|
def harvest_jobs_run(context, data_dict):
|
||||||
log.info('Harvest job run: %r', data_dict)
|
log.info('Harvest job run: %r', data_dict)
|
||||||
check_access('harvest_jobs_run', context, data_dict)
|
check_access('harvest_jobs_run', context, data_dict)
|
||||||
|
@ -339,7 +355,8 @@ def harvest_jobs_run(context,data_dict):
|
||||||
context['return_objects'] = False
|
context['return_objects'] = False
|
||||||
|
|
||||||
# Flag finished jobs as such
|
# Flag finished jobs as such
|
||||||
jobs = harvest_job_list(context,{'source_id':source_id,'status':u'Running'})
|
jobs = harvest_job_list(
|
||||||
|
context, {'source_id': source_id, 'status': u'Running'})
|
||||||
if len(jobs):
|
if len(jobs):
|
||||||
for job in jobs:
|
for job in jobs:
|
||||||
if job['gather_finished']:
|
if job['gather_finished']:
|
||||||
|
@ -363,14 +380,15 @@ def harvest_jobs_run(context,data_dict):
|
||||||
job_obj.save()
|
job_obj.save()
|
||||||
# Reindex the harvest source dataset so it has the latest
|
# Reindex the harvest source dataset so it has the latest
|
||||||
# status
|
# status
|
||||||
get_action('harvest_source_reindex')(context,
|
get_action('harvest_source_reindex')(
|
||||||
{'id': job_obj.source.id})
|
context, {'id': job_obj.source.id})
|
||||||
|
|
||||||
# resubmit old redis tasks
|
# resubmit old redis tasks
|
||||||
resubmit_jobs()
|
resubmit_jobs()
|
||||||
|
|
||||||
# Check if there are pending harvest jobs
|
# Check if there are pending harvest jobs
|
||||||
jobs = harvest_job_list(context,{'source_id':source_id,'status':u'New'})
|
jobs = harvest_job_list(
|
||||||
|
context, {'source_id': source_id, 'status': u'New'})
|
||||||
if len(jobs) == 0:
|
if len(jobs) == 0:
|
||||||
log.info('No new harvest jobs.')
|
log.info('No new harvest jobs.')
|
||||||
raise NoNewHarvestJobError('There are no new harvesting jobs')
|
raise NoNewHarvestJobError('There are no new harvesting jobs')
|
||||||
|
@ -412,15 +430,17 @@ def harvest_sources_reindex(context, data_dict):
|
||||||
|
|
||||||
reindex_context = {'defer_commit': True}
|
reindex_context = {'defer_commit': True}
|
||||||
for package in packages:
|
for package in packages:
|
||||||
get_action('harvest_source_reindex')(reindex_context, {'id': package.id})
|
get_action('harvest_source_reindex')(
|
||||||
|
reindex_context, {'id': package.id})
|
||||||
|
|
||||||
package_index.commit()
|
package_index.commit()
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@logic.side_effect_free
|
@logic.side_effect_free
|
||||||
def harvest_source_reindex(context, data_dict):
|
def harvest_source_reindex(context, data_dict):
|
||||||
'''Reindex a single harvest source'''
|
"""Reindex a single harvest source."""
|
||||||
|
|
||||||
harvest_source_id = logic.get_or_bust(data_dict, 'id')
|
harvest_source_id = logic.get_or_bust(data_dict, 'id')
|
||||||
defer_commit = context.get('defer_commit', False)
|
defer_commit = context.get('defer_commit', False)
|
||||||
|
@ -428,18 +448,23 @@ def harvest_source_reindex(context, data_dict):
|
||||||
if 'extras_as_string'in context:
|
if 'extras_as_string'in context:
|
||||||
del context['extras_as_string']
|
del context['extras_as_string']
|
||||||
context.update({'ignore_auth': True})
|
context.update({'ignore_auth': True})
|
||||||
package_dict = logic.get_action('harvest_source_show')(context,
|
package_dict = logic.get_action('harvest_source_show')(
|
||||||
{'id': harvest_source_id})
|
context, {'id': harvest_source_id})
|
||||||
log.debug('Updating search index for harvest source {0}'.format(harvest_source_id))
|
log.debug('Updating search index for harvest source {0}'.format(
|
||||||
|
harvest_source_id))
|
||||||
|
|
||||||
# Remove configuration values
|
# Remove configuration values
|
||||||
new_dict = {}
|
new_dict = {}
|
||||||
if package_dict.get('config'):
|
if package_dict.get('config', None):
|
||||||
config = json.loads(package_dict['config'])
|
config = json.loads(package_dict['config'])
|
||||||
for key, value in package_dict.iteritems():
|
for key, value in package_dict.iteritems():
|
||||||
if key not in config:
|
if value:
|
||||||
|
if value and key not in config:
|
||||||
new_dict[key] = value
|
new_dict[key] = value
|
||||||
|
|
||||||
package_index = PackageSearchIndex()
|
package_index = PackageSearchIndex()
|
||||||
package_index.index_package(new_dict, defer_commit=defer_commit)
|
package_index.index_package(
|
||||||
|
new_dict,
|
||||||
|
defer_commit=defer_commit)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
Loading…
Reference in New Issue