diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index 121c28e..c304ca7 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -80,6 +80,60 @@ def harvest_source_update(context,data_dict): return source +def harvest_source_clear(context,data_dict): + ''' + Clears all datasets, jobs and objects related to a harvest source, but keeps the source itself. + This is useful to clean history of long running harvest sources to start again fresh. + + :param id: the id of the harvest source to clear + :type id: string + + ''' + check_access('harvest_source_clear',context,data_dict) + + harvest_source_id = data_dict.get('id',None) + + source = HarvestSource.get(harvest_source_id) + if not source: + log.error('Harvest source %s does not exist', harvest_source_id) + raise NotFound('Harvest source %s does not exist' % harvest_source_id) + + sql = '''begin; + + select package_id as id into "{harvest_source_id}" from harvest_object where harvest_source_id = '{harvest_source_id}' ; + + --harvest + delete from harvest_object_error where harvest_object_id in (select id from harvest_object where harvest_source_id = '{harvest_source_id}'); + delete from harvest_object_extra where harvest_object_id in (select id from harvest_object where harvest_source_id = '{harvest_source_id}'); + delete from harvest_object where harvest_source_id = '{harvest_source_id}'; + delete from harvest_gather_error where harvest_job_id in (select id from harvest_job where source_id = '{harvest_source_id}'); + delete from harvest_job where source_id = '{harvest_source_id}'; + + delete from package_role where package_id in (select id from "{harvest_source_id}" ); + delete from user_object_role where id not in (select user_object_role_id from package_role) and context = 'Package'; + delete from resource_revision where resource_group_id in (select id from resource_group where package_id in (select id from "{harvest_source_id}")); + delete from resource_group_revision where package_id in (select id from "{harvest_source_id}"); + delete from package_tag_revision where package_id in (select id from "{harvest_source_id}"); + delete from member_revision where table_id in (select id from "{harvest_source_id}"); + delete from package_extra_revision where package_id in (select id from "{harvest_source_id}"); + delete from package_revision where id in (select id from "{harvest_source_id}"); + + delete from package_tag where id not in (select id from package_tag_revision); + delete from resource where id not in (select id from resource_revision); + delete from package_extra where id not in (select id from package_extra_revision); + delete from member where id not in (select id from member_revision); + delete from resource_group where id not in (select id from resource_group_revision); + delete from package where id not in (select id from package_revision); + + drop table "{harvest_source_id}"; + + commit;'''.format(harvest_source_id=harvest_source_id) + + model = context['model'] + + model.Session.execute(sql) + + return {'id': harvest_source_id} def harvest_objects_import(context,data_dict): diff --git a/ckanext/harvest/logic/auth/update.py b/ckanext/harvest/logic/auth/update.py index 45d13d1..d3a7739 100644 --- a/ckanext/harvest/logic/auth/update.py +++ b/ckanext/harvest/logic/auth/update.py @@ -27,6 +27,8 @@ def harvest_source_update(context, data_dict): return {'success': False, 'msg': pt._('User {0} not authorized to update harvest source {1}').format(user, source_id)} +def harvest_source_clear(context, data_dict): + return {'success': False, 'msg': pt._('Only sysadmins can clear harvest source')} def harvest_objects_import(context, data_dict): '''