From 009cc57e093df8af682e0053865239367f862dcc Mon Sep 17 00:00:00 2001
From: Petar Efnushev
Date: Fri, 6 May 2016 18:44:02 +0200
Subject: [PATCH] Added clean-up mechanism for the harvest log

---
 ckanext/harvest/commands/harvester.py | 24 ++++++++++++++++++++++++
 ckanext/harvest/model/__init__.py     | 16 ++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py
index 39be5ce..3f80ffd 100644
--- a/ckanext/harvest/commands/harvester.py
+++ b/ckanext/harvest/commands/harvester.py
@@ -3,6 +3,7 @@ from pprint import pprint
 
 from ckan import model
 from ckan.logic import get_action, ValidationError
+from ckan.plugins import toolkit
 
 from ckan.lib.cli import CkanCommand
 
@@ -66,6 +67,11 @@ class Harvester(CkanCommand):
         - removes all jobs from fetch and gather queue
           WARNING: if using Redis, this command purges all data in the current
           Redis database
+
+      harvester clean_harvest_log
+        - Clean-up mechanism for the harvest log table.
+          You can configure the time frame through the configuration
+          parameter `ckan.harvest.log_timeframe`. The default time frame is 30 days
 
       harvester [-j] [-o|-g|-p {id/guid}] [--segments={segments}] import [{source-id}]
         - perform the import stage with the last fetched objects, for a certain
@@ -192,6 +198,8 @@ class Harvester(CkanCommand):
             pprint(harvesters_info)
         elif cmd == 'reindex':
             self.reindex()
+        elif cmd == 'clean_harvest_log':
+            self.clean_harvest_log()
         else:
             print 'Command %s not recognized' % cmd
 
@@ -513,3 +521,19 @@ class Harvester(CkanCommand):
     def is_singular(self, sequence):
         return len(sequence) == 1
 
+    def clean_harvest_log(self):
+        '''Delete harvest log entries older than the configured time frame.
+
+        The time frame (in days) is read from the `ckan.harvest.log_timeframe`
+        config option and defaults to 30.
+        '''
+        from datetime import datetime, timedelta
+        from pylons import config
+        from ckanext.harvest.model import clean_harvest_log
+
+        # Log time frame - in days
+        log_timeframe = toolkit.asint(config.get('ckan.harvest.log_timeframe', 30))
+        condition = datetime.utcnow() - timedelta(days=log_timeframe)
+
+        # Delete logs older than the given date
+        clean_harvest_log(condition=condition)
diff --git a/ckanext/harvest/model/__init__.py b/ckanext/harvest/model/__init__.py
index 70fce78..e0d4698 100644
--- a/ckanext/harvest/model/__init__.py
+++ b/ckanext/harvest/model/__init__.py
@@ -566,3 +566,19 @@ def migrate_v3_create_datasets(source_ids=None):
             log.info('Created new package for source {0} ({1})'.format(source.id, source.url))
         except logic.ValidationError,e:
             log.error('Validation Error: %s' % str(e.error_summary))
+
+def clean_harvest_log(condition):
+    '''Delete all harvest log entries created on or before `condition`.
+
+    :param condition: cut-off value compared against `HarvestLog.created`;
+        entries with `created <= condition` are removed
+    '''
+    Session.query(HarvestLog).filter(HarvestLog.created <= condition)\
+        .delete(synchronize_session=False)
+    try:
+        Session.commit()
+    except InvalidRequestError:
+        Session.rollback()
+        log.error('An error occurred while trying to clean-up the harvest log table')
+    else:
+        # Only report success when the commit actually went through
+        log.info('Harvest log table clean-up finished successfully')