Added clean-up mechanism for the harvest log

This commit is contained in:
Petar Efnushev 2016-05-06 18:44:02 +02:00
parent 3d519ce0b2
commit 009cc57e09
2 changed files with 31 additions and 1 deletions

View File

@ -3,6 +3,7 @@ from pprint import pprint
from ckan import model
from ckan.logic import get_action, ValidationError
from ckan.plugins import toolkit
from ckan.lib.cli import CkanCommand
@ -66,6 +67,11 @@ class Harvester(CkanCommand):
- removes all jobs from fetch and gather queue
WARNING: if using Redis, this command purges all data in the current
Redis database
harvester clean_harvest_log
- Clean-up mechanism for the harvest log table.
You can configure the time frame through the configuration
parameter `ckan.harvest.log_timeframe`. The default time frame is 30 days
harvester [-j] [-o|-g|-p {id/guid}] [--segments={segments}] import [{source-id}]
- perform the import stage with the last fetched objects, for a certain
@ -87,7 +93,7 @@ class Harvester(CkanCommand):
harvester job-all
- create new harvest jobs for all active sources.

harvester reindex
- reindexes the harvest source datasets
@ -192,6 +198,8 @@ class Harvester(CkanCommand):
pprint(harvesters_info)
elif cmd == 'reindex':
self.reindex()
elif cmd == 'clean_harvest_log':
self.clean_harvest_log()
else:
print 'Command %s not recognized' % cmd
@ -513,3 +521,14 @@ class Harvester(CkanCommand):
def is_singular(self, sequence):
    '''Tell whether ``sequence`` contains exactly one element.

    :param sequence: any object supporting ``len()``
    :returns: ``True`` if the length is 1, ``False`` otherwise
    '''
    item_count = len(sequence)
    return item_count == 1
def clean_harvest_log(self):
    '''Run the harvest log clean-up for the configured time frame.

    Reads the number of days from the ``ckan.harvest.log_timeframe``
    config option (30 when the option is not set), computes the
    corresponding UTC cut-off date and hands it over to the model-level
    ``clean_harvest_log`` function.
    '''
    from datetime import datetime, timedelta
    from pylons import config
    from ckanext.harvest.model import clean_harvest_log

    # Retention period, expressed in days
    retention_days = toolkit.asint(
        config.get('ckan.harvest.log_timeframe', 30))

    # Logs older than this point in time are to be deleted
    cutoff = datetime.utcnow() - timedelta(days=retention_days)
    clean_harvest_log(condition=cutoff)

View File

@ -566,3 +566,14 @@ def migrate_v3_create_datasets(source_ids=None):
log.info('Created new package for source {0} ({1})'.format(source.id, source.url))
except logic.ValidationError,e:
log.error('Validation Error: %s' % str(e.error_summary))
def clean_harvest_log(condition):
    '''Delete harvest log rows created on or before ``condition``.

    Issues a bulk delete against ``HarvestLog`` and commits it. If the
    commit raises ``InvalidRequestError`` the session is rolled back and
    an error is logged; the success message is logged only when the
    commit went through.

    :param condition: cut-off value compared against
        ``HarvestLog.created``; rows with ``created <= condition`` are
        removed
    '''
    # Bulk delete; the session is not synchronized with the deleted rows
    Session.query(HarvestLog).filter(HarvestLog.created <= condition)\
        .delete(synchronize_session=False)
    try:
        Session.commit()
    except InvalidRequestError:
        Session.rollback()
        log.error('An error occurred while trying to clean-up the harvest log table')
    else:
        # Report success only when the commit did not fail
        log.info('Harvest log table clean-up finished successfully')