From 0be2c868cb2f5a50d3c469399627566205bd001f Mon Sep 17 00:00:00 2001 From: Petar Efnushev Date: Wed, 11 May 2016 13:29:53 +0200 Subject: [PATCH] README updates DBLogHandler updates Added harvest_log table migration for existing users Implemented database log scoping --- README.rst | 81 ++++++++++++++++----------- ckanext/harvest/commands/harvester.py | 4 +- ckanext/harvest/log.py | 8 +-- ckanext/harvest/model/__init__.py | 6 +- ckanext/harvest/plugin.py | 58 +++++++++++++++++++ 5 files changed, 116 insertions(+), 41 deletions(-) diff --git a/README.rst b/README.rst index 07e6ef8..329befa 100644 --- a/README.rst +++ b/README.rst @@ -58,7 +58,53 @@ running a version lower than 2.0. ckan.harvest.mq.type = redis -7. Setup time frame(in days) for the clean-up mechanism with the following config parameter:: +7. If you want your ckan harvest logs to be exposed to the ckan API you need to properly + configure the logger. The default configuration logs everything to the database with + log level ``DEBUG``. If you want to modify the database logger, configure the following + parameter:: + + ckan.harvest.log_scope = 0 + + * Log scope settings: + + - ``-1`` Do not log to the database + - ``0`` Log everything - Default + - ``1`` model, logic.action, logic.validators, harvesters + - ``2`` model, logic.action, logic.validators + - ``3`` model, logic.action + - ``4`` logic.action + - ``5`` model + - ``6`` plugin + - ``7`` harvesters + + Additionally you can configure the logger in the following way:: + + [loggers] + keys = ckan_harvester + + [handlers] + keys = dblog + + [formatters] + keys = dblog + + [logger_ckan_harvester] + qualname = ckanext.harvest + handlers = dblog + level = DEBUG + + [handler_dblog] + class = ckanext.harvest.log.DBLogHandler + args = () + level = DEBUG + formatter = dblog + + [formatter_dblog] + format = %(message)s + + If you have trouble configuring the CKAN logger, please refer to ``test-core.ini`` + +8. 
Set up the time frame (in days) for the clean-up mechanism with the following config parameter:: ckan.harvest.log_timeframe = 10 @@ -91,39 +137,6 @@ config option (or ``default``) will be used to namespace the relevant things: * On Redis, it will namespace the keys used, so only the relevant instance gets them, eg ``site1:harvest_job_id``, ``site1:harvest_object__id:804f114a-8f68-4e7c-b124-3eb00f66202f`` -7. If you want your ckan harvest logs to be exposed to the ckan API you need to add the - following configuration options in your ckan configuriation file:: - - [loggers] - - keys = ckan_harvester - - [handlers] - - keys = dblog - - [formatters] - - keys = dblog - - [logger_ckan_harvester] - - qualname = ckanext.harvest - handlers = dblog - level = DEBUG - - [handler_dblog] - - class = ckanext.harvest.log.DBLogHandler - args = () - level = DEBUG - formatter = dblog - - [formatter_dblog] - - format = %(message)s - - If you are having troubles configuring ckan logger please refer to ``test-core.ini`` Configuration ============= diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 3f80ffd..f59102c 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -93,7 +93,7 @@ class Harvester(CkanCommand): harvester job-all - create new harvest jobs for all active sources. 
-https://www.facebook.com/ + harvester reindex - reindexes the harvest source datasets @@ -531,4 +531,4 @@ https://www.facebook.com/ condition = datetime.utcnow() - timedelta(days=log_timeframe) # Delete logs older then the given date - clean_harvest_log(condition=condition) \ No newline at end of file + clean_harvest_log(condition=condition) \ No newline at end of file diff --git a/ckanext/harvest/log.py b/ckanext/harvest/log.py index 98f3abf..cb1b242 100644 --- a/ckanext/harvest/log.py +++ b/ckanext/harvest/log.py @@ -1,10 +1,10 @@ -from logging import Handler +from logging import Handler, NOTSET from ckanext.harvest.model import HarvestLog -class DBLogHandler(Handler, object): - def __init__(self): - super(DBLogHandler,self).__init__() +class DBLogHandler(Handler): + def __init__(self, level=NOTSET): + super(DBLogHandler,self).__init__(level=level) def emit(self, record): try: diff --git a/ckanext/harvest/model/__init__.py b/ckanext/harvest/model/__init__.py index e0d4698..fe48c78 100644 --- a/ckanext/harvest/model/__init__.py +++ b/ckanext/harvest/model/__init__.py @@ -64,7 +64,7 @@ def setup(): harvest_object_error_table.create() harvest_object_extra_table.create() harvest_log_table.create() - + log.debug('Harvest tables created') else: from ckan.model.meta import engine @@ -88,6 +88,10 @@ def setup(): log.debug('Creating harvest source datasets for %i existing sources', len(sources_to_migrate)) sources_to_migrate = [s[0] for s in sources_to_migrate] migrate_v3_create_datasets(sources_to_migrate) + + # Check if harvest_log table exist - needed for existing users + if not 'harvest_log' in inspector.get_table_names(): + harvest_log_table.create() class HarvestError(Exception): diff --git a/ckanext/harvest/plugin.py b/ckanext/harvest/plugin.py index 4db7cfb..27bb857 100644 --- a/ckanext/harvest/plugin.py +++ b/ckanext/harvest/plugin.py @@ -19,6 +19,7 @@ from ckanext.harvest import logic as harvest_logic from ckanext.harvest.model import setup as model_setup from 
ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject +from ckanext.harvest.log import DBLogHandler @@ -217,6 +218,9 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm, DefaultTranslation): # Setup harvest model model_setup() + + # Configure logger + _configure_logger(config) self.startup = False @@ -463,3 +467,57 @@ def _delete_harvest_source_object(context, data_dict): log.debug('Harvest source %s deleted', source_id) return source + +def _configure_logger(config): + # Log scope + # + # -1 - do not log to the database + # 0 - log everything + # 1 - model, logic.action, logic.validators, harvesters + # 2 - model, logic.action, logic.validators + # 3 - model, logic.action + # 4 - logic.action + # 5 - model + # 6 - plugin + # 7 - harvesters + # + scope = p.toolkit.asint(config.get('ckan.harvest.log_scope', 0)) + if scope == -1: + return + + parent_logger = 'ckanext.harvest' + children = ['plugin', 'model', 'logic.action.create', 'logic.action.delete', + 'logic.action.get', 'logic.action.patch', 'action.update', + 'logic.validators', 'harvesters.base', 'harvesters.ckanharvester'] + + children_ = {0: children, 1: children[1:], 2: children[1:-2], + 3: children[1:-3], 4: children[2:-3], 5: children[1:2], + 6: children[:1], 7: children[-2:]} + + # Get log level from config param - default: DEBUG + from logging import DEBUG, INFO, WARNING, ERROR, CRITICAL + level_ = config.get('ckan.harvest.log_level', 'debug').upper() + if level_ == 'DEBUG': + level_ = DEBUG + elif level_ == 'INFO': + level_ = INFO + elif level_ == 'WARNING': + level_ = WARNING + elif level_ == 'ERROR': + level_ = ERROR + elif level_ == 'CRITICAL': + level_ = CRITICAL + else: + level_ = DEBUG + + loggers = children_.get(scope) + + # Get root logger and set db handler + logger = getLogger(parent_logger) + if scope < 1: + logger.addHandler(DBLogHandler(level=level_)) + + # Set db handler to all child loggers + for _ in loggers: + child_logger = logger.getChild(_) + 
child_logger.addHandler(DBLogHandler(level=level_))