From 0748249e1fb648a333f68b7272e0bfcdbc3ec072 Mon Sep 17 00:00:00 2001
From: Sergey Motornyuk
Date: Wed, 24 Aug 2022 01:52:08 +0300
Subject: [PATCH] feat: experimental measurement protocol reporting

---
 ckanext/googleanalytics/plugin/__init__.py |  17 +---
 ckanext/googleanalytics/utils.py           | 108 +++++++++++++++++++++
 ckanext/googleanalytics/views.py           |  50 ++++++----
 3 files changed, 146 insertions(+), 29 deletions(-)

diff --git a/ckanext/googleanalytics/plugin/__init__.py b/ckanext/googleanalytics/plugin/__init__.py
index 84c45e9..1377389 100644
--- a/ckanext/googleanalytics/plugin/__init__.py
+++ b/ckanext/googleanalytics/plugin/__init__.py
@@ -1,18 +1,15 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
 
-from six.moves.urllib.parse import urlencode
 import logging
 import threading
 
-import requests
-
 import ckan.plugins as p
 import ckan.plugins.toolkit as tk
 
 from ckan.exceptions import CkanConfigurationException, CkanVersionException
 
-from .. import helpers
+from .. import helpers, utils
 from ..logic import action, auth
 
 log = logging.getLogger(__name__)
@@ -39,16 +36,8 @@ class AnalyticsPostThread(threading.Thread):
     def run(self):
         while True:
             # grabs host from queue
-            data_dict = self.queue.get()
-
-            data = urlencode(data_dict)
-            log.debug("Sending API event to Google Analytics: " + data)
-            # send analytics
-            requests.post(
-                "http://www.google-analytics.com/collect",
-                data,
-                timeout=10,
-            )
+            data = self.queue.get()
+            utils.send_event(data)
 
             # signals to queue job is done
             self.queue.task_done()
diff --git a/ckanext/googleanalytics/utils.py b/ckanext/googleanalytics/utils.py
index 3aeef78..0932fee 100644
--- a/ckanext/googleanalytics/utils.py
+++ b/ckanext/googleanalytics/utils.py
@@ -1,9 +1,16 @@
 import ast
+import json
+import logging
+import requests
 
+from six.moves.urllib.parse import urlencode
 import ckantoolkit as tk
 
+log = logging.getLogger(__name__)
+
 DEFAULT_RESOURCE_URL_TAG = "/downloads/"
 DEFAULT_RECENT_VIEW_DAYS = 14
+EVENT_API = "CKAN API Request"
 
 
 def config_id():
@@ -26,6 +33,20 @@ def config_tracking_mode():
     return "ga"
 
 
+def config_measurement_protocol_client_id():
+    return tk.config.get("googleanalytics.measurement_protocol.client_id")
+
+
+def config_measurement_protocol_client_secret():
+    return tk.config.get("googleanalytics.measurement_protocol.client_secret")
+
+
+def config_measurement_protocol_api_whitelist():
+    return tk.aslist(
+        tk.config.get("googleanalytics.measurement_protocol.api_tracking_whitelist")
+    )
+
+
 def config_account():
     return tk.config.get("googleanalytics.account")
 
@@ -74,3 +95,90 @@ def config_recent_view_days():
             "googleanalytics.recent_view_days", DEFAULT_RECENT_VIEW_DAYS
         )
     )
+
+
+def send_event(data):
+    """Dispatch a queued analytics event to the matching handler.
+
+    ``MeasurementProtocolData`` items are posted through the Measurement
+    Protocol (only ``EVENT_API`` events are supported there); any other
+    payload is sent to the classic collect endpoint by ``_ga_handler``.
+    """
+    if isinstance(data, MeasurementProtocolData):
+        if data["event"] != EVENT_API:
+            log.warning("Only API event supported by Measurement Protocol at the moment")
+            return
+
+        return _mp_api_handler({
+            "action": data["object"],
+            "payload": data["payload"],
+        })
+
+    return _ga_handler(data)
+
+
+class SafeJSONEncoder(json.JSONEncoder):
+    """JSON encoder that serializes unsupported values as ``null``."""
+
+    def default(self, _):
+        return None
+
+
+def _mp_api_handler(data_dict):
+    """Post one API-action event through the Measurement Protocol.
+
+    ``data_dict`` provides the ``action`` name and its ``payload``. When an
+    API whitelist is configured, actions missing from it are skipped.
+    """
+    whitelist = set(config_measurement_protocol_api_whitelist())
+    if whitelist and data_dict["action"] not in whitelist:
+        log.debug(
+            "Skip sending %s API action to Google Analytics because it is not whitelisted",
+            data_dict["action"]
+        )
+        return
+
+    log.debug(
+        "Sending API event to Google Analytics using the Measurement Protocol: %s",
+        data_dict
+    )
+    resp = requests.post(
+        "https://www.google-analytics.com/mp/collect",
+        params={
+            "api_secret": config_measurement_protocol_client_secret(),
+            "measurement_id": config_id()
+        },
+        data=json.dumps({
+            "client_id": config_measurement_protocol_client_id(),
+            "non_personalized_ads": False,
+            "events": [{
+                "name": data_dict["action"],
+                "params": data_dict["payload"]
+            }]
+        }, cls=SafeJSONEncoder),
+        # Bound the request so a stalled connection cannot block the
+        # AnalyticsPostThread worker forever (same limit as _ga_handler).
+        timeout=10,
+    )
+    if resp.status_code >= 300:
+        log.error("Cannot post event: %s", resp)
+
+
+def _ga_handler(data_dict):
+    """Send a url-encoded event to the Google Analytics collect endpoint."""
+    data = urlencode(data_dict)
+    log.debug("Sending API event to Google Analytics: %s", data)
+
+    requests.post(
+        "http://www.google-analytics.com/collect",
+        data,
+        timeout=10,
+    )
+
+
+class UniversalAnalyticsData(dict):
+    """Event payload that ``send_event`` forwards to ``_ga_handler``."""
+
+
+class MeasurementProtocolData(dict):
+    """Event payload that ``send_event`` forwards to ``_mp_api_handler``."""
diff --git a/ckanext/googleanalytics/views.py b/ckanext/googleanalytics/views.py
index 904d76e..58f73e3 100644
--- a/ckanext/googleanalytics/views.py
+++ b/ckanext/googleanalytics/views.py
@@ -12,6 +12,7 @@ import ckan.views.api as api
 import ckan.views.resource as resource
 
 from ckan.common import g
+from . import utils
 
 CONFIG_HANDLER_PATH = "googleanalytics.download_handler"
 
@@ -30,7 +31,7 @@ def action(logic_function, ver=api.API_MAX_VERSION):
                 id = request_data["q"]
             if "query" in request_data:
                 id = request_data[u"query"]
-            _post_analytics(g.user, "CKAN API Request", logic_function, "", id)
+            _post_analytics(g.user, utils.EVENT_API, logic_function, "", id, request_data)
     except Exception as e:
         log.debug(e)
         pass
@@ -87,23 +88,42 @@ ga.add_url_rule(
 
 
 def _post_analytics(
-    user, event_type, request_obj_type, request_function, request_id
+    user, event_type,
+    request_obj_type, request_function,
+    request_id, request_payload=None
 ):
+    """Build an analytics event and put it on the plugin's analytics queue.
+
+    API events use the Measurement Protocol payload when its client id is
+    configured; every other case keeps the Universal Analytics payload.
+    Nothing is queued unless ``googleanalytics.id`` is set.
+    """
 
     from ckanext.googleanalytics.plugin import GoogleAnalyticsPlugin
 
     if tk.config.get("googleanalytics.id"):
-        data_dict = {
-            "v": 1,
-            "tid": tk.config.get("googleanalytics.id"),
-            "cid": hashlib.md5(six.ensure_binary(tk.c.user)).hexdigest(),
-            # customer id should be obfuscated
-            "t": "event",
-            "dh": tk.request.environ["HTTP_HOST"],
-            "dp": tk.request.environ["PATH_INFO"],
-            "dr": tk.request.environ.get("HTTP_REFERER", ""),
-            "ec": event_type,
-            "ea": request_obj_type + request_function,
-            "el": request_id,
-        }
+        if utils.config_measurement_protocol_client_id() and event_type == utils.EVENT_API:
+            data_dict = utils.MeasurementProtocolData({
+                "event": event_type,
+                # "object" becomes the event name and whitelist key in
+                # utils.send_event, so it must carry the API action name.
+                "object": request_obj_type,
+                "function": request_function,
+                "id": request_id,
+                "payload": request_payload,
+            })
+        else:
+            data_dict = utils.UniversalAnalyticsData({
+                "v": 1,
+                "tid": tk.config.get("googleanalytics.id"),
+                "cid": hashlib.md5(six.ensure_binary(tk.c.user)).hexdigest(),
+                # customer id should be obfuscated
+                "t": "event",
+                "dh": tk.request.environ["HTTP_HOST"],
+                "dp": tk.request.environ["PATH_INFO"],
+                "dr": tk.request.environ.get("HTTP_REFERER", ""),
+                "ec": event_type,
+                "ea": request_obj_type + request_function,
+                "el": request_id,
+            })
         GoogleAnalyticsPlugin.analytics_queue.put(data_dict)