Add config declarations

This commit is contained in:
Sergey Motornyuk 2022-05-06 19:01:29 +03:00
parent b70b26f392
commit 7f5c2c03d9
17 changed files with 383 additions and 140 deletions

View File

@ -8,14 +8,12 @@ import logging
import click import click
import ckan.model as model import ckan.model as model
from . import dbutil from . import dbutil, utils
import ckan.plugins.toolkit as tk import ckan.plugins.toolkit as tk
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
PACKAGE_URL = "/dataset/" # XXX get from routes... PACKAGE_URL = "/dataset/" # XXX get from routes...
DEFAULT_RESOURCE_URL_TAG = "/downloads/"
DEFAULT_RECENT_VIEW_DAYS = 14
RESOURCE_URL_REGEX = re.compile("/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)") RESOURCE_URL_REGEX = re.compile("/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)")
DATASET_EDIT_REGEX = re.compile("/dataset/edit/([a-z0-9-_]+)") DATASET_EDIT_REGEX = re.compile("/dataset/edit/([a-z0-9-_]+)")
@ -59,27 +57,13 @@ def load(credentials, start_date):
else: else:
query = "ga:pagePath=~%s,ga:pagePath=~%s" % ( query = "ga:pagePath=~%s,ga:pagePath=~%s" % (
PACKAGE_URL, PACKAGE_URL,
_resource_url_tag(), utils.config_prefix(),
) )
packages_data = get_ga_data(service, profile_id, query_filter=query) packages_data = get_ga_data(service, profile_id, query_filter=query)
save_ga_data(packages_data) save_ga_data(packages_data)
log.info("Saved %s records from google" % len(packages_data)) log.info("Saved %s records from google" % len(packages_data))
def _resource_url_tag():
return tk.config.get(
"googleanalytics_resource_prefix", DEFAULT_RESOURCE_URL_TAG
)
def _recent_view_days():
return tk.asint(
tk.config.get(
"googleanalytics.recent_view_days", DEFAULT_RECENT_VIEW_DAYS
)
)
############################################################################### ###############################################################################
# xxx # # xxx #
############################################################################### ###############################################################################
@ -139,7 +123,7 @@ def internal_save(packages_data, summary_date):
AND t2.tracking_date <= t1.tracking_date AND t2.tracking_date >= t1.tracking_date - %s AND t2.tracking_date <= t1.tracking_date AND t2.tracking_date >= t1.tracking_date - %s
) + t1.count ) + t1.count
WHERE t1.running_total = 0 AND tracking_type = 'resource';""" WHERE t1.running_total = 0 AND tracking_type = 'resource';"""
engine.execute(sql, _recent_view_days()) engine.execute(sql, utils.config_recent_view_days())
# update summary totals for pages # update summary totals for pages
sql = """UPDATE tracking_summary t1 sql = """UPDATE tracking_summary t1
@ -158,7 +142,7 @@ def internal_save(packages_data, summary_date):
WHERE t1.running_total = 0 AND tracking_type = 'page' WHERE t1.running_total = 0 AND tracking_type = 'page'
AND t1.package_id IS NOT NULL AND t1.package_id IS NOT NULL
AND t1.package_id != '~~not~found~~';""" AND t1.package_id != '~~not~found~~';"""
engine.execute(sql, _recent_view_days()) engine.execute(sql, utils.config_recent_view_days())
def bulk_import(service, profile_id, start_date=None): def bulk_import(service, profile_id, start_date=None):
@ -209,7 +193,7 @@ def get_ga_data_new(service, profile_id, start_date=None, end_date=None):
packages = {} packages = {}
query = "ga:pagePath=~%s,ga:pagePath=~%s" % ( query = "ga:pagePath=~%s,ga:pagePath=~%s" % (
PACKAGE_URL, PACKAGE_URL,
_resource_url_tag(), utils.config_prefix(),
) )
metrics = "ga:uniquePageviews" metrics = "ga:uniquePageviews"
sort = "-ga:uniquePageviews" sort = "-ga:uniquePageviews"
@ -259,7 +243,7 @@ def save_ga_data(packages_data):
ever = visits.get("ever", 0) ever = visits.get("ever", 0)
matches = RESOURCE_URL_REGEX.match(identifier) matches = RESOURCE_URL_REGEX.match(identifier)
if matches: if matches:
resource_url = identifier[len(_resource_url_tag()) :] resource_url = identifier[len(utils.config_prefix()) :]
resource = ( resource = (
model.Session.query(model.Resource) model.Session.query(model.Resource)
.autoflush(True) .autoflush(True)
@ -331,7 +315,7 @@ def get_ga_data(service, profile_id, query_filter):
{'identifier': {'recent':3, 'ever':6}} {'identifier': {'recent':3, 'ever':6}}
""" """
now = datetime.datetime.now() now = datetime.datetime.now()
recent_date = now - datetime.timedelta(_recent_view_days()) recent_date = now - datetime.timedelta(utils.config_recent_view_days())
recent_date = recent_date.strftime("%Y-%m-%d") recent_date = recent_date.strftime("%Y-%m-%d")
floor_date = datetime.date(2005, 1, 1) floor_date = datetime.date(2005, 1, 1)
packages = {} packages = {}

View File

@ -9,14 +9,11 @@ import time
from pylons import config as pylonsconfig from pylons import config as pylonsconfig
from ckan.lib.cli import CkanCommand from ckan.lib.cli import CkanCommand
import ckan.model as model import ckan.model as model
from ckan.plugins.toolkit import asint
from . import dbutil from . import dbutil, utils
log = logging.getLogger("ckanext.googleanalytics") log = logging.getLogger("ckanext.googleanalytics")
PACKAGE_URL = "/dataset/" # XXX get from routes... PACKAGE_URL = "/dataset/" # XXX get from routes...
DEFAULT_RESOURCE_URL_TAG = "/downloads/"
DEFAULT_RECENT_VIEW_DAYS = 14
RESOURCE_URL_REGEX = re.compile("/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)") RESOURCE_URL_REGEX = re.compile("/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)")
DATASET_EDIT_REGEX = re.compile("/dataset/edit/([a-z0-9-_]+)") DATASET_EDIT_REGEX = re.compile("/dataset/edit/([a-z0-9-_]+)")
@ -61,14 +58,8 @@ class LoadAnalytics(CkanCommand):
self._load_config() self._load_config()
self.CONFIG = pylonsconfig self.CONFIG = pylonsconfig
self.resource_url_tag = self.CONFIG.get( self.resource_url_tag = utils.config_prefix()
"googleanalytics_resource_prefix", DEFAULT_RESOURCE_URL_TAG self.recent_view_days = utils.config_recent_view_days()
)
self.recent_view_days = asint(
self.CONFIG.get(
"googleanalytics.recent_view_days", DEFAULT_RECENT_VIEW_DAYS
)
)
# funny dance we need to do to make sure we've got a # funny dance we need to do to make sure we've got a
# configured session # configured session

View File

@ -0,0 +1,30 @@
# CKAN config declaration for the googleanalytics plugin.
version: 1
groups:
  - annotation: GoogleAnalytics settings
    options:
      # Mandatory: read with item access (no fallback) by utils.config_id().
      - key: googleanalytics.id
        required: true
        placeholder: UA-000000000-1

      - key: googleanalytics.download_handler
        default: ckan.views.resource:download

      - key: googleanalytics.account

      - key: googleanalytics.domain
        default: auto

      # Python literal; parsed with ast.literal_eval by utils.config_fields().
      - key: googleanalytics.fields
        default: "{}"

      # Comma-separated list; split by utils.config_linked_domains().
      - key: googleanalytics.linked_domains
        default: ""

      - key: googleanalytics.enable_user_id
        type: bool
        default: false

      # NOTE: this key uses an underscore after "googleanalytics", unlike
      # the dotted keys above.
      - key: googleanalytics_resource_prefix
        default: "/downloads/"

      # Declared as an integer so the value is validated/normalized as one;
      # utils.config_recent_view_days() still passes it through tk.asint().
      - key: googleanalytics.recent_view_days
        type: int
        default: 14

View File

@ -7,7 +7,6 @@ from . import dbutil
import ckan.logic as logic import ckan.logic as logic
import hashlib import hashlib
from . import plugin from . import plugin
from pylons import config
from paste.util.multidict import MultiDict from paste.util.multidict import MultiDict
@ -15,6 +14,7 @@ from ckan.controllers.api import ApiController
from ckan.exceptions import CkanVersionException from ckan.exceptions import CkanVersionException
import ckan.plugins.toolkit as tk import ckan.plugins.toolkit as tk
from ckanext.googleanalytics import utils
try: try:
tk.requires_ckan_version("2.9") tk.requires_ckan_version("2.9")
@ -39,21 +39,20 @@ class GAApiController(ApiController):
def _post_analytics( def _post_analytics(
self, user, request_obj_type, request_function, request_id self, user, request_obj_type, request_function, request_id
): ):
if config.get("googleanalytics.id"): data_dict = {
data_dict = { "v": 1,
"v": 1, "tid": utils.config_id(),
"tid": config.get("googleanalytics.id"), "cid": hashlib.md5(user).hexdigest(),
"cid": hashlib.md5(user).hexdigest(), # customer id should be obfuscated
# customer id should be obfuscated "t": "event",
"t": "event", "dh": c.environ["HTTP_HOST"],
"dh": c.environ["HTTP_HOST"], "dp": c.environ["PATH_INFO"],
"dp": c.environ["PATH_INFO"], "dr": c.environ.get("HTTP_REFERER", ""),
"dr": c.environ.get("HTTP_REFERER", ""), "ec": "CKAN API Request",
"ec": "CKAN API Request", "ea": request_obj_type + request_function,
"ea": request_obj_type + request_function, "el": request_id,
"el": request_id, }
} plugin.GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
plugin.GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
def action(self, logic_function, ver=None): def action(self, logic_function, ver=None):
try: try:

View File

@ -2,15 +2,7 @@ import httplib2
from apiclient.discovery import build from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials from oauth2client.service_account import ServiceAccountCredentials
from ckan.exceptions import CkanVersionException from ckanext.googleanalytics import utils
import ckan.plugins.toolkit as tk
try:
tk.requires_ckan_version("2.9")
except CkanVersionException:
from pylons import config
else:
config = tk.config
def _prepare_credentials(credentials_filename): def _prepare_credentials(credentials_filename):
@ -51,8 +43,8 @@ def get_profile_id(service):
if not accounts.get("items"): if not accounts.get("items"):
return None return None
accountName = config.get("googleanalytics.account") accountName = utils.config_account()
webPropertyId = config.get("googleanalytics.id") webPropertyId = utils.config_id()
for acc in accounts.get("items"): for acc in accounts.get("items"):
if acc.get("name") == accountName: if acc.get("name") == accountName:
accountId = acc.get("id") accountId = acc.get("id")

View File

@ -1,15 +1,20 @@
import ast
import ckan.plugins.toolkit as tk import ckan.plugins.toolkit as tk
from ckanext.googleanalytics import utils
def get_helpers(): def get_helpers():
return { return {
"googleanalytics_header": header, "googleanalytics_header": googleanalytics_header,
"googleanalytics_resource_prefix": googleanalytics_resource_prefix,
} }
def header():
def googleanalytics_resource_prefix():
return utils.config_prefix()
def googleanalytics_header():
"""Render the googleanalytics_header snippet for CKAN 2.0 templates. """Render the googleanalytics_header snippet for CKAN 2.0 templates.
This is a template helper function that renders the This is a template helper function that renders the
@ -18,50 +23,17 @@ def header():
""" """
fields = _fields() fields = utils.config_fields()
if _enable_user_id() and tk.c.user: if utils.config_enable_user_id() and tk.c.user:
fields["userId"] = str(tk.c.userobj.id) fields["userId"] = str(tk.c.userobj.id)
data = { data = {
"googleanalytics_id": _id(), "googleanalytics_id": utils.config_id(),
"googleanalytics_domain": _domain(), "googleanalytics_domain": utils.config_domain(),
"googleanalytics_fields": str(fields), "googleanalytics_fields": str(fields),
"googleanalytics_linked_domains": _linked_domains(), "googleanalytics_linked_domains": utils.config_linked_domains(),
} }
return tk.render_snippet( return tk.render_snippet(
"googleanalytics/snippets/googleanalytics_header.html", data "googleanalytics/snippets/googleanalytics_header.html", data
) )
def _id():
return tk.config["googleanalytics.id"]
def _domain():
return tk.config.get(
"googleanalytics.domain", "auto"
)
def _fields():
fields = ast.literal_eval(
tk.config.get("googleanalytics.fields", "{}")
)
if _linked_domains():
fields["allowLinker"] = "true"
return fields
def _linked_domains():
googleanalytics_linked_domains = tk.config.get(
"googleanalytics.linked_domains", ""
)
return [
x.strip() for x in googleanalytics_linked_domains.split(",") if x
]
def _enable_user_id():
return tk.asbool(
tk.config.get("googleanalytics.enable_user_id", False)
)

View File

@ -0,0 +1 @@
Generic single-database configuration.

View File

@ -0,0 +1,74 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts
script_location = %(here)s
# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s
# timezone to use when rendering the date
# within the migration file as well as the filename.
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =
# max length of characters to apply to the
# "slug" field
#truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; this defaults
# to %(here)s/versions. When using multiple version
# directories, initial revisions must be specified with --version-path
# version_locations = %(here)s/bar %(here)s/bat %(here)s/versions
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
sqlalchemy.url = driver://user:pass@localhost/dbname
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View File

@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
from alembic import context
from sqlalchemy import engine_from_config, pool
from logging.config import fileConfig
import os
# This is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This call configures the loggers declared in that file.
fileConfig(config.config_file_name)
# Add your model's MetaData object here
# for 'autogenerate' support, e.g.:
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
# No MetaData is supplied by this extension, so None is handed to
# context.configure() in both migration modes below.
target_metadata = None
# Other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
# Name of the directory that contains this env.py. It is used below as the
# prefix of the alembic version table ("<name>_alembic_version").
name = os.path.basename(os.path.dirname(__file__))
def run_migrations_offline():
    """Run the migrations in 'offline' mode.

    The migration context is configured from the ``sqlalchemy.url`` option
    of the alembic config only; no Engine or connection is created here.
    The version table name is derived from the module-level ``name``.
    """
    offline_options = dict(
        url=config.get_main_option(u"sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
        version_table=u"{}_alembic_version".format(name),
    )
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online():
    """Run the migrations in 'online' mode.

    An Engine is built from the ``sqlalchemy.``-prefixed options of the
    active ini section (using ``NullPool``), a connection is opened, and the
    migration context is bound to that connection before the migrations run.
    """
    ini_section = config.get_section(config.config_ini_section)
    engine = engine_from_config(
        ini_section,
        prefix=u"sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        online_options = dict(
            connection=connection,
            target_metadata=target_metadata,
            version_table=u"{}_alembic_version".format(name),
        )
        context.configure(**online_options)

        with context.begin_transaction():
            context.run_migrations()
# Pick the runner that matches the mode alembic was invoked in, then run it.
_run_migrations = (
    run_migrations_offline
    if context.is_offline_mode()
    else run_migrations_online
)
_run_migrations()

View File

@ -0,0 +1,24 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade():
${upgrades if upgrades else "pass"}
def downgrade():
${downgrades if downgrades else "pass"}

View File

@ -0,0 +1,50 @@
"""empty message
Revision ID: b74febeb899b
Revises:
Create Date: 2022-05-06 17:46:09.398679
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.engine.reflection import Inspector
# revision identifiers, used by Alembic.
revision = "b74febeb899b"
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
    """Create ``package_stats`` and ``resource_stats`` if they are missing.

    The current table names are read from the migration's bind first, so a
    table that already exists is left untouched.
    """
    bind = op.get_bind()
    existing_tables = Inspector.from_engine(bind).get_table_names()

    creators = (
        ("package_stats", _create_package_stats),
        ("resource_stats", _create_resource_stats),
    )
    for table_name, create in creators:
        if table_name not in existing_tables:
            create()
def downgrade():
    """Drop both stats tables: ``resource_stats`` first, then ``package_stats``."""
    for table_name in ("resource_stats", "package_stats"):
        op.drop_table(table_name)
def _create_package_stats():
    """Create the ``package_stats`` table.

    Columns: ``package_id`` (String(60), primary key) plus the integer
    counters ``visits_recently`` and ``visits_ever``.
    """
    columns = [
        sa.Column("package_id", sa.String(60), primary_key=True),
        sa.Column("visits_recently", sa.Integer),
        sa.Column("visits_ever", sa.Integer),
    ]
    op.create_table("package_stats", *columns)
def _create_resource_stats():
    """Create the ``resource_stats`` table.

    Columns: ``resource_id`` (String(60), primary key) plus the integer
    counters ``visits_recently`` and ``visits_ever``.
    """
    columns = [
        sa.Column("resource_id", sa.String(60), primary_key=True),
        sa.Column("visits_recently", sa.Integer),
        sa.Column("visits_ever", sa.Integer),
    ]
    op.create_table("resource_stats", *columns)

View File

@ -14,8 +14,6 @@ from ckan.exceptions import CkanConfigurationException, CkanVersionException
from ckanext.googleanalytics import helpers from ckanext.googleanalytics import helpers
DEFAULT_RESOURCE_URL_TAG = "/downloads/"
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
try: try:
@ -60,7 +58,6 @@ class GoogleAnalyticsPlugin(GAMixinPlugin, p.SingletonPlugin):
p.implements(p.IConfigurer, inherit=True) p.implements(p.IConfigurer, inherit=True)
p.implements(p.ITemplateHelpers) p.implements(p.ITemplateHelpers)
def configure(self, config): def configure(self, config):
# spawn a pool of 5 threads, and pass them queue instance # spawn a pool of 5 threads, and pass them queue instance
for _i in range(5): for _i in range(5):
@ -76,18 +73,9 @@ class GoogleAnalyticsPlugin(GAMixinPlugin, p.SingletonPlugin):
msg = "Missing googleanalytics.id in config" msg = "Missing googleanalytics.id in config"
raise GoogleAnalyticsException(msg) raise GoogleAnalyticsException(msg)
# If resource_prefix is not in config file then write the default value
# to the config dict, otherwise templates seem to get 'true' when they
# try to read resource_prefix from config.
if "googleanalytics_resource_prefix" not in config:
config[
"googleanalytics_resource_prefix"
] = DEFAULT_RESOURCE_URL_TAG
def get_helpers(self): def get_helpers(self):
"""Return the CKAN 2.0 template helper functions this plugin provides.
See ITemplateHelpers.
"""
return helpers.get_helpers() return helpers.get_helpers()
if tk.check_ckan_version("2.10"):
tk.blanket.config_declarations(GoogleAnalyticsPlugin)

View File

@ -6,9 +6,9 @@ import importlib
import ckan.plugins as plugins import ckan.plugins as plugins
import ckan.plugins.toolkit as tk import ckan.plugins.toolkit as tk
from ckanext.googleanalytics import utils
from ckan.controllers.package import PackageController from ckan.controllers.package import PackageController
from pylons import config
from routes.mapper import SubMapper from routes.mapper import SubMapper
@ -147,19 +147,17 @@ def wrap_resource_download(func):
def _post_analytics( def _post_analytics(
user, event_type, request_obj_type, request_function, request_id user, event_type, request_obj_type, request_function, request_id
): ):
data_dict = {
if config.get("googleanalytics.id"): "v": 1,
data_dict = { "tid": utils.config_id(),
"v": 1, "cid": hashlib.md5(tk.c.user).hexdigest(),
"tid": config.get("googleanalytics.id"), # customer id should be obfuscated
"cid": hashlib.md5(tk.c.user).hexdigest(), "t": "event",
# customer id should be obfuscated "dh": tk.c.environ["HTTP_HOST"],
"t": "event", "dp": tk.c.environ["PATH_INFO"],
"dh": tk.c.environ["HTTP_HOST"], "dr": tk.c.environ.get("HTTP_REFERER", ""),
"dp": tk.c.environ["PATH_INFO"], "ec": event_type,
"dr": tk.c.environ.get("HTTP_REFERER", ""), "ea": request_obj_type + request_function,
"ec": event_type, "el": request_id,
"ea": request_obj_type + request_function, }
"el": request_id, GAMixinPlugin.analytics_queue.put(data_dict)
}
GAMixinPlugin.analytics_queue.put(data_dict)

View File

@ -13,7 +13,7 @@
{% set type = 'asset' if h.ckan_version().split('.')[1] | int >= 9 else 'resource' %} {% set type = 'asset' if h.ckan_version().split('.')[1] | int >= 9 else 'resource' %}
{% include 'googleanalytics/snippets/event_tracking_' ~ type ~ '.html' %} {% include 'googleanalytics/snippets/event_tracking_' ~ type ~ '.html' %}
<div class="js-hide" data-module="google-analytics" <div class="js-hide" data-module="google-analytics"
data-module-googleanalytics_resource_prefix="{{ g.googleanalytics_resource_prefix }}"> data-module-googleanalytics_resource_prefix="{{ h.googleanalytics_resource_prefix() }}">
</div> </div>
{% endblock %} {% endblock %}
{% endblock %} {% endblock %}

View File

@ -0,0 +1,52 @@
import ast
import ckantoolkit as tk
# Fallbacks used by config_prefix() and config_recent_view_days() when the
# corresponding option is absent from the CKAN config.
DEFAULT_RESOURCE_URL_TAG = "/downloads/"
DEFAULT_RECENT_VIEW_DAYS = 14
def config_id():
    """Return the ``googleanalytics.id`` config option.

    The option is read with item access rather than ``.get()``, so no
    fallback value is applied here.
    """
    tracking_id = tk.config["googleanalytics.id"]
    return tracking_id
def config_account():
    """Return the ``googleanalytics.account`` config option.

    No explicit default is passed to ``.get()``.
    """
    account_name = tk.config.get("googleanalytics.account")
    return account_name
def config_domain():
    """Return the ``googleanalytics.domain`` config option, or ``"auto"``."""
    option, fallback = "googleanalytics.domain", "auto"
    return tk.config.get(option, fallback)
def config_fields():
    """Return the extra tracker fields configured in ``googleanalytics.fields``.

    The option (default ``"{}"``) is parsed as a Python literal. When at
    least one linked domain is configured, ``allowLinker`` is set to the
    string ``"true"`` in the result.
    """
    raw_fields = tk.config.get("googleanalytics.fields", "{}")
    parsed = ast.literal_eval(raw_fields)

    linked = config_linked_domains()
    if linked:
        parsed["allowLinker"] = "true"

    return parsed
def config_linked_domains():
    """Return the domains listed in ``googleanalytics.linked_domains``.

    The option is a comma-separated string (default ``""``). Each entry is
    stripped of surrounding whitespace, and entries that are empty *after*
    stripping are dropped, so values such as ``"a.com, , b.com"`` or a
    trailing ``", "`` no longer produce empty-string domains.

    Returns:
        A list of non-empty, stripped domain strings (possibly empty).
    """
    raw_value = tk.config.get("googleanalytics.linked_domains", "")
    # Strip first, then filter: filtering the unstripped piece would let
    # whitespace-only entries through as "".
    stripped = (piece.strip() for piece in raw_value.split(","))
    return [domain for domain in stripped if domain]
def config_enable_user_id():
    """Return ``googleanalytics.enable_user_id`` converted with ``tk.asbool``.

    ``False`` is used when the option is absent.
    """
    raw_flag = tk.config.get("googleanalytics.enable_user_id", False)
    return tk.asbool(raw_flag)
def config_prefix():
    """Return the ``googleanalytics_resource_prefix`` config option.

    Falls back to ``DEFAULT_RESOURCE_URL_TAG`` when the option is absent.
    """
    option = "googleanalytics_resource_prefix"
    return tk.config.get(option, DEFAULT_RESOURCE_URL_TAG)
def config_recent_view_days():
    """Return ``googleanalytics.recent_view_days`` converted with ``tk.asint``.

    ``DEFAULT_RECENT_VIEW_DAYS`` is used when the option is absent.
    """
    raw_days = tk.config.get(
        "googleanalytics.recent_view_days", DEFAULT_RECENT_VIEW_DAYS
    )
    return tk.asint(raw_days)

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
gdata>=2.0.0
google-api-python-client>=1.6.1, <1.7.0
pyOpenSSL>=16.2.0
rsa>=3.1.4, <=4.0

View File

@ -22,7 +22,7 @@ keywords =
[options] [options]
# python_requires = >= 3.7 # python_requires = >= 3.7
install_requires = install_requires =
typing_extensions ckantoolkit
packages = find: packages = find:
namespace_packages = ckanext namespace_packages = ckanext