2.9 support

This commit is contained in:
Sergey Motornyuk 2019-11-22 16:59:22 +02:00
parent a365fcdf6d
commit a5be073c04
15 changed files with 759 additions and 533 deletions

View File

@ -1,9 +1,11 @@
# this is a namespace package # this is a namespace package
try: try:
import pkg_resources import pkg_resources
pkg_resources.declare_namespace(__name__) pkg_resources.declare_namespace(__name__)
except ImportError: except ImportError:
import pkgutil import pkgutil
__path__ = pkgutil.extend_path(__path__, __name__) __path__ = pkgutil.extend_path(__path__, __name__)
try: try:

View File

@ -1,7 +1,9 @@
# this is a namespace package # this is a namespace package
try: try:
import pkg_resources import pkg_resources
pkg_resources.declare_namespace(__name__) pkg_resources.declare_namespace(__name__)
except ImportError: except ImportError:
import pkgutil import pkgutil
__path__ = pkgutil.extend_path(__path__, __name__) __path__ = pkgutil.extend_path(__path__, __name__)

View File

@ -10,18 +10,19 @@ import ckan.model as model
import dbutil import dbutil
log = logging.getLogger('ckanext.googleanalytics') log = logging.getLogger("ckanext.googleanalytics")
PACKAGE_URL = '/dataset/' # XXX get from routes... PACKAGE_URL = "/dataset/" # XXX get from routes...
DEFAULT_RESOURCE_URL_TAG = '/downloads/' DEFAULT_RESOURCE_URL_TAG = "/downloads/"
RESOURCE_URL_REGEX = re.compile('/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)') RESOURCE_URL_REGEX = re.compile("/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)")
DATASET_EDIT_REGEX = re.compile('/dataset/edit/([a-z0-9-_]+)') DATASET_EDIT_REGEX = re.compile("/dataset/edit/([a-z0-9-_]+)")
class InitDB(CkanCommand): class InitDB(CkanCommand):
"""Initialise the local stats database tables """Initialise the local stats database tables
""" """
summary = __doc__.split('\n')[0]
summary = __doc__.split("\n")[0]
usage = __doc__ usage = __doc__
max_args = 0 max_args = 0
min_args = 0 min_args = 0
@ -44,7 +45,8 @@ class LoadAnalytics(CkanCommand):
date specifies start date for retrieving date specifies start date for retrieving
analytics data YYYY-MM-DD format analytics data YYYY-MM-DD format
""" """
summary = __doc__.split('\n')[0]
summary = __doc__.split("\n")[0]
usage = __doc__ usage = __doc__
max_args = 3 max_args = 3
min_args = 1 min_args = 1
@ -57,8 +59,8 @@ class LoadAnalytics(CkanCommand):
self.CONFIG = pylonsconfig self.CONFIG = pylonsconfig
self.resource_url_tag = self.CONFIG.get( self.resource_url_tag = self.CONFIG.get(
'googleanalytics_resource_prefix', "googleanalytics_resource_prefix", DEFAULT_RESOURCE_URL_TAG
DEFAULT_RESOURCE_URL_TAG) )
# funny dance we need to do to make sure we've got a # funny dance we need to do to make sure we've got a
# configured session # configured session
@ -69,41 +71,44 @@ class LoadAnalytics(CkanCommand):
def internal_save(self, packages_data, summary_date): def internal_save(self, packages_data, summary_date):
engine = model.meta.engine engine = model.meta.engine
# clear out existing data before adding new # clear out existing data before adding new
sql = '''DELETE FROM tracking_summary sql = (
WHERE tracking_date='%s'; ''' % summary_date """DELETE FROM tracking_summary
WHERE tracking_date='%s'; """
% summary_date
)
engine.execute(sql) engine.execute(sql)
for url, count in packages_data.iteritems(): for url, count in packages_data.iteritems():
# If it matches the resource then we should mark it as a resource. # If it matches the resource then we should mark it as a resource.
# For resources we don't currently find the package ID. # For resources we don't currently find the package ID.
if RESOURCE_URL_REGEX.match(url): if RESOURCE_URL_REGEX.match(url):
tracking_type = 'resource' tracking_type = "resource"
else: else:
tracking_type = 'page' tracking_type = "page"
sql = '''INSERT INTO tracking_summary sql = """INSERT INTO tracking_summary
(url, count, tracking_date, tracking_type) (url, count, tracking_date, tracking_type)
VALUES (%s, %s, %s, %s);''' VALUES (%s, %s, %s, %s);"""
engine.execute(sql, url, count, summary_date, tracking_type) engine.execute(sql, url, count, summary_date, tracking_type)
# get ids for dataset urls # get ids for dataset urls
sql = '''UPDATE tracking_summary t sql = """UPDATE tracking_summary t
SET package_id = COALESCE( SET package_id = COALESCE(
(SELECT id FROM package p WHERE t.url = %s || p.name) (SELECT id FROM package p WHERE t.url = %s || p.name)
,'~~not~found~~') ,'~~not~found~~')
WHERE t.package_id IS NULL AND tracking_type = 'page';''' WHERE t.package_id IS NULL AND tracking_type = 'page';"""
engine.execute(sql, PACKAGE_URL) engine.execute(sql, PACKAGE_URL)
# get ids for dataset edit urls which aren't captured otherwise # get ids for dataset edit urls which aren't captured otherwise
sql = '''UPDATE tracking_summary t sql = """UPDATE tracking_summary t
SET package_id = COALESCE( SET package_id = COALESCE(
(SELECT id FROM package p WHERE t.url = %s || p.name) (SELECT id FROM package p WHERE t.url = %s || p.name)
,'~~not~found~~') ,'~~not~found~~')
WHERE t.package_id = '~~not~found~~' AND tracking_type = 'page';''' WHERE t.package_id = '~~not~found~~' AND tracking_type = 'page';"""
engine.execute(sql, '%sedit/' % PACKAGE_URL) engine.execute(sql, "%sedit/" % PACKAGE_URL)
# update summary totals for resources # update summary totals for resources
sql = '''UPDATE tracking_summary t1 sql = """UPDATE tracking_summary t1
SET running_total = ( SET running_total = (
SELECT sum(count) SELECT sum(count)
FROM tracking_summary t2 FROM tracking_summary t2
@ -116,11 +121,11 @@ class LoadAnalytics(CkanCommand):
WHERE t1.url = t2.url WHERE t1.url = t2.url
AND t2.tracking_date <= t1.tracking_date AND t2.tracking_date >= t1.tracking_date - 14 AND t2.tracking_date <= t1.tracking_date AND t2.tracking_date >= t1.tracking_date - 14
) + t1.count ) + t1.count
WHERE t1.running_total = 0 AND tracking_type = 'resource';''' WHERE t1.running_total = 0 AND tracking_type = 'resource';"""
engine.execute(sql) engine.execute(sql)
# update summary totals for pages # update summary totals for pages
sql = '''UPDATE tracking_summary t1 sql = """UPDATE tracking_summary t1
SET running_total = ( SET running_total = (
SELECT sum(count) SELECT sum(count)
FROM tracking_summary t2 FROM tracking_summary t2
@ -135,23 +140,23 @@ class LoadAnalytics(CkanCommand):
) + t1.count ) + t1.count
WHERE t1.running_total = 0 AND tracking_type = 'page' WHERE t1.running_total = 0 AND tracking_type = 'page'
AND t1.package_id IS NOT NULL AND t1.package_id IS NOT NULL
AND t1.package_id != '~~not~found~~';''' AND t1.package_id != '~~not~found~~';"""
engine.execute(sql) engine.execute(sql)
def bulk_import(self): def bulk_import(self):
if len(self.args) == 3: if len(self.args) == 3:
# Get summeries from specified date # Get summeries from specified date
start_date = datetime.datetime.strptime(self.args[2], '%Y-%m-%d') start_date = datetime.datetime.strptime(self.args[2], "%Y-%m-%d")
else: else:
# No date given. See when we last have data for and get data # No date given. See when we last have data for and get data
# from 2 days before then in case new data is available. # from 2 days before then in case new data is available.
# If no date here then use 2010-01-01 as the start date # If no date here then use 2010-01-01 as the start date
engine = model.meta.engine engine = model.meta.engine
sql = '''SELECT tracking_date from tracking_summary sql = """SELECT tracking_date from tracking_summary
ORDER BY tracking_date DESC LIMIT 1;''' ORDER BY tracking_date DESC LIMIT 1;"""
result = engine.execute(sql).fetchall() result = engine.execute(sql).fetchall()
if result: if result:
start_date = result[0]['tracking_date'] start_date = result[0]["tracking_date"]
start_date += datetime.timedelta(-2) start_date += datetime.timedelta(-2)
# convert date to datetime # convert date to datetime
combine = datetime.datetime.combine combine = datetime.datetime.combine
@ -161,14 +166,15 @@ class LoadAnalytics(CkanCommand):
end_date = datetime.datetime.now() end_date = datetime.datetime.now()
while start_date < end_date: while start_date < end_date:
stop_date = start_date + datetime.timedelta(1) stop_date = start_date + datetime.timedelta(1)
packages_data = self.get_ga_data_new(start_date=start_date, packages_data = self.get_ga_data_new(
end_date=stop_date) start_date=start_date, end_date=stop_date
)
self.internal_save(packages_data, start_date) self.internal_save(packages_data, start_date)
# sleep to rate limit requests # sleep to rate limit requests
time.sleep(0.25) time.sleep(0.25)
start_date = stop_date start_date = stop_date
log.info('%s received %s' % (len(packages_data), start_date)) log.info("%s received %s" % (len(packages_data), start_date))
print '%s received %s' % (len(packages_data), start_date) print "%s received %s" % (len(packages_data), start_date)
def get_ga_data_new(self, start_date=None, end_date=None): def get_ga_data_new(self, start_date=None, end_date=None):
"""Get raw data from Google Analtyics for packages and """Get raw data from Google Analtyics for packages and
@ -182,32 +188,41 @@ class LoadAnalytics(CkanCommand):
end_date = end_date.strftime("%Y-%m-%d") end_date = end_date.strftime("%Y-%m-%d")
packages = {} packages = {}
query = 'ga:pagePath=~%s,ga:pagePath=~%s' % \ query = "ga:pagePath=~%s,ga:pagePath=~%s" % (
(PACKAGE_URL, self.resource_url_tag) PACKAGE_URL,
metrics = 'ga:uniquePageviews' self.resource_url_tag,
sort = '-ga:uniquePageviews' )
metrics = "ga:uniquePageviews"
sort = "-ga:uniquePageviews"
start_index = 1 start_index = 1
max_results = 10000 max_results = 10000
# data retrival is chunked # data retrival is chunked
completed = False completed = False
while not completed: while not completed:
results = self.service.data().ga().get(ids='ga:%s' % self.profile_id, results = (
filters=query, self.service.data()
dimensions='ga:pagePath', .ga()
start_date=start_date, .get(
start_index=start_index, ids="ga:%s" % self.profile_id,
max_results=max_results, filters=query,
metrics=metrics, dimensions="ga:pagePath",
sort=sort, start_date=start_date,
end_date=end_date).execute() start_index=start_index,
result_count = len(results.get('rows', [])) max_results=max_results,
metrics=metrics,
sort=sort,
end_date=end_date,
)
.execute()
)
result_count = len(results.get("rows", []))
if result_count < max_results: if result_count < max_results:
completed = True completed = True
for result in results.get('rows', []): for result in results.get("rows", []):
package = result[0] package = result[0]
package = '/' + '/'.join(package.split('/')[2:]) package = "/" + "/".join(package.split("/")[2:])
count = result[1] count = result[1]
packages[package] = int(count) packages[package] = int(count)
@ -219,25 +234,27 @@ class LoadAnalytics(CkanCommand):
def parse_and_save(self): def parse_and_save(self):
"""Grab raw data from Google Analytics and save to the database""" """Grab raw data from Google Analytics and save to the database"""
from ga_auth import (init_service, get_profile_id) from ga_auth import init_service, get_profile_id
tokenfile = self.args[0] tokenfile = self.args[0]
if not os.path.exists(tokenfile): if not os.path.exists(tokenfile):
raise Exception('Cannot find the token file %s' % self.args[0]) raise Exception("Cannot find the token file %s" % self.args[0])
try: try:
self.service = init_service(self.args[0]) self.service = init_service(self.args[0])
except TypeError as e: except TypeError as e:
raise Exception('Unable to create a service: {0}'.format(e)) raise Exception("Unable to create a service: {0}".format(e))
self.profile_id = get_profile_id(self.service) self.profile_id = get_profile_id(self.service)
if len(self.args) > 1: if len(self.args) > 1:
if len(self.args) > 2 and self.args[1].lower() != 'internal': if len(self.args) > 2 and self.args[1].lower() != "internal":
raise Exception('Illegal argument %s' % self.args[1]) raise Exception("Illegal argument %s" % self.args[1])
self.bulk_import() self.bulk_import()
else: else:
query = 'ga:pagePath=~%s,ga:pagePath=~%s' % \ query = "ga:pagePath=~%s,ga:pagePath=~%s" % (
(PACKAGE_URL, self.resource_url_tag) PACKAGE_URL,
self.resource_url_tag,
)
packages_data = self.get_ga_data(query_filter=query) packages_data = self.get_ga_data(query_filter=query)
self.save_ga_data(packages_data) self.save_ga_data(packages_data)
log.info("Saved %s records from google" % len(packages_data)) log.info("Saved %s records from google" % len(packages_data))
@ -246,20 +263,24 @@ class LoadAnalytics(CkanCommand):
"""Save tuples of packages_data to the database """Save tuples of packages_data to the database
""" """
for identifier, visits in packages_data.items(): for identifier, visits in packages_data.items():
recently = visits.get('recent', 0) recently = visits.get("recent", 0)
ever = visits.get('ever', 0) ever = visits.get("ever", 0)
matches = RESOURCE_URL_REGEX.match(identifier) matches = RESOURCE_URL_REGEX.match(identifier)
if matches: if matches:
resource_url = identifier[len(self.resource_url_tag):] resource_url = identifier[len(self.resource_url_tag) :]
resource = model.Session.query(model.Resource).autoflush(True)\ resource = (
.filter_by(id=matches.group(1)).first() model.Session.query(model.Resource)
.autoflush(True)
.filter_by(id=matches.group(1))
.first()
)
if not resource: if not resource:
log.warning("Couldn't find resource %s" % resource_url) log.warning("Couldn't find resource %s" % resource_url)
continue continue
dbutil.update_resource_visits(resource.id, recently, ever) dbutil.update_resource_visits(resource.id, recently, ever)
log.info("Updated %s with %s visits" % (resource.id, visits)) log.info("Updated %s with %s visits" % (resource.id, visits))
else: else:
package_name = identifier[len(PACKAGE_URL):] package_name = identifier[len(PACKAGE_URL) :]
if "/" in package_name: if "/" in package_name:
log.warning("%s not a valid package name" % package_name) log.warning("%s not a valid package name" % package_name)
continue continue
@ -271,8 +292,16 @@ class LoadAnalytics(CkanCommand):
log.info("Updated %s with %s visits" % (item.id, visits)) log.info("Updated %s with %s visits" % (item.id, visits))
model.Session.commit() model.Session.commit()
def ga_query(self, query_filter=None, from_date=None, to_date=None, def ga_query(
start_index=1, max_results=10000, metrics=None, sort=None): self,
query_filter=None,
from_date=None,
to_date=None,
start_index=1,
max_results=10000,
metrics=None,
sort=None,
):
"""Execute a query against Google Analytics """Execute a query against Google Analytics
""" """
if not to_date: if not to_date:
@ -281,22 +310,28 @@ class LoadAnalytics(CkanCommand):
if isinstance(from_date, datetime.date): if isinstance(from_date, datetime.date):
from_date = from_date.strftime("%Y-%m-%d") from_date = from_date.strftime("%Y-%m-%d")
if not metrics: if not metrics:
metrics = 'ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews' metrics = "ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews"
if not sort: if not sort:
sort = '-ga:uniquePageviews' sort = "-ga:uniquePageviews"
print '%s -> %s' % (from_date, to_date) print "%s -> %s" % (from_date, to_date)
results = self.service.data().ga().get(ids='ga:' + self.profile_id, results = (
start_date=from_date, self.service.data()
end_date=to_date, .ga()
dimensions='ga:pagePath', .get(
metrics=metrics, ids="ga:" + self.profile_id,
sort=sort, start_date=from_date,
start_index=start_index, end_date=to_date,
filters=query_filter, dimensions="ga:pagePath",
max_results=max_results metrics=metrics,
).execute() sort=sort,
start_index=start_index,
filters=query_filter,
max_results=max_results,
)
.execute()
)
return results return results
def get_ga_data(self, query_filter=None, start_date=None, end_date=None): def get_ga_data(self, query_filter=None, start_date=None, end_date=None):
@ -312,25 +347,31 @@ class LoadAnalytics(CkanCommand):
recent_date = recent_date.strftime("%Y-%m-%d") recent_date = recent_date.strftime("%Y-%m-%d")
floor_date = datetime.date(2005, 1, 1) floor_date = datetime.date(2005, 1, 1)
packages = {} packages = {}
queries = ['ga:pagePath=~%s' % PACKAGE_URL] queries = ["ga:pagePath=~%s" % PACKAGE_URL]
dates = {'recent': recent_date, 'ever': floor_date} dates = {"recent": recent_date, "ever": floor_date}
for date_name, date in dates.iteritems(): for date_name, date in dates.iteritems():
for query in queries: for query in queries:
results = self.ga_query(query_filter=query, results = self.ga_query(
metrics='ga:uniquePageviews', query_filter=query,
from_date=date) metrics="ga:uniquePageviews",
if 'rows' in results: from_date=date,
for result in results.get('rows'): )
if "rows" in results:
for result in results.get("rows"):
package = result[0] package = result[0]
if not package.startswith(PACKAGE_URL): if not package.startswith(PACKAGE_URL):
package = '/' + '/'.join(package.split('/')[2:]) package = "/" + "/".join(package.split("/")[2:])
count = result[1] count = result[1]
# Make sure we add the different representations of the same # Make sure we add the different representations of the same
# dataset /mysite.com & /www.mysite.com ... # dataset /mysite.com & /www.mysite.com ...
val = 0 val = 0
if package in packages and date_name in packages[package]: if (
package in packages
and date_name in packages[package]
):
val += packages[package][date_name] val += packages[package][date_name]
packages.setdefault(package, {})[date_name] = \ packages.setdefault(package, {})[date_name] = (
int(count) + val int(count) + val
)
return packages return packages

View File

@ -11,32 +11,33 @@ from paste.util.multidict import MultiDict
from ckan.controllers.api import ApiController from ckan.controllers.api import ApiController
log = logging.getLogger('ckanext.googleanalytics') log = logging.getLogger("ckanext.googleanalytics")
class GAController(BaseController):
    """Controller that renders the Google Analytics summary page."""

    def view(self):
        """Render ``summary.html`` with the top resources by visits.

        The list returned by ``dbutil.get_top_resources(limit=10)`` is
        exposed to the template as ``c.top_resources``.
        """
        # These are resource rows, not packages, despite the older wording.
        c.top_resources = dbutil.get_top_resources(limit=10)
        return render("summary.html")
class GAApiController(ApiController): class GAApiController(ApiController):
# intercept API calls to record via google analytics # intercept API calls to record via google analytics
def _post_analytics(
    self, user, request_obj_type, request_function, request_id
):
    """Queue a Google Analytics event describing the current API request.

    Does nothing when ``googleanalytics.id`` is not configured.

    :param user: name of the requesting user; only its MD5 hex digest is
        placed in the payload (``cid``), never the raw name.
    :param request_obj_type: first half of the event action (``ea``).
    :param request_function: second half of the event action (``ea``).
    :param request_id: event label (``el``).
    """
    tracking_id = config.get("googleanalytics.id")
    if not tracking_id:
        return

    # hashlib only accepts bytes: encode text user names (every ``str`` on
    # Python 3, ``unicode`` on Python 2) and treat a missing user as "".
    user = user or ""
    if not isinstance(user, bytes):
        user = user.encode("utf-8")

    data_dict = {
        "v": 1,
        "tid": tracking_id,
        # customer id should be obfuscated
        "cid": hashlib.md5(user).hexdigest(),
        "t": "event",
        "dh": c.environ["HTTP_HOST"],
        "dp": c.environ["PATH_INFO"],
        "dr": c.environ.get("HTTP_REFERER", ""),
        "ec": "CKAN API Request",
        "ea": request_obj_type + request_function,
        "el": request_id,
    }
    plugin.GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
@ -44,66 +45,72 @@ class GAApiController(ApiController):
def action(self, logic_function, ver=None):
    """Record an analytics event for an action call, then run the action.

    The event label is taken from the request data: ``id`` by default,
    overridden by ``q`` and then by ``query`` when those keys are present.
    Recording is best-effort: any error while gathering the data is logged
    at debug level and the request is still handed to
    ``ApiController.action``.
    """
    try:
        function = logic.get_action(logic_function)
        side_effect_free = getattr(function, "side_effect_free", False)
        request_data = self._get_request_data(
            try_url_params=side_effect_free
        )
        if isinstance(request_data, dict):
            request_id = request_data.get("id", "")
            if "q" in request_data:
                request_id = request_data["q"]
            if "query" in request_data:
                request_id = request_data["query"]
            self._post_analytics(c.user, logic_function, "", request_id)
    except Exception as e:
        # Analytics must never break the API call itself.
        log.debug(e)

    return ApiController.action(self, logic_function, ver)
def list(self, ver=None, register=None, subregister=None, id=None):
    """Record a "list" analytics event, then call ``ApiController.list``.

    The event object type is ``register``, suffixed with
    ``_<subregister>`` when a subregister is given.
    """
    # ``register`` defaults to None; fall back to "" so that building the
    # event name cannot raise TypeError before the real handler runs.
    obj_type = (register or "") + (
        "_" + str(subregister) if subregister else ""
    )
    self._post_analytics(c.user, obj_type, "list", id)
    return ApiController.list(self, ver, register, subregister, id)
def show(
    self, ver=None, register=None, subregister=None, id=None, id2=None
):
    """Record a "show" analytics event, then call ``ApiController.show``.

    The event object type is ``register``, suffixed with
    ``_<subregister>`` when a subregister is given.
    """
    # ``register`` defaults to None; fall back to "" so that building the
    # event name cannot raise TypeError before the real handler runs.
    obj_type = (register or "") + (
        "_" + str(subregister) if subregister else ""
    )
    self._post_analytics(c.user, obj_type, "show", id)
    return ApiController.show(self, ver, register, subregister, id, id2)
def update(
    self, ver=None, register=None, subregister=None, id=None, id2=None
):
    """Record an "update" analytics event, then call ``ApiController.update``.

    The event object type is ``register``, suffixed with
    ``_<subregister>`` when a subregister is given.
    """
    # ``register`` defaults to None; fall back to "" so that building the
    # event name cannot raise TypeError before the real handler runs.
    obj_type = (register or "") + (
        "_" + str(subregister) if subregister else ""
    )
    self._post_analytics(c.user, obj_type, "update", id)
    return ApiController.update(self, ver, register, subregister, id, id2)
def delete(
    self, ver=None, register=None, subregister=None, id=None, id2=None
):
    """Record a "delete" analytics event, then call ``ApiController.delete``.

    The event object type is ``register``, suffixed with
    ``_<subregister>`` when a subregister is given.
    """
    # ``register`` defaults to None; fall back to "" so that building the
    # event name cannot raise TypeError before the real handler runs.
    obj_type = (register or "") + (
        "_" + str(subregister) if subregister else ""
    )
    self._post_analytics(c.user, obj_type, "delete", id)
    return ApiController.delete(self, ver, register, subregister, id, id2)
def search(self, ver=None, register=None): def search(self, ver=None, register=None):
id = None id = None
try: try:
params = MultiDict(self._get_search_params(request.params)) params = MultiDict(self._get_search_params(request.params))
if 'q' in params.keys(): if "q" in params.keys():
id = params['q'] id = params["q"]
if 'query' in params.keys(): if "query" in params.keys():
id = params['query'] id = params["query"]
except ValueError, e: except ValueError, e:
log.debug(str(e)) log.debug(str(e))
pass pass

View File

@ -3,6 +3,7 @@ from sqlalchemy.sql import select, text
from sqlalchemy import func from sqlalchemy import func
import ckan.model as model import ckan.model as model
# from ckan.model.authz import PSEUDO_USER__VISITOR # from ckan.model.authz import PSEUDO_USER__VISITOR
from ckan.lib.base import * from ckan.lib.base import *
@ -11,16 +12,20 @@ cached_tables = {}
def init_tables(): def init_tables():
metadata = MetaData() metadata = MetaData()
package_stats = Table('package_stats', metadata, package_stats = Table(
Column('package_id', String(60), "package_stats",
primary_key=True), metadata,
Column('visits_recently', Integer), Column("package_id", String(60), primary_key=True),
Column('visits_ever', Integer)) Column("visits_recently", Integer),
resource_stats = Table('resource_stats', metadata, Column("visits_ever", Integer),
Column('resource_id', String(60), )
primary_key=True), resource_stats = Table(
Column('visits_recently', Integer), "resource_stats",
Column('visits_ever', Integer)) metadata,
Column("resource_id", String(60), primary_key=True),
Column("visits_recently", Integer),
Column("visits_ever", Integer),
)
metadata.create_all(model.meta.engine) metadata.create_all(model.meta.engine)
@ -35,63 +40,68 @@ def get_table(name):
def _update_visits(table_name, item_id, recently, ever):
    """Insert or update the visit counters of one row in a stats table.

    :param table_name: name of the stats table; the id column name is
        derived by replacing its ``_stats`` suffix with ``_id``.
    :param item_id: value of that id column identifying the row.
    :param recently: value stored in ``visits_recently``.
    :param ever: value stored in ``visits_ever``.
    """
    stats = get_table(table_name)
    id_col_name = "%s_id" % table_name[: -len("_stats")]
    id_col = getattr(stats.c, id_col_name)
    connection = model.Session.connection()

    # Count the matching rows first to choose between UPDATE and INSERT.
    existing = connection.execute(
        select([func.count(id_col)], id_col == item_id)
    ).fetchone()

    if existing and existing[0]:
        statement = (
            stats.update()
            .where(id_col == item_id)
            .values(visits_recently=recently, visits_ever=ever)
        )
    else:
        values = {
            id_col_name: item_id,
            "visits_recently": recently,
            "visits_ever": ever,
        }
        statement = stats.insert().values(**values)
    connection.execute(statement)
def update_resource_visits(resource_id, recently, ever):
    """Store the visit counters for a resource in ``resource_stats``.

    Thin wrapper around ``_update_visits``; returns whatever it returns.
    """
    return _update_visits("resource_stats", resource_id, recently, ever)
def update_package_visits(package_id, recently, ever):
    """Store the visit counters for a package in ``package_stats``.

    Thin wrapper around ``_update_visits``; returns whatever it returns.
    """
    return _update_visits("package_stats", package_id, recently, ever)
def get_resource_visits_for_url(url):
    """Return the all-time visit count of the resource with this URL.

    :param url: value matched against ``resource.url``.
    :returns: ``visits_ever`` of the first matching row, or ``""`` when no
        row matches or the stored count is zero/NULL.
    """
    connection = model.Session.connection()
    row = connection.execute(
        text(
            """SELECT visits_ever FROM resource_stats, resource
            WHERE resource_id = resource.id
            AND resource.url = :url"""
        ),
        url=url,
    ).fetchone()
    # A falsy count is reported as "" as well, exactly as before.
    if row and row[0]:
        return row[0]
    return ""
""" get_top_packages is broken, and needs to be rewritten to work with """ get_top_packages is broken, and needs to be rewritten to work with
CKAN 2.*. This is because ckan.authz has been removed in CKAN 2.* CKAN 2.*. This is because ckan.authz has been removed in CKAN 2.*
See commit ffa86c010d5d25fa1881c6b915e48f3b44657612 See commit ffa86c010d5d25fa1881c6b915e48f3b44657612
""" """
def get_top_packages(limit=20): def get_top_packages(limit=20):
items = [] items = []
# caveat emptor: the query below will not filter out private # caveat emptor: the query below will not filter out private
# or deleted datasets (TODO) # or deleted datasets (TODO)
q = model.Session.query(model.Package) q = model.Session.query(model.Package)
connection = model.Session.connection() connection = model.Session.connection()
package_stats = get_table('package_stats') package_stats = get_table("package_stats")
s = select([package_stats.c.package_id, s = select(
package_stats.c.visits_recently, [
package_stats.c.visits_ever])\ package_stats.c.package_id,
.order_by(package_stats.c.visits_recently.desc()) package_stats.c.visits_recently,
package_stats.c.visits_ever,
]
).order_by(package_stats.c.visits_recently.desc())
res = connection.execute(s).fetchmany(limit) res = connection.execute(s).fetchmany(limit)
for package_id, recent, ever in res: for package_id, recent, ever in res:
item = q.filter("package.id = '%s'" % package_id) item = q.filter("package.id = '%s'" % package_id)
@ -104,15 +114,19 @@ def get_top_packages(limit=20):
def get_top_resources(limit=20): def get_top_resources(limit=20):
items = [] items = []
connection = model.Session.connection() connection = model.Session.connection()
resource_stats = get_table('resource_stats') resource_stats = get_table("resource_stats")
s = select([resource_stats.c.resource_id, s = select(
resource_stats.c.visits_recently, [
resource_stats.c.visits_ever])\ resource_stats.c.resource_id,
.order_by(resource_stats.c.visits_recently.desc()) resource_stats.c.visits_recently,
resource_stats.c.visits_ever,
]
).order_by(resource_stats.c.visits_recently.desc())
res = connection.execute(s).fetchmany(limit) res = connection.execute(s).fetchmany(limit)
for resource_id, recent, ever in res: for resource_id, recent, ever in res:
item = model.Session.query(model.Resource)\ item = model.Session.query(model.Resource).filter(
.filter("resource.id = '%s'" % resource_id) "resource.id = '%s'" % resource_id
)
if not item.count(): if not item.count():
continue continue
items.append((item.first(), recent, ever)) items.append((item.first(), recent, ever))

View File

@ -10,10 +10,9 @@ def _prepare_credentials(credentials_filename):
Either returns the user's oauth credentials or uses the credentials Either returns the user's oauth credentials or uses the credentials
file to generate a token (by forcing the user to login in the browser) file to generate a token (by forcing the user to login in the browser)
""" """
scope = ['https://www.googleapis.com/auth/analytics.readonly'] scope = ["https://www.googleapis.com/auth/analytics.readonly"]
credentials = ServiceAccountCredentials.from_json_keyfile_name( credentials = ServiceAccountCredentials.from_json_keyfile_name(
credentials_filename, credentials_filename, scopes=scope
scopes=scope
) )
return credentials return credentials
@ -29,7 +28,7 @@ def init_service(credentials_file):
credentials = _prepare_credentials(credentials_file) credentials = _prepare_credentials(credentials_file)
http = credentials.authorize(http) # authorize the http object http = credentials.authorize(http) # authorize the http object
return build('analytics', 'v3', http=http) return build("analytics", "v3", http=http)
def get_profile_id(service): def get_profile_id(service):
@ -42,23 +41,31 @@ def get_profile_id(service):
""" """
accounts = service.management().accounts().list().execute() accounts = service.management().accounts().list().execute()
if not accounts.get('items'): if not accounts.get("items"):
return None return None
accountName = config.get('googleanalytics.account') accountName = config.get("googleanalytics.account")
webPropertyId = config.get('googleanalytics.id') webPropertyId = config.get("googleanalytics.id")
for acc in accounts.get('items'): for acc in accounts.get("items"):
if acc.get('name') == accountName: if acc.get("name") == accountName:
accountId = acc.get('id') accountId = acc.get("id")
# TODO: check, whether next line is doing something useful. # TODO: check, whether next line is doing something useful.
webproperties = service.management().webproperties().list( webproperties = (
accountId=accountId).execute() service.management()
.webproperties()
.list(accountId=accountId)
.execute()
)
profiles = service.management().profiles().list( profiles = (
accountId=accountId, webPropertyId=webPropertyId).execute() service.management()
.profiles()
.list(accountId=accountId, webPropertyId=webPropertyId)
.execute()
)
if profiles.get('items'): if profiles.get("items"):
return profiles.get('items')[0].get('id') return profiles.get("items")[0].get("id")
return None return None

View File

@ -1,11 +0,0 @@
from ckan import model
def setup():
    """Create the ``package_downloads`` table if it does not exist yet."""
    connection = model.Session.connection()
    # NOTE(review): ``primary_key`` is not the SQL ``PRIMARY KEY`` spelling;
    # left unchanged here -- confirm the intended constraint before fixing.
    connection.execute("""CREATE TABLE IF NOT EXISTS package_downloads (
        id integer primary_key,
        package_id varchar(60),
        download_visits integer,
        views_visits integer);""")

View File

@ -1,275 +0,0 @@
import ast
import logging
import urllib
import commands
import paste.deploy.converters as converters
from ckan.lib.base import c
import ckan.lib.helpers as h
import ckan.plugins as p
from routes.mapper import SubMapper
from pylons import config
from ckan.controllers.package import PackageController
import urllib2
import importlib
import hashlib
import threading
import Queue
log = logging.getLogger('ckanext.googleanalytics')
def _post_analytics(
        user, event_type, request_obj_type, request_function, request_id):
    """Queue a Google Analytics "event" hit describing the current request.

    Does nothing when ``googleanalytics.id`` is not set in the config.

    :param user: name of the acting user; only its MD5 digest is sent, as
        the ``cid`` field. ``None`` is treated as an empty name.
    :param event_type: value of the event category field (``ec``).
    :param request_obj_type: first half of the event action field (``ea``).
    :param request_function: second half of the event action field (``ea``).
    :param request_id: value of the event label field (``el``).
    """
    tracking_id = config.get('googleanalytics.id')
    if not tracking_id:
        return

    # md5() needs a byte string: fall back to an empty name and encode text
    # explicitly so that non-ASCII user names do not raise.
    user = user or ''
    if not isinstance(user, bytes):
        user = user.encode('utf-8')

    data_dict = {
        "v": 1,
        "tid": tracking_id,
        # customer id should be obfuscated
        "cid": hashlib.md5(user).hexdigest(),
        "t": "event",
        "dh": c.environ['HTTP_HOST'],
        "dp": c.environ['PATH_INFO'],
        "dr": c.environ.get('HTTP_REFERER', ''),
        "ec": event_type,
        "ea": request_obj_type + request_function,
        "el": request_id,
    }
    GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
def wrap_resource_download(func):
    """Wrap a ``resource_download`` controller method with GA tracking.

    :param func: the original method, invoked as
        ``func(cls, id, resource_id, filename=...)``.
    :returns: a function with the same call signature that first queues a
        "CKAN Resource Download Request" event and then delegates to *func*.
    """
    import functools

    @functools.wraps(func)
    def func_wrapper(cls, id, resource_id, filename=None):
        _post_analytics(
            c.user,
            "CKAN Resource Download Request",
            "Resource",
            "Download",
            resource_id
        )
        # Forward the caller's filename instead of always replacing it
        # with None.
        return func(cls, id, resource_id, filename=filename)

    return func_wrapper
class GoogleAnalyticsException(Exception):
    """Error raised by this extension, e.g. for a missing config option."""
class AnalyticsPostThread(threading.Thread):
    """Worker thread that POSTs queued hits to Google Analytics.

    Every item taken from the queue is a dict of request parameters; it is
    URL-encoded and sent as the body of one POST request.
    """

    def __init__(self, queue):
        """Remember the queue this worker consumes from."""
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Send queued hits forever; a hit that fails is logged and dropped."""
        while True:
            # blocks until a hit is available
            data_dict = self.queue.get()
            try:
                data = urllib.urlencode(data_dict)
                log.debug("Sending API event to Google Analytics: " + data)
                # send analytics
                response = urllib2.urlopen(
                    "http://www.google-analytics.com/collect",
                    data,
                    # timeout in seconds
                    # https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
                    10)
                response.close()
            except Exception:
                # A network or encoding error must not terminate the worker:
                # a dead worker never comes back and the queue would only
                # grow from then on.
                log.exception("Failed to send event to Google Analytics")
            finally:
                # signals to queue job is done, even when sending failed
                self.queue.task_done()
class GoogleAnalyticsPlugin(p.SingletonPlugin):
    '''CKAN plugin that adds Google Analytics tracking.

    It reads the ``googleanalytics.*`` options, routes API calls through the
    extension's controllers, wraps the resource download action so that
    downloads are reported, and provides the ``googleanalytics_header``
    template helper.
    '''
    p.implements(p.IConfigurable, inherit=True)
    p.implements(p.IRoutes, inherit=True)
    p.implements(p.IConfigurer, inherit=True)
    p.implements(p.ITemplateHelpers)

    # Hits waiting to be sent; drained by the AnalyticsPostThread workers
    # started in configure().
    analytics_queue = Queue.Queue()

    def configure(self, config):
        '''Load config settings for this extension from config file.

        See IConfigurable.

        :raises GoogleAnalyticsException: if ``googleanalytics.id`` is
            missing from the config.
        '''
        if 'googleanalytics.id' not in config:
            msg = "Missing googleanalytics.id in config"
            raise GoogleAnalyticsException(msg)
        self.googleanalytics_id = config['googleanalytics.id']
        self.googleanalytics_domain = config.get(
            'googleanalytics.domain', 'auto')
        # The option holds a Python dict literal.
        self.googleanalytics_fields = ast.literal_eval(config.get(
            'googleanalytics.fields', '{}'))

        # Comma separated list of domains.
        googleanalytics_linked_domains = config.get(
            'googleanalytics.linked_domains', ''
        )
        self.googleanalytics_linked_domains = [
            x.strip() for x in googleanalytics_linked_domains.split(',') if x
        ]

        if self.googleanalytics_linked_domains:
            self.googleanalytics_fields['allowLinker'] = 'true'

        self.googleanalytics_javascript_url = h.url_for_static(
            '/scripts/ckanext-googleanalytics.js')

        # If resource_prefix is not in config file then write the default value
        # to the config dict, otherwise templates seem to get 'true' when they
        # try to read resource_prefix from config.
        if 'googleanalytics_resource_prefix' not in config:
            config['googleanalytics_resource_prefix'] = (
                commands.DEFAULT_RESOURCE_URL_TAG)
        self.googleanalytics_resource_prefix = config[
            'googleanalytics_resource_prefix']

        self.show_downloads = converters.asbool(
            config.get('googleanalytics.show_downloads', True))
        self.track_events = converters.asbool(
            config.get('googleanalytics.track_events', False))
        self.enable_user_id = converters.asbool(
            config.get('googleanalytics.enable_user_id', False))

        if not converters.asbool(config.get('ckan.legacy_templates', 'false')):
            p.toolkit.add_resource(
                'fanstatic_library', 'ckanext-googleanalytics')

        # spawn a pool of 5 threads, and pass them queue instance
        for i in range(5):
            t = AnalyticsPostThread(self.analytics_queue)
            t.setDaemon(True)
            t.start()

    def update_config(self, config):
        '''Change the CKAN (Pylons) environment configuration.

        Registers either the legacy template/public directories or the
        regular template directory, depending on ``ckan.legacy_templates``.

        See IConfigurer.
        '''
        if converters.asbool(config.get('ckan.legacy_templates', 'false')):
            p.toolkit.add_template_directory(config, 'legacy_templates')
            p.toolkit.add_public_directory(config, 'legacy_public')
        else:
            p.toolkit.add_template_directory(config, 'templates')

    def before_map(self, map):
        '''Add new routes that this extension's controllers handle.

        The API routes are connected to ``GAApiController``.

        See IRoutes.
        '''
        # Helpers to reduce code clutter
        GET = dict(method=['GET'])
        PUT = dict(method=['PUT'])
        POST = dict(method=['POST'])
        DELETE = dict(method=['DELETE'])
        GET_POST = dict(method=['GET', 'POST'])
        # intercept API calls that we want to capture analytics on
        register_list = [
            'package',
            'dataset',
            'resource',
            'tag',
            'group',
            'related',
            'revision',
            'licenses',
            'rating',
            'user',
            'activity'
        ]
        register_list_str = '|'.join(register_list)
        api_controller = 'ckanext.googleanalytics.controller:GAApiController'

        # /api ver 3 or none
        with SubMapper(map, controller=api_controller,
                       path_prefix='/api{ver:/3|}', ver='/3') as m:
            m.connect('/action/{logic_function}', action='action',
                      conditions=GET_POST)

        # /api ver 1, 2, 3 or none
        with SubMapper(map, controller=api_controller,
                       path_prefix='/api{ver:/1|/2|/3|}', ver='/1') as m:
            m.connect('/search/{register}', action='search')

        # /api/rest ver 1, 2 or none
        with SubMapper(map, controller=api_controller,
                       path_prefix='/api{ver:/1|/2|}', ver='/1',
                       requirements=dict(register=register_list_str)
                       ) as m:
            m.connect('/rest/{register}', action='list', conditions=GET)
            m.connect('/rest/{register}', action='create', conditions=POST)
            m.connect('/rest/{register}/{id}', action='show', conditions=GET)
            m.connect('/rest/{register}/{id}', action='update',
                      conditions=PUT)
            m.connect('/rest/{register}/{id}', action='update',
                      conditions=POST)
            m.connect('/rest/{register}/{id}', action='delete',
                      conditions=DELETE)

        return map

    def after_map(self, map):
        '''Wrap the resource download action and add the analytics page.

        See IRoutes.
        '''
        self.modify_resource_download_route(map)
        map.redirect("/analytics/package/top", "/analytics/dataset/top")
        map.connect(
            'analytics', '/analytics/dataset/top',
            controller='ckanext.googleanalytics.controller:GAController',
            action='view'
        )
        return map

    def get_helpers(self):
        '''Return the CKAN 2.0 template helper functions this plugin provides.

        See ITemplateHelpers.
        '''
        return {'googleanalytics_header': self.googleanalytics_header}

    def googleanalytics_header(self):
        '''Render the googleanalytics_header snippet for CKAN 2.0 templates.

        This is a template helper function that renders the
        googleanalytics_header jinja snippet. To be called from the jinja
        templates in this extension, see ITemplateHelpers.
        '''
        # Work on a per-call copy: writing ``userId`` into the dict stored
        # on this singleton would keep one user's id around and render it
        # into pages served to later (including anonymous) requests.
        fields = dict(self.googleanalytics_fields)
        if self.enable_user_id and c.user:
            fields['userId'] = str(c.userobj.id)

        data = {
            'googleanalytics_id': self.googleanalytics_id,
            'googleanalytics_domain': self.googleanalytics_domain,
            'googleanalytics_fields': str(fields),
            'googleanalytics_linked_domains':
                self.googleanalytics_linked_domains
        }
        return p.toolkit.render_snippet(
            'googleanalytics/snippets/googleanalytics_header.html', data)

    def modify_resource_download_route(self, map):
        '''Modifies resource_download method in related controller
        to attach GA tracking code.
        '''
        # NOTE(review): the ``else`` below is taken to pair with the inner
        # ``if`` (a mapper without a named ``resource_download`` route falls
        # back to PackageController) -- confirm against the repository.
        if '_routenames' in map.__dict__:
            if 'resource_download' in map.__dict__['_routenames']:
                route_data = map.__dict__['_routenames'][
                    'resource_download'].__dict__
                # "module.path:ClassName"
                route_controller = route_data['defaults']['controller'].split(
                    ':')
                module = importlib.import_module(route_controller[0])
                controller_class = getattr(module, route_controller[1])
                controller_class.resource_download = wrap_resource_download(
                    controller_class.resource_download)
            else:
                # If no custom uploader applied, use the default one
                PackageController.resource_download = wrap_resource_download(
                    PackageController.resource_download)

View File

@ -0,0 +1,163 @@
from __future__ import absolute_import
import ast
import logging
import urllib
import ckanext.googleanalytics.commands as commands
import paste.deploy.converters as converters
import ckan.lib.helpers as h
import ckan.plugins as p
import ckan.plugins.toolkit as tk
import urllib2
from ckan.exceptions import CkanVersionException
import threading
log = logging.getLogger(__name__)
# Pick the mixin matching the running CKAN: the flask one when
# tk.requires_ckan_version("2.9") passes, the paster one when it raises
# CkanVersionException.
try:
    tk.requires_ckan_version("2.9")
except CkanVersionException:
    from ckanext.googleanalytics.plugin.paster_plugin import GAMixinPlugin
else:
    from ckanext.googleanalytics.plugin.flask_plugin import GAMixinPlugin
class GoogleAnalyticsException(Exception):
    """Error raised by this extension, e.g. for a missing config option."""
class AnalyticsPostThread(threading.Thread):
    """Worker thread that POSTs queued hits to Google Analytics.

    Every item taken from the queue is a dict of request parameters; it is
    URL-encoded and sent as the body of one POST request.
    """

    def __init__(self, queue):
        """Remember the queue this worker consumes from."""
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Send queued hits forever; a hit that fails is logged and dropped."""
        while True:
            # blocks until a hit is available
            data_dict = self.queue.get()
            try:
                data = urllib.urlencode(data_dict)
                log.debug("Sending API event to Google Analytics: " + data)
                # send analytics
                response = urllib2.urlopen(
                    "http://www.google-analytics.com/collect",
                    data,
                    # timeout in seconds
                    # https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
                    10,
                )
                response.close()
            except Exception:
                # A network or encoding error must not terminate the worker:
                # a dead worker never comes back and the queue would only
                # grow from then on.
                log.exception("Failed to send event to Google Analytics")
            finally:
                # signals to queue job is done, even when sending failed
                self.queue.task_done()
class GoogleAnalyticsPlugin(GAMixinPlugin, p.SingletonPlugin):
    """CKAN plugin that adds Google Analytics tracking.

    Routing is supplied by ``GAMixinPlugin``; this class reads the
    ``googleanalytics.*`` options, starts the sender threads and provides
    the ``googleanalytics_header`` template helper.
    """

    p.implements(p.IConfigurable, inherit=True)
    p.implements(p.IConfigurer, inherit=True)
    p.implements(p.ITemplateHelpers)

    def configure(self, config):
        """Load config settings for this extension from config file.

        See IConfigurable.

        :raises GoogleAnalyticsException: if ``googleanalytics.id`` is
            missing from the config.
        """
        if "googleanalytics.id" not in config:
            msg = "Missing googleanalytics.id in config"
            raise GoogleAnalyticsException(msg)
        self.googleanalytics_id = config["googleanalytics.id"]
        self.googleanalytics_domain = config.get(
            "googleanalytics.domain", "auto"
        )
        # The option holds a Python dict literal.
        self.googleanalytics_fields = ast.literal_eval(
            config.get("googleanalytics.fields", "{}")
        )

        # Comma separated list of domains.
        googleanalytics_linked_domains = config.get(
            "googleanalytics.linked_domains", ""
        )
        self.googleanalytics_linked_domains = [
            x.strip() for x in googleanalytics_linked_domains.split(",") if x
        ]

        if self.googleanalytics_linked_domains:
            self.googleanalytics_fields["allowLinker"] = "true"

        self.googleanalytics_javascript_url = h.url_for_static(
            "/scripts/ckanext-googleanalytics.js"
        )

        # If resource_prefix is not in config file then write the default value
        # to the config dict, otherwise templates seem to get 'true' when they
        # try to read resource_prefix from config.
        if "googleanalytics_resource_prefix" not in config:
            config[
                "googleanalytics_resource_prefix"
            ] = commands.DEFAULT_RESOURCE_URL_TAG
        self.googleanalytics_resource_prefix = config[
            "googleanalytics_resource_prefix"
        ]

        self.show_downloads = converters.asbool(
            config.get("googleanalytics.show_downloads", True)
        )
        self.track_events = converters.asbool(
            config.get("googleanalytics.track_events", False)
        )
        self.enable_user_id = converters.asbool(
            config.get("googleanalytics.enable_user_id", False)
        )

        if not converters.asbool(config.get("ckan.legacy_templates", "false")):
            p.toolkit.add_resource(
                "../fanstatic_library", "ckanext-googleanalytics"
            )

        # spawn a pool of 5 threads, and pass them queue instance
        for i in range(5):
            t = AnalyticsPostThread(self.analytics_queue)
            t.setDaemon(True)
            t.start()

    def update_config(self, config):
        """Change the CKAN (Pylons) environment configuration.

        Registers either the legacy template/public directories or the
        regular template directory, depending on ``ckan.legacy_templates``.

        See IConfigurer.
        """
        if converters.asbool(config.get("ckan.legacy_templates", "false")):
            p.toolkit.add_template_directory(config, "../legacy_templates")
            p.toolkit.add_public_directory(config, "../legacy_public")
        else:
            p.toolkit.add_template_directory(config, "../templates")

    def get_helpers(self):
        """Return the CKAN 2.0 template helper functions this plugin provides.

        See ITemplateHelpers.
        """
        return {"googleanalytics_header": self.googleanalytics_header}

    def googleanalytics_header(self):
        """Render the googleanalytics_header snippet for CKAN 2.0 templates.

        This is a template helper function that renders the
        googleanalytics_header jinja snippet. To be called from the jinja
        templates in this extension, see ITemplateHelpers.
        """
        # Work on a per-call copy: writing ``userId`` into the dict stored
        # on this singleton would keep one user's id around and render it
        # into pages served to later (including anonymous) requests.
        fields = dict(self.googleanalytics_fields)
        if self.enable_user_id and tk.c.user:
            fields["userId"] = str(tk.c.userobj.id)

        data = {
            "googleanalytics_id": self.googleanalytics_id,
            "googleanalytics_domain": self.googleanalytics_domain,
            "googleanalytics_fields": str(fields),
            "googleanalytics_linked_domains": self.googleanalytics_linked_domains,
        }
        return p.toolkit.render_snippet(
            "googleanalytics/snippets/googleanalytics_header.html", data
        )

View File

@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
import Queue
import ckan.plugins as plugins
from ckanext.googleanalytics.views import ga
class GAMixinPlugin(plugins.SingletonPlugin):
    """Flask flavour of the plugin mixin: registers the ``ga`` blueprint."""

    plugins.implements(plugins.IBlueprint)

    # Queue of pending analytics hits, shared at class level.
    analytics_queue = Queue.Queue()

    def get_blueprint(self):
        """Return the blueprints provided by this plugin (see IBlueprint)."""
        blueprints = [ga]
        return blueprints

View File

@ -0,0 +1,165 @@
# -*- coding: utf-8 -*-
import Queue
import hashlib
import importlib
import ckan.plugins as plugins
import ckan.plugins.toolkit as tk
from ckan.controllers.package import PackageController
from pylons import config
from routes.mapper import SubMapper
class GAMixinPlugin(plugins.SingletonPlugin):
    """Pylons/paster flavour of the plugin mixin.

    Connects the API routes to the extension's controllers and wraps the
    ``resource_download`` controller action so downloads are reported.
    """

    plugins.implements(plugins.IRoutes)

    # Queue of pending analytics hits; _post_analytics() puts dicts on it.
    analytics_queue = Queue.Queue()

    def before_map(self, map):
        """Add new routes that this extension's controllers handle.

        The API routes are connected to ``GAApiController``.

        See IRoutes.
        """
        # Helpers to reduce code clutter
        GET = dict(method=["GET"])
        PUT = dict(method=["PUT"])
        POST = dict(method=["POST"])
        DELETE = dict(method=["DELETE"])
        GET_POST = dict(method=["GET", "POST"])
        # intercept API calls that we want to capture analytics on
        register_list = [
            "package",
            "dataset",
            "resource",
            "tag",
            "group",
            "related",
            "revision",
            "licenses",
            "rating",
            "user",
            "activity",
        ]
        # Joined into a regex alternation used as a route requirement below.
        register_list_str = "|".join(register_list)
        # /api ver 3 or none
        with SubMapper(
            map,
            controller="ckanext.googleanalytics.controller:GAApiController",
            path_prefix="/api{ver:/3|}",
            ver="/3",
        ) as m:
            m.connect(
                "/action/{logic_function}",
                action="action",
                conditions=GET_POST,
            )

        # /api ver 1, 2, 3 or none
        with SubMapper(
            map,
            controller="ckanext.googleanalytics.controller:GAApiController",
            path_prefix="/api{ver:/1|/2|/3|}",
            ver="/1",
        ) as m:
            m.connect("/search/{register}", action="search")

        # /api/rest ver 1, 2 or none
        with SubMapper(
            map,
            controller="ckanext.googleanalytics.controller:GAApiController",
            path_prefix="/api{ver:/1|/2|}",
            ver="/1",
            requirements=dict(register=register_list_str),
        ) as m:
            m.connect("/rest/{register}", action="list", conditions=GET)
            m.connect("/rest/{register}", action="create", conditions=POST)
            m.connect("/rest/{register}/{id}", action="show", conditions=GET)
            m.connect("/rest/{register}/{id}", action="update", conditions=PUT)
            m.connect(
                "/rest/{register}/{id}", action="update", conditions=POST
            )
            m.connect(
                "/rest/{register}/{id}", action="delete", conditions=DELETE
            )

        return map

    def after_map(self, map):
        """Wrap the resource download action and add the analytics page.

        See IRoutes.
        """
        self._modify_resource_download_route(map)
        map.redirect("/analytics/package/top", "/analytics/dataset/top")
        map.connect(
            "analytics",
            "/analytics/dataset/top",
            controller="ckanext.googleanalytics.controller:GAController",
            action="view",
        )
        return map

    def _modify_resource_download_route(self, map):
        """Modifies resource_download method in related controller
        to attach GA tracking code.
        """
        # NOTE(review): the ``else`` below is taken to pair with the inner
        # ``if`` (a mapper without a named ``resource_download`` route falls
        # back to PackageController) -- confirm against the repository.
        if "_routenames" in map.__dict__:
            if "resource_download" in map.__dict__["_routenames"]:
                route_data = map.__dict__["_routenames"][
                    "resource_download"
                ].__dict__
                # "module.path:ClassName"
                route_controller = route_data["defaults"]["controller"].split(
                    ":"
                )
                module = importlib.import_module(route_controller[0])
                controller_class = getattr(module, route_controller[1])
                controller_class.resource_download = wrap_resource_download(
                    controller_class.resource_download
                )
            else:
                # If no custom uploader applied, use the default one
                PackageController.resource_download = wrap_resource_download(
                    PackageController.resource_download
                )
def wrap_resource_download(func):
    """Wrap a ``resource_download`` controller method with GA tracking.

    :param func: the original method, invoked as
        ``func(cls, id, resource_id, filename=...)``.
    :returns: a function with the same call signature that first queues a
        "CKAN Resource Download Request" event and then delegates to *func*.
    """
    import functools

    @functools.wraps(func)
    def func_wrapper(cls, id, resource_id, filename=None):
        _post_analytics(
            tk.c.user,
            "CKAN Resource Download Request",
            "Resource",
            "Download",
            resource_id,
        )
        # Forward the caller's filename instead of always replacing it
        # with None.
        return func(cls, id, resource_id, filename=filename)

    return func_wrapper
def _post_analytics(
    user, event_type, request_obj_type, request_function, request_id
):
    """Queue a Google Analytics "event" hit describing the current request.

    Does nothing when ``googleanalytics.id`` is not set in the config.

    :param user: name of the acting user; only its MD5 digest is sent, as
        the ``cid`` field. ``None`` is treated as an empty name.
    :param event_type: value of the event category field (``ec``).
    :param request_obj_type: first half of the event action field (``ea``).
    :param request_function: second half of the event action field (``ea``).
    :param request_id: value of the event label field (``el``).
    """
    tracking_id = config.get("googleanalytics.id")
    if not tracking_id:
        return

    # md5() needs a byte string: fall back to an empty name and encode text
    # explicitly so that non-ASCII user names do not raise.
    user = user or ""
    if not isinstance(user, bytes):
        user = user.encode("utf-8")

    data_dict = {
        "v": 1,
        "tid": tracking_id,
        # customer id should be obfuscated
        "cid": hashlib.md5(user).hexdigest(),
        "t": "event",
        "dh": tk.c.environ["HTTP_HOST"],
        "dp": tk.c.environ["PATH_INFO"],
        "dr": tk.c.environ.get("HTTP_REFERER", ""),
        "ec": event_type,
        "ea": request_obj_type + request_function,
        "el": request_id,
    }
    GAMixinPlugin.analytics_queue.put(data_dict)

View File

@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
from flask import Blueprint
import hashlib
import ckan.views.api as api
import ckan.views.resource as resource
import ckan.logic as logic
import logging
from ckan.common import g
import ckan.plugins.toolkit as tk
log = logging.getLogger(__name__)
# Blueprint that collects the URL rules registered in this module; it is
# created without a URL prefix, so every rule spells out its full path.
ga = Blueprint(u"google_analytics", "google_analytics",)
def action(logic_function, ver=api.API_MAX_VERSION):
    """Record an API action call in Google Analytics, then run the action.

    The event label is the request's ``query``, ``q`` or ``id`` value, in
    that order of preference. Tracking is best effort: any error raised
    while preparing the event is logged at debug level and the action is
    still executed.

    :param logic_function: name of the API action being called.
    :param ver: API version, passed through to ``api.action``.
    :returns: whatever ``api.action`` returns.
    """
    try:
        function = logic.get_action(logic_function)
        side_effect_free = getattr(function, "side_effect_free", False)
        request_data = api._get_request_data(try_url_params=side_effect_free)
        if isinstance(request_data, dict):
            # Later checks win: "query" over "q" over "id".
            label = request_data.get("id", "")
            if "q" in request_data:
                label = request_data["q"]
            if "query" in request_data:
                label = request_data["query"]
            _post_analytics(
                g.user, "CKAN API Request", logic_function, "", label
            )
    except Exception as e:
        # Analytics must never break the API call itself.
        log.debug(e)

    return api.action(logic_function, ver)
# Intercept API action calls, both unversioned and versioned, so that
# action() can record them before delegating to CKAN.
ga.add_url_rule(
    u"/api/action/<logic_function>",
    methods=[u"GET", u"POST"],
    view_func=action,
)
# The rule must carry the "/api" prefix itself: without it the rule would
# match "/3/action/..." and versioned API calls would bypass tracking.
ga.add_url_rule(
    u"/api/<int(min=3, max={0}):ver>/action/<logic_function>".format(
        api.API_MAX_VERSION
    ),
    methods=[u"GET", u"POST"],
    view_func=action,
)
def download(id, resource_id, filename=None, package_type="dataset"):
    """Queue a download event, then delegate to CKAN's ``resource.download``.

    The event is labelled with *resource_id*; the remaining arguments are
    only passed through to ``resource.download``.
    """
    event_args = (
        g.user,
        "CKAN Resource Download Request",
        "Resource",
        "Download",
        resource_id,
    )
    _post_analytics(*event_args)
    return resource.download(package_type, id, resource_id, filename)
# Route resource downloads (with and without a trailing file name) through
# download(), which queues an analytics event and then calls CKAN's
# resource.download.
ga.add_url_rule(
    u"/dataset/<id>/resource/<resource_id>/download", view_func=download
)
ga.add_url_rule(
    u"/dataset/<id>/resource/<resource_id>/download/<filename>",
    view_func=download,
)
def _post_analytics(
    user, event_type, request_obj_type, request_function, request_id
):
    """Queue a Google Analytics "event" hit describing the current request.

    Does nothing when ``googleanalytics.id`` is not set in the config.

    :param user: name of the acting user; only its MD5 digest is sent, as
        the ``cid`` field. ``None`` is treated as an empty name.
    :param event_type: value of the event category field (``ec``).
    :param request_obj_type: first half of the event action field (``ea``).
    :param request_function: second half of the event action field (``ea``).
    :param request_id: value of the event label field (``el``).
    """
    # Imported here rather than at module level: the plugin package imports
    # this module (through flask_plugin), so a top-level import would be
    # circular.
    from ckanext.googleanalytics.plugin import GoogleAnalyticsPlugin

    tracking_id = tk.config.get("googleanalytics.id")
    if not tracking_id:
        return

    # md5() needs a byte string: fall back to an empty name and encode text
    # explicitly so that non-ASCII user names do not raise.
    user = user or ""
    if not isinstance(user, bytes):
        user = user.encode("utf-8")

    data_dict = {
        "v": 1,
        "tid": tracking_id,
        # customer id should be obfuscated
        "cid": hashlib.md5(user).hexdigest(),
        "t": "event",
        "dh": tk.request.environ["HTTP_HOST"],
        "dp": tk.request.environ["PATH_INFO"],
        "dr": tk.request.environ.get("HTTP_REFERER", ""),
        "ec": event_type,
        "ea": request_obj_type + request_function,
        "el": request_id,
    }
    GoogleAnalyticsPlugin.analytics_queue.put(data_dict)

View File

@ -1,28 +1,25 @@
from setuptools import setup, find_packages from setuptools import setup, find_packages
version = '0.1' version = "0.1"
setup( setup(
name='ckanext-googleanalytics', name="ckanext-googleanalytics",
version=version, version=version,
description="Add GA tracking and reporting to CKAN instance", description="Add GA tracking and reporting to CKAN instance",
long_description="""\ long_description="""\
""", """,
classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
keywords='', keywords="",
author='Seb Bacon', author="Seb Bacon",
author_email='seb.bacon@gmail.com', author_email="seb.bacon@gmail.com",
url='', url="",
license='', license="",
packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), packages=find_packages(exclude=["ez_setup", "examples", "tests"]),
namespace_packages=['ckanext', 'ckanext.googleanalytics'], namespace_packages=["ckanext", "ckanext.googleanalytics"],
include_package_data=True, include_package_data=True,
zip_safe=False, zip_safe=False,
install_requires=[ install_requires=[],
entry_points="""
],
entry_points=\
"""
[ckan.plugins] [ckan.plugins]
# Add plugins here, eg # Add plugins here, eg
googleanalytics=ckanext.googleanalytics.plugin:GoogleAnalyticsPlugin googleanalytics=ckanext.googleanalytics.plugin:GoogleAnalyticsPlugin

View File

@ -53,9 +53,8 @@ class ReusableServer(BaseHTTPServer.HTTPServer):
def runmockserver(): def runmockserver():
server_address = ('localhost', 6969) server_address = ("localhost", 6969)
httpd = ReusableServer(server_address, httpd = ReusableServer(server_address, MockHandler)
MockHandler)
httpd_thread = threading.Thread(target=httpd.serve_til_quit) httpd_thread = threading.Thread(target=httpd.serve_til_quit)
httpd_thread.setDaemon(True) httpd_thread.setDaemon(True)
httpd_thread.start() httpd_thread.start()

View File

@ -15,7 +15,7 @@ import ckanext.googleanalytics.gasnippet as gasnippet
class MockClient(httplib.HTTPConnection): class MockClient(httplib.HTTPConnection):
def request(self, http_request): def request(self, http_request):
filters = http_request.uri.query.get('filters') filters = http_request.uri.query.get("filters")
path = http_request.uri.path path = http_request.uri.path
if filters: if filters:
if "dataset" in filters: if "dataset" in filters:
@ -29,9 +29,9 @@ class MockClient(httplib.HTTPConnection):
class TestConfig(TestCase): class TestConfig(TestCase):
def test_config(self): def test_config(self):
config = appconfig('config:test.ini', relative_to=conf_dir) config = appconfig("config:test.ini", relative_to=conf_dir)
config.local_conf['ckan.plugins'] = 'googleanalytics' config.local_conf["ckan.plugins"] = "googleanalytics"
config.local_conf['googleanalytics.id'] = '' config.local_conf["googleanalytics.id"] = ""
command = LoadAnalytics("loadanalytics") command = LoadAnalytics("loadanalytics")
command.CONFIG = config.local_conf command.CONFIG = config.local_conf
self.assertRaises(Exception, command.run, []) self.assertRaises(Exception, command.run, [])
@ -42,16 +42,19 @@ class TestLoadCommand(TestCase):
def setup_class(cls): def setup_class(cls):
InitDB("initdb").run([]) # set up database tables InitDB("initdb").run([]) # set up database tables
config = appconfig('config:test.ini', relative_to=conf_dir) config = appconfig("config:test.ini", relative_to=conf_dir)
config.local_conf['ckan.plugins'] = 'googleanalytics' config.local_conf["ckan.plugins"] = "googleanalytics"
config.local_conf['googleanalytics.username'] = 'borf' config.local_conf["googleanalytics.username"] = "borf"
config.local_conf['googleanalytics.password'] = 'borf' config.local_conf["googleanalytics.password"] = "borf"
config.local_conf['googleanalytics.id'] = 'UA-borf-1' config.local_conf["googleanalytics.id"] = "UA-borf-1"
config.local_conf['googleanalytics.show_downloads'] = 'true' config.local_conf["googleanalytics.show_downloads"] = "true"
cls.config = config.local_conf cls.config = config.local_conf
wsgiapp = make_app(config.global_conf, **config.local_conf) wsgiapp = make_app(config.global_conf, **config.local_conf)
env = {'HTTP_ACCEPT': ('text/html;q=0.9,text/plain;' env = {
'q=0.8,image/png,*/*;q=0.5')} "HTTP_ACCEPT": (
"text/html;q=0.9,text/plain;" "q=0.8,image/png,*/*;q=0.5"
)
}
cls.app = paste.fixture.TestApp(wsgiapp, extra_environ=env) cls.app = paste.fixture.TestApp(wsgiapp, extra_environ=env)
CreateTestData.create() CreateTestData.create()
runmockserver() runmockserver()
@ -64,14 +67,16 @@ class TestLoadCommand(TestCase):
conn.getresponse() conn.getresponse()
def test_analytics_snippet(self): def test_analytics_snippet(self):
response = self.app.get(url_for(controller='tag', action='index')) response = self.app.get(url_for(controller="tag", action="index"))
code = gasnippet.header_code % (self.config['googleanalytics.id'], code = gasnippet.header_code % (
'auto') self.config["googleanalytics.id"],
"auto",
)
assert code in response.body assert code in response.body
def test_top_packages(self): def test_top_packages(self):
command = LoadAnalytics("loadanalytics") command = LoadAnalytics("loadanalytics")
command.TEST_HOST = MockClient('localhost', 6969) command.TEST_HOST = MockClient("localhost", 6969)
command.CONFIG = self.config command.CONFIG = self.config
command.run([]) command.run([])
packages = dbutil.get_top_packages() packages = dbutil.get_top_packages()
@ -81,31 +86,37 @@ class TestLoadCommand(TestCase):
def test_download_count_inserted(self): def test_download_count_inserted(self):
command = LoadAnalytics("loadanalytics") command = LoadAnalytics("loadanalytics")
command.TEST_HOST = MockClient('localhost', 6969) command.TEST_HOST = MockClient("localhost", 6969)
command.CONFIG = self.config command.CONFIG = self.config
command.run([]) command.run([])
response = self.app.get(url_for( response = self.app.get(
controller='package', action='read', id='annakarenina' url_for(controller="package", action="read", id="annakarenina")
)) )
assert "[downloaded 4 times]" in response.body assert "[downloaded 4 times]" in response.body
def test_js_inserted_resource_view(self): def test_js_inserted_resource_view(self):
from nose import SkipTest from nose import SkipTest
raise SkipTest("Test won't work until CKAN 1.5.2") raise SkipTest("Test won't work until CKAN 1.5.2")
from ckan.logic.action import get from ckan.logic.action import get
from ckan import model from ckan import model
context = {'model': model, 'ignore_auth': True}
data = {'id': 'annakarenina'} context = {"model": model, "ignore_auth": True}
data = {"id": "annakarenina"}
pkg = get.package_show(context, data) pkg = get.package_show(context, data)
resource_id = pkg['resources'][0]['id'] resource_id = pkg["resources"][0]["id"]
command = LoadAnalytics("loadanalytics") command = LoadAnalytics("loadanalytics")
command.TEST_HOST = MockClient('localhost', 6969) command.TEST_HOST = MockClient("localhost", 6969)
command.CONFIG = self.config command.CONFIG = self.config
command.run([]) command.run([])
response = self.app.get(url_for( response = self.app.get(
controller='package', action='resource_read', id='annakarenina', url_for(
resource_id=resource_id controller="package",
)) action="resource_read",
id="annakarenina",
resource_id=resource_id,
)
)
assert 'onclick="javascript: _gaq.push(' in response.body assert 'onclick="javascript: _gaq.push(' in response.body