2.9 support
This commit is contained in:
parent
a365fcdf6d
commit
a5be073c04
|
@ -1,9 +1,11 @@
|
|||
# this is a namespace package
|
||||
try:
|
||||
import pkg_resources
|
||||
|
||||
pkg_resources.declare_namespace(__name__)
|
||||
except ImportError:
|
||||
import pkgutil
|
||||
|
||||
__path__ = pkgutil.extend_path(__path__, __name__)
|
||||
|
||||
try:
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
# this is a namespace package
|
||||
try:
|
||||
import pkg_resources
|
||||
|
||||
pkg_resources.declare_namespace(__name__)
|
||||
except ImportError:
|
||||
import pkgutil
|
||||
|
||||
__path__ = pkgutil.extend_path(__path__, __name__)
|
||||
|
|
|
@ -10,18 +10,19 @@ import ckan.model as model
|
|||
|
||||
import dbutil
|
||||
|
||||
log = logging.getLogger('ckanext.googleanalytics')
|
||||
PACKAGE_URL = '/dataset/' # XXX get from routes...
|
||||
DEFAULT_RESOURCE_URL_TAG = '/downloads/'
|
||||
log = logging.getLogger("ckanext.googleanalytics")
|
||||
PACKAGE_URL = "/dataset/" # XXX get from routes...
|
||||
DEFAULT_RESOURCE_URL_TAG = "/downloads/"
|
||||
|
||||
RESOURCE_URL_REGEX = re.compile('/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)')
|
||||
DATASET_EDIT_REGEX = re.compile('/dataset/edit/([a-z0-9-_]+)')
|
||||
RESOURCE_URL_REGEX = re.compile("/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)")
|
||||
DATASET_EDIT_REGEX = re.compile("/dataset/edit/([a-z0-9-_]+)")
|
||||
|
||||
|
||||
class InitDB(CkanCommand):
|
||||
"""Initialise the local stats database tables
|
||||
"""
|
||||
summary = __doc__.split('\n')[0]
|
||||
|
||||
summary = __doc__.split("\n")[0]
|
||||
usage = __doc__
|
||||
max_args = 0
|
||||
min_args = 0
|
||||
|
@ -44,7 +45,8 @@ class LoadAnalytics(CkanCommand):
|
|||
date specifies start date for retrieving
|
||||
analytics data YYYY-MM-DD format
|
||||
"""
|
||||
summary = __doc__.split('\n')[0]
|
||||
|
||||
summary = __doc__.split("\n")[0]
|
||||
usage = __doc__
|
||||
max_args = 3
|
||||
min_args = 1
|
||||
|
@ -57,8 +59,8 @@ class LoadAnalytics(CkanCommand):
|
|||
self.CONFIG = pylonsconfig
|
||||
|
||||
self.resource_url_tag = self.CONFIG.get(
|
||||
'googleanalytics_resource_prefix',
|
||||
DEFAULT_RESOURCE_URL_TAG)
|
||||
"googleanalytics_resource_prefix", DEFAULT_RESOURCE_URL_TAG
|
||||
)
|
||||
|
||||
# funny dance we need to do to make sure we've got a
|
||||
# configured session
|
||||
|
@ -69,41 +71,44 @@ class LoadAnalytics(CkanCommand):
|
|||
def internal_save(self, packages_data, summary_date):
|
||||
engine = model.meta.engine
|
||||
# clear out existing data before adding new
|
||||
sql = '''DELETE FROM tracking_summary
|
||||
WHERE tracking_date='%s'; ''' % summary_date
|
||||
sql = (
|
||||
"""DELETE FROM tracking_summary
|
||||
WHERE tracking_date='%s'; """
|
||||
% summary_date
|
||||
)
|
||||
engine.execute(sql)
|
||||
|
||||
for url, count in packages_data.iteritems():
|
||||
# If it matches the resource then we should mark it as a resource.
|
||||
# For resources we don't currently find the package ID.
|
||||
if RESOURCE_URL_REGEX.match(url):
|
||||
tracking_type = 'resource'
|
||||
tracking_type = "resource"
|
||||
else:
|
||||
tracking_type = 'page'
|
||||
tracking_type = "page"
|
||||
|
||||
sql = '''INSERT INTO tracking_summary
|
||||
sql = """INSERT INTO tracking_summary
|
||||
(url, count, tracking_date, tracking_type)
|
||||
VALUES (%s, %s, %s, %s);'''
|
||||
VALUES (%s, %s, %s, %s);"""
|
||||
engine.execute(sql, url, count, summary_date, tracking_type)
|
||||
|
||||
# get ids for dataset urls
|
||||
sql = '''UPDATE tracking_summary t
|
||||
sql = """UPDATE tracking_summary t
|
||||
SET package_id = COALESCE(
|
||||
(SELECT id FROM package p WHERE t.url = %s || p.name)
|
||||
,'~~not~found~~')
|
||||
WHERE t.package_id IS NULL AND tracking_type = 'page';'''
|
||||
WHERE t.package_id IS NULL AND tracking_type = 'page';"""
|
||||
engine.execute(sql, PACKAGE_URL)
|
||||
|
||||
# get ids for dataset edit urls which aren't captured otherwise
|
||||
sql = '''UPDATE tracking_summary t
|
||||
sql = """UPDATE tracking_summary t
|
||||
SET package_id = COALESCE(
|
||||
(SELECT id FROM package p WHERE t.url = %s || p.name)
|
||||
,'~~not~found~~')
|
||||
WHERE t.package_id = '~~not~found~~' AND tracking_type = 'page';'''
|
||||
engine.execute(sql, '%sedit/' % PACKAGE_URL)
|
||||
WHERE t.package_id = '~~not~found~~' AND tracking_type = 'page';"""
|
||||
engine.execute(sql, "%sedit/" % PACKAGE_URL)
|
||||
|
||||
# update summary totals for resources
|
||||
sql = '''UPDATE tracking_summary t1
|
||||
sql = """UPDATE tracking_summary t1
|
||||
SET running_total = (
|
||||
SELECT sum(count)
|
||||
FROM tracking_summary t2
|
||||
|
@ -116,11 +121,11 @@ class LoadAnalytics(CkanCommand):
|
|||
WHERE t1.url = t2.url
|
||||
AND t2.tracking_date <= t1.tracking_date AND t2.tracking_date >= t1.tracking_date - 14
|
||||
) + t1.count
|
||||
WHERE t1.running_total = 0 AND tracking_type = 'resource';'''
|
||||
WHERE t1.running_total = 0 AND tracking_type = 'resource';"""
|
||||
engine.execute(sql)
|
||||
|
||||
# update summary totals for pages
|
||||
sql = '''UPDATE tracking_summary t1
|
||||
sql = """UPDATE tracking_summary t1
|
||||
SET running_total = (
|
||||
SELECT sum(count)
|
||||
FROM tracking_summary t2
|
||||
|
@ -135,23 +140,23 @@ class LoadAnalytics(CkanCommand):
|
|||
) + t1.count
|
||||
WHERE t1.running_total = 0 AND tracking_type = 'page'
|
||||
AND t1.package_id IS NOT NULL
|
||||
AND t1.package_id != '~~not~found~~';'''
|
||||
AND t1.package_id != '~~not~found~~';"""
|
||||
engine.execute(sql)
|
||||
|
||||
def bulk_import(self):
|
||||
if len(self.args) == 3:
|
||||
# Get summeries from specified date
|
||||
start_date = datetime.datetime.strptime(self.args[2], '%Y-%m-%d')
|
||||
start_date = datetime.datetime.strptime(self.args[2], "%Y-%m-%d")
|
||||
else:
|
||||
# No date given. See when we last have data for and get data
|
||||
# from 2 days before then in case new data is available.
|
||||
# If no date here then use 2010-01-01 as the start date
|
||||
engine = model.meta.engine
|
||||
sql = '''SELECT tracking_date from tracking_summary
|
||||
ORDER BY tracking_date DESC LIMIT 1;'''
|
||||
sql = """SELECT tracking_date from tracking_summary
|
||||
ORDER BY tracking_date DESC LIMIT 1;"""
|
||||
result = engine.execute(sql).fetchall()
|
||||
if result:
|
||||
start_date = result[0]['tracking_date']
|
||||
start_date = result[0]["tracking_date"]
|
||||
start_date += datetime.timedelta(-2)
|
||||
# convert date to datetime
|
||||
combine = datetime.datetime.combine
|
||||
|
@ -161,14 +166,15 @@ class LoadAnalytics(CkanCommand):
|
|||
end_date = datetime.datetime.now()
|
||||
while start_date < end_date:
|
||||
stop_date = start_date + datetime.timedelta(1)
|
||||
packages_data = self.get_ga_data_new(start_date=start_date,
|
||||
end_date=stop_date)
|
||||
packages_data = self.get_ga_data_new(
|
||||
start_date=start_date, end_date=stop_date
|
||||
)
|
||||
self.internal_save(packages_data, start_date)
|
||||
# sleep to rate limit requests
|
||||
time.sleep(0.25)
|
||||
start_date = stop_date
|
||||
log.info('%s received %s' % (len(packages_data), start_date))
|
||||
print '%s received %s' % (len(packages_data), start_date)
|
||||
log.info("%s received %s" % (len(packages_data), start_date))
|
||||
print "%s received %s" % (len(packages_data), start_date)
|
||||
|
||||
def get_ga_data_new(self, start_date=None, end_date=None):
|
||||
"""Get raw data from Google Analtyics for packages and
|
||||
|
@ -182,32 +188,41 @@ class LoadAnalytics(CkanCommand):
|
|||
end_date = end_date.strftime("%Y-%m-%d")
|
||||
|
||||
packages = {}
|
||||
query = 'ga:pagePath=~%s,ga:pagePath=~%s' % \
|
||||
(PACKAGE_URL, self.resource_url_tag)
|
||||
metrics = 'ga:uniquePageviews'
|
||||
sort = '-ga:uniquePageviews'
|
||||
query = "ga:pagePath=~%s,ga:pagePath=~%s" % (
|
||||
PACKAGE_URL,
|
||||
self.resource_url_tag,
|
||||
)
|
||||
metrics = "ga:uniquePageviews"
|
||||
sort = "-ga:uniquePageviews"
|
||||
|
||||
start_index = 1
|
||||
max_results = 10000
|
||||
# data retrival is chunked
|
||||
completed = False
|
||||
while not completed:
|
||||
results = self.service.data().ga().get(ids='ga:%s' % self.profile_id,
|
||||
filters=query,
|
||||
dimensions='ga:pagePath',
|
||||
start_date=start_date,
|
||||
start_index=start_index,
|
||||
max_results=max_results,
|
||||
metrics=metrics,
|
||||
sort=sort,
|
||||
end_date=end_date).execute()
|
||||
result_count = len(results.get('rows', []))
|
||||
results = (
|
||||
self.service.data()
|
||||
.ga()
|
||||
.get(
|
||||
ids="ga:%s" % self.profile_id,
|
||||
filters=query,
|
||||
dimensions="ga:pagePath",
|
||||
start_date=start_date,
|
||||
start_index=start_index,
|
||||
max_results=max_results,
|
||||
metrics=metrics,
|
||||
sort=sort,
|
||||
end_date=end_date,
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
result_count = len(results.get("rows", []))
|
||||
if result_count < max_results:
|
||||
completed = True
|
||||
|
||||
for result in results.get('rows', []):
|
||||
for result in results.get("rows", []):
|
||||
package = result[0]
|
||||
package = '/' + '/'.join(package.split('/')[2:])
|
||||
package = "/" + "/".join(package.split("/")[2:])
|
||||
count = result[1]
|
||||
packages[package] = int(count)
|
||||
|
||||
|
@ -219,25 +234,27 @@ class LoadAnalytics(CkanCommand):
|
|||
|
||||
def parse_and_save(self):
|
||||
"""Grab raw data from Google Analytics and save to the database"""
|
||||
from ga_auth import (init_service, get_profile_id)
|
||||
from ga_auth import init_service, get_profile_id
|
||||
|
||||
tokenfile = self.args[0]
|
||||
if not os.path.exists(tokenfile):
|
||||
raise Exception('Cannot find the token file %s' % self.args[0])
|
||||
raise Exception("Cannot find the token file %s" % self.args[0])
|
||||
|
||||
try:
|
||||
self.service = init_service(self.args[0])
|
||||
except TypeError as e:
|
||||
raise Exception('Unable to create a service: {0}'.format(e))
|
||||
raise Exception("Unable to create a service: {0}".format(e))
|
||||
self.profile_id = get_profile_id(self.service)
|
||||
|
||||
if len(self.args) > 1:
|
||||
if len(self.args) > 2 and self.args[1].lower() != 'internal':
|
||||
raise Exception('Illegal argument %s' % self.args[1])
|
||||
if len(self.args) > 2 and self.args[1].lower() != "internal":
|
||||
raise Exception("Illegal argument %s" % self.args[1])
|
||||
self.bulk_import()
|
||||
else:
|
||||
query = 'ga:pagePath=~%s,ga:pagePath=~%s' % \
|
||||
(PACKAGE_URL, self.resource_url_tag)
|
||||
query = "ga:pagePath=~%s,ga:pagePath=~%s" % (
|
||||
PACKAGE_URL,
|
||||
self.resource_url_tag,
|
||||
)
|
||||
packages_data = self.get_ga_data(query_filter=query)
|
||||
self.save_ga_data(packages_data)
|
||||
log.info("Saved %s records from google" % len(packages_data))
|
||||
|
@ -246,20 +263,24 @@ class LoadAnalytics(CkanCommand):
|
|||
"""Save tuples of packages_data to the database
|
||||
"""
|
||||
for identifier, visits in packages_data.items():
|
||||
recently = visits.get('recent', 0)
|
||||
ever = visits.get('ever', 0)
|
||||
recently = visits.get("recent", 0)
|
||||
ever = visits.get("ever", 0)
|
||||
matches = RESOURCE_URL_REGEX.match(identifier)
|
||||
if matches:
|
||||
resource_url = identifier[len(self.resource_url_tag):]
|
||||
resource = model.Session.query(model.Resource).autoflush(True)\
|
||||
.filter_by(id=matches.group(1)).first()
|
||||
resource_url = identifier[len(self.resource_url_tag) :]
|
||||
resource = (
|
||||
model.Session.query(model.Resource)
|
||||
.autoflush(True)
|
||||
.filter_by(id=matches.group(1))
|
||||
.first()
|
||||
)
|
||||
if not resource:
|
||||
log.warning("Couldn't find resource %s" % resource_url)
|
||||
continue
|
||||
dbutil.update_resource_visits(resource.id, recently, ever)
|
||||
log.info("Updated %s with %s visits" % (resource.id, visits))
|
||||
else:
|
||||
package_name = identifier[len(PACKAGE_URL):]
|
||||
package_name = identifier[len(PACKAGE_URL) :]
|
||||
if "/" in package_name:
|
||||
log.warning("%s not a valid package name" % package_name)
|
||||
continue
|
||||
|
@ -271,8 +292,16 @@ class LoadAnalytics(CkanCommand):
|
|||
log.info("Updated %s with %s visits" % (item.id, visits))
|
||||
model.Session.commit()
|
||||
|
||||
def ga_query(self, query_filter=None, from_date=None, to_date=None,
|
||||
start_index=1, max_results=10000, metrics=None, sort=None):
|
||||
def ga_query(
|
||||
self,
|
||||
query_filter=None,
|
||||
from_date=None,
|
||||
to_date=None,
|
||||
start_index=1,
|
||||
max_results=10000,
|
||||
metrics=None,
|
||||
sort=None,
|
||||
):
|
||||
"""Execute a query against Google Analytics
|
||||
"""
|
||||
if not to_date:
|
||||
|
@ -281,22 +310,28 @@ class LoadAnalytics(CkanCommand):
|
|||
if isinstance(from_date, datetime.date):
|
||||
from_date = from_date.strftime("%Y-%m-%d")
|
||||
if not metrics:
|
||||
metrics = 'ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews'
|
||||
metrics = "ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews"
|
||||
if not sort:
|
||||
sort = '-ga:uniquePageviews'
|
||||
sort = "-ga:uniquePageviews"
|
||||
|
||||
print '%s -> %s' % (from_date, to_date)
|
||||
print "%s -> %s" % (from_date, to_date)
|
||||
|
||||
results = self.service.data().ga().get(ids='ga:' + self.profile_id,
|
||||
start_date=from_date,
|
||||
end_date=to_date,
|
||||
dimensions='ga:pagePath',
|
||||
metrics=metrics,
|
||||
sort=sort,
|
||||
start_index=start_index,
|
||||
filters=query_filter,
|
||||
max_results=max_results
|
||||
).execute()
|
||||
results = (
|
||||
self.service.data()
|
||||
.ga()
|
||||
.get(
|
||||
ids="ga:" + self.profile_id,
|
||||
start_date=from_date,
|
||||
end_date=to_date,
|
||||
dimensions="ga:pagePath",
|
||||
metrics=metrics,
|
||||
sort=sort,
|
||||
start_index=start_index,
|
||||
filters=query_filter,
|
||||
max_results=max_results,
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
return results
|
||||
|
||||
def get_ga_data(self, query_filter=None, start_date=None, end_date=None):
|
||||
|
@ -312,25 +347,31 @@ class LoadAnalytics(CkanCommand):
|
|||
recent_date = recent_date.strftime("%Y-%m-%d")
|
||||
floor_date = datetime.date(2005, 1, 1)
|
||||
packages = {}
|
||||
queries = ['ga:pagePath=~%s' % PACKAGE_URL]
|
||||
dates = {'recent': recent_date, 'ever': floor_date}
|
||||
queries = ["ga:pagePath=~%s" % PACKAGE_URL]
|
||||
dates = {"recent": recent_date, "ever": floor_date}
|
||||
for date_name, date in dates.iteritems():
|
||||
for query in queries:
|
||||
results = self.ga_query(query_filter=query,
|
||||
metrics='ga:uniquePageviews',
|
||||
from_date=date)
|
||||
if 'rows' in results:
|
||||
for result in results.get('rows'):
|
||||
results = self.ga_query(
|
||||
query_filter=query,
|
||||
metrics="ga:uniquePageviews",
|
||||
from_date=date,
|
||||
)
|
||||
if "rows" in results:
|
||||
for result in results.get("rows"):
|
||||
package = result[0]
|
||||
if not package.startswith(PACKAGE_URL):
|
||||
package = '/' + '/'.join(package.split('/')[2:])
|
||||
package = "/" + "/".join(package.split("/")[2:])
|
||||
|
||||
count = result[1]
|
||||
# Make sure we add the different representations of the same
|
||||
# dataset /mysite.com & /www.mysite.com ...
|
||||
val = 0
|
||||
if package in packages and date_name in packages[package]:
|
||||
if (
|
||||
package in packages
|
||||
and date_name in packages[package]
|
||||
):
|
||||
val += packages[package][date_name]
|
||||
packages.setdefault(package, {})[date_name] = \
|
||||
packages.setdefault(package, {})[date_name] = (
|
||||
int(count) + val
|
||||
)
|
||||
return packages
|
||||
|
|
|
@ -11,32 +11,33 @@ from paste.util.multidict import MultiDict
|
|||
|
||||
from ckan.controllers.api import ApiController
|
||||
|
||||
log = logging.getLogger('ckanext.googleanalytics')
|
||||
log = logging.getLogger("ckanext.googleanalytics")
|
||||
|
||||
|
||||
class GAController(BaseController):
|
||||
def view(self):
|
||||
# get package objects corresponding to popular GA content
|
||||
c.top_resources = dbutil.get_top_resources(limit=10)
|
||||
return render('summary.html')
|
||||
return render("summary.html")
|
||||
|
||||
|
||||
class GAApiController(ApiController):
|
||||
# intercept API calls to record via google analytics
|
||||
def _post_analytics(
|
||||
self, user, request_obj_type, request_function, request_id):
|
||||
if config.get('googleanalytics.id'):
|
||||
self, user, request_obj_type, request_function, request_id
|
||||
):
|
||||
if config.get("googleanalytics.id"):
|
||||
data_dict = {
|
||||
"v": 1,
|
||||
"tid": config.get('googleanalytics.id'),
|
||||
"tid": config.get("googleanalytics.id"),
|
||||
"cid": hashlib.md5(user).hexdigest(),
|
||||
# customer id should be obfuscated
|
||||
"t": "event",
|
||||
"dh": c.environ['HTTP_HOST'],
|
||||
"dp": c.environ['PATH_INFO'],
|
||||
"dr": c.environ.get('HTTP_REFERER', ''),
|
||||
"dh": c.environ["HTTP_HOST"],
|
||||
"dp": c.environ["PATH_INFO"],
|
||||
"dr": c.environ.get("HTTP_REFERER", ""),
|
||||
"ec": "CKAN API Request",
|
||||
"ea": request_obj_type+request_function,
|
||||
"ea": request_obj_type + request_function,
|
||||
"el": request_id,
|
||||
}
|
||||
plugin.GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
|
||||
|
@ -44,66 +45,72 @@ class GAApiController(ApiController):
|
|||
def action(self, logic_function, ver=None):
|
||||
try:
|
||||
function = logic.get_action(logic_function)
|
||||
side_effect_free = getattr(function, 'side_effect_free', False)
|
||||
side_effect_free = getattr(function, "side_effect_free", False)
|
||||
request_data = self._get_request_data(
|
||||
try_url_params=side_effect_free)
|
||||
try_url_params=side_effect_free
|
||||
)
|
||||
if isinstance(request_data, dict):
|
||||
id = request_data.get('id', '')
|
||||
if 'q' in request_data:
|
||||
id = request_data['q']
|
||||
if 'query' in request_data:
|
||||
id = request_data['query']
|
||||
self._post_analytics(c.user, logic_function, '', id)
|
||||
id = request_data.get("id", "")
|
||||
if "q" in request_data:
|
||||
id = request_data["q"]
|
||||
if "query" in request_data:
|
||||
id = request_data["query"]
|
||||
self._post_analytics(c.user, logic_function, "", id)
|
||||
except Exception, e:
|
||||
log.debug(e)
|
||||
pass
|
||||
|
||||
return ApiController.action(self, logic_function, ver)
|
||||
|
||||
def list(self, ver=None, register=None,
|
||||
subregister=None, id=None):
|
||||
self._post_analytics(c.user,
|
||||
register +
|
||||
("_"+str(subregister) if subregister else ""),
|
||||
"list",
|
||||
id)
|
||||
def list(self, ver=None, register=None, subregister=None, id=None):
|
||||
self._post_analytics(
|
||||
c.user,
|
||||
register + ("_" + str(subregister) if subregister else ""),
|
||||
"list",
|
||||
id,
|
||||
)
|
||||
return ApiController.list(self, ver, register, subregister, id)
|
||||
|
||||
def show(self, ver=None, register=None,
|
||||
subregister=None, id=None, id2=None):
|
||||
self._post_analytics(c.user,
|
||||
register +
|
||||
("_"+str(subregister) if subregister else ""),
|
||||
"show",
|
||||
id)
|
||||
def show(
|
||||
self, ver=None, register=None, subregister=None, id=None, id2=None
|
||||
):
|
||||
self._post_analytics(
|
||||
c.user,
|
||||
register + ("_" + str(subregister) if subregister else ""),
|
||||
"show",
|
||||
id,
|
||||
)
|
||||
return ApiController.show(self, ver, register, subregister, id, id2)
|
||||
|
||||
def update(self, ver=None, register=None,
|
||||
subregister=None, id=None, id2=None):
|
||||
self._post_analytics(c.user,
|
||||
register +
|
||||
("_"+str(subregister) if subregister else ""),
|
||||
"update",
|
||||
id)
|
||||
def update(
|
||||
self, ver=None, register=None, subregister=None, id=None, id2=None
|
||||
):
|
||||
self._post_analytics(
|
||||
c.user,
|
||||
register + ("_" + str(subregister) if subregister else ""),
|
||||
"update",
|
||||
id,
|
||||
)
|
||||
return ApiController.update(self, ver, register, subregister, id, id2)
|
||||
|
||||
def delete(self, ver=None, register=None,
|
||||
subregister=None, id=None, id2=None):
|
||||
self._post_analytics(c.user,
|
||||
register +
|
||||
("_"+str(subregister) if subregister else ""),
|
||||
"delete",
|
||||
id)
|
||||
def delete(
|
||||
self, ver=None, register=None, subregister=None, id=None, id2=None
|
||||
):
|
||||
self._post_analytics(
|
||||
c.user,
|
||||
register + ("_" + str(subregister) if subregister else ""),
|
||||
"delete",
|
||||
id,
|
||||
)
|
||||
return ApiController.delete(self, ver, register, subregister, id, id2)
|
||||
|
||||
def search(self, ver=None, register=None):
|
||||
id = None
|
||||
try:
|
||||
params = MultiDict(self._get_search_params(request.params))
|
||||
if 'q' in params.keys():
|
||||
id = params['q']
|
||||
if 'query' in params.keys():
|
||||
id = params['query']
|
||||
if "q" in params.keys():
|
||||
id = params["q"]
|
||||
if "query" in params.keys():
|
||||
id = params["query"]
|
||||
except ValueError, e:
|
||||
log.debug(str(e))
|
||||
pass
|
||||
|
|
|
@ -3,6 +3,7 @@ from sqlalchemy.sql import select, text
|
|||
from sqlalchemy import func
|
||||
|
||||
import ckan.model as model
|
||||
|
||||
# from ckan.model.authz import PSEUDO_USER__VISITOR
|
||||
from ckan.lib.base import *
|
||||
|
||||
|
@ -11,16 +12,20 @@ cached_tables = {}
|
|||
|
||||
def init_tables():
|
||||
metadata = MetaData()
|
||||
package_stats = Table('package_stats', metadata,
|
||||
Column('package_id', String(60),
|
||||
primary_key=True),
|
||||
Column('visits_recently', Integer),
|
||||
Column('visits_ever', Integer))
|
||||
resource_stats = Table('resource_stats', metadata,
|
||||
Column('resource_id', String(60),
|
||||
primary_key=True),
|
||||
Column('visits_recently', Integer),
|
||||
Column('visits_ever', Integer))
|
||||
package_stats = Table(
|
||||
"package_stats",
|
||||
metadata,
|
||||
Column("package_id", String(60), primary_key=True),
|
||||
Column("visits_recently", Integer),
|
||||
Column("visits_ever", Integer),
|
||||
)
|
||||
resource_stats = Table(
|
||||
"resource_stats",
|
||||
metadata,
|
||||
Column("resource_id", String(60), primary_key=True),
|
||||
Column("visits_recently", Integer),
|
||||
Column("visits_ever", Integer),
|
||||
)
|
||||
metadata.create_all(model.meta.engine)
|
||||
|
||||
|
||||
|
@ -35,63 +40,68 @@ def get_table(name):
|
|||
|
||||
def _update_visits(table_name, item_id, recently, ever):
|
||||
stats = get_table(table_name)
|
||||
id_col_name = "%s_id" % table_name[:-len("_stats")]
|
||||
id_col_name = "%s_id" % table_name[: -len("_stats")]
|
||||
id_col = getattr(stats.c, id_col_name)
|
||||
s = select([func.count(id_col)],
|
||||
id_col == item_id)
|
||||
s = select([func.count(id_col)], id_col == item_id)
|
||||
connection = model.Session.connection()
|
||||
count = connection.execute(s).fetchone()
|
||||
if count and count[0]:
|
||||
connection.execute(stats.update()\
|
||||
.where(id_col == item_id)\
|
||||
.values(visits_recently=recently,
|
||||
visits_ever=ever))
|
||||
connection.execute(
|
||||
stats.update()
|
||||
.where(id_col == item_id)
|
||||
.values(visits_recently=recently, visits_ever=ever)
|
||||
)
|
||||
else:
|
||||
values = {id_col_name: item_id,
|
||||
'visits_recently': recently,
|
||||
'visits_ever': ever}
|
||||
connection.execute(stats.insert()\
|
||||
.values(**values))
|
||||
values = {
|
||||
id_col_name: item_id,
|
||||
"visits_recently": recently,
|
||||
"visits_ever": ever,
|
||||
}
|
||||
connection.execute(stats.insert().values(**values))
|
||||
|
||||
|
||||
def update_resource_visits(resource_id, recently, ever):
|
||||
return _update_visits("resource_stats",
|
||||
resource_id,
|
||||
recently,
|
||||
ever)
|
||||
return _update_visits("resource_stats", resource_id, recently, ever)
|
||||
|
||||
|
||||
def update_package_visits(package_id, recently, ever):
|
||||
return _update_visits("package_stats",
|
||||
package_id,
|
||||
recently,
|
||||
ever)
|
||||
return _update_visits("package_stats", package_id, recently, ever)
|
||||
|
||||
|
||||
def get_resource_visits_for_url(url):
|
||||
connection = model.Session.connection()
|
||||
count = connection.execute(
|
||||
text("""SELECT visits_ever FROM resource_stats, resource
|
||||
text(
|
||||
"""SELECT visits_ever FROM resource_stats, resource
|
||||
WHERE resource_id = resource.id
|
||||
AND resource.url = :url"""), url=url).fetchone()
|
||||
AND resource.url = :url"""
|
||||
),
|
||||
url=url,
|
||||
).fetchone()
|
||||
return count and count[0] or ""
|
||||
|
||||
|
||||
""" get_top_packages is broken, and needs to be rewritten to work with
|
||||
CKAN 2.*. This is because ckan.authz has been removed in CKAN 2.*
|
||||
|
||||
See commit ffa86c010d5d25fa1881c6b915e48f3b44657612
|
||||
"""
|
||||
|
||||
|
||||
def get_top_packages(limit=20):
|
||||
items = []
|
||||
# caveat emptor: the query below will not filter out private
|
||||
# or deleted datasets (TODO)
|
||||
q = model.Session.query(model.Package)
|
||||
connection = model.Session.connection()
|
||||
package_stats = get_table('package_stats')
|
||||
s = select([package_stats.c.package_id,
|
||||
package_stats.c.visits_recently,
|
||||
package_stats.c.visits_ever])\
|
||||
.order_by(package_stats.c.visits_recently.desc())
|
||||
package_stats = get_table("package_stats")
|
||||
s = select(
|
||||
[
|
||||
package_stats.c.package_id,
|
||||
package_stats.c.visits_recently,
|
||||
package_stats.c.visits_ever,
|
||||
]
|
||||
).order_by(package_stats.c.visits_recently.desc())
|
||||
res = connection.execute(s).fetchmany(limit)
|
||||
for package_id, recent, ever in res:
|
||||
item = q.filter("package.id = '%s'" % package_id)
|
||||
|
@ -104,15 +114,19 @@ def get_top_packages(limit=20):
|
|||
def get_top_resources(limit=20):
|
||||
items = []
|
||||
connection = model.Session.connection()
|
||||
resource_stats = get_table('resource_stats')
|
||||
s = select([resource_stats.c.resource_id,
|
||||
resource_stats.c.visits_recently,
|
||||
resource_stats.c.visits_ever])\
|
||||
.order_by(resource_stats.c.visits_recently.desc())
|
||||
resource_stats = get_table("resource_stats")
|
||||
s = select(
|
||||
[
|
||||
resource_stats.c.resource_id,
|
||||
resource_stats.c.visits_recently,
|
||||
resource_stats.c.visits_ever,
|
||||
]
|
||||
).order_by(resource_stats.c.visits_recently.desc())
|
||||
res = connection.execute(s).fetchmany(limit)
|
||||
for resource_id, recent, ever in res:
|
||||
item = model.Session.query(model.Resource)\
|
||||
.filter("resource.id = '%s'" % resource_id)
|
||||
item = model.Session.query(model.Resource).filter(
|
||||
"resource.id = '%s'" % resource_id
|
||||
)
|
||||
if not item.count():
|
||||
continue
|
||||
items.append((item.first(), recent, ever))
|
||||
|
|
|
@ -10,10 +10,9 @@ def _prepare_credentials(credentials_filename):
|
|||
Either returns the user's oauth credentials or uses the credentials
|
||||
file to generate a token (by forcing the user to login in the browser)
|
||||
"""
|
||||
scope = ['https://www.googleapis.com/auth/analytics.readonly']
|
||||
scope = ["https://www.googleapis.com/auth/analytics.readonly"]
|
||||
credentials = ServiceAccountCredentials.from_json_keyfile_name(
|
||||
credentials_filename,
|
||||
scopes=scope
|
||||
credentials_filename, scopes=scope
|
||||
)
|
||||
return credentials
|
||||
|
||||
|
@ -29,7 +28,7 @@ def init_service(credentials_file):
|
|||
credentials = _prepare_credentials(credentials_file)
|
||||
http = credentials.authorize(http) # authorize the http object
|
||||
|
||||
return build('analytics', 'v3', http=http)
|
||||
return build("analytics", "v3", http=http)
|
||||
|
||||
|
||||
def get_profile_id(service):
|
||||
|
@ -42,23 +41,31 @@ def get_profile_id(service):
|
|||
"""
|
||||
accounts = service.management().accounts().list().execute()
|
||||
|
||||
if not accounts.get('items'):
|
||||
if not accounts.get("items"):
|
||||
return None
|
||||
|
||||
accountName = config.get('googleanalytics.account')
|
||||
webPropertyId = config.get('googleanalytics.id')
|
||||
for acc in accounts.get('items'):
|
||||
if acc.get('name') == accountName:
|
||||
accountId = acc.get('id')
|
||||
accountName = config.get("googleanalytics.account")
|
||||
webPropertyId = config.get("googleanalytics.id")
|
||||
for acc in accounts.get("items"):
|
||||
if acc.get("name") == accountName:
|
||||
accountId = acc.get("id")
|
||||
|
||||
# TODO: check, whether next line is doing something useful.
|
||||
webproperties = service.management().webproperties().list(
|
||||
accountId=accountId).execute()
|
||||
webproperties = (
|
||||
service.management()
|
||||
.webproperties()
|
||||
.list(accountId=accountId)
|
||||
.execute()
|
||||
)
|
||||
|
||||
profiles = service.management().profiles().list(
|
||||
accountId=accountId, webPropertyId=webPropertyId).execute()
|
||||
profiles = (
|
||||
service.management()
|
||||
.profiles()
|
||||
.list(accountId=accountId, webPropertyId=webPropertyId)
|
||||
.execute()
|
||||
)
|
||||
|
||||
if profiles.get('items'):
|
||||
return profiles.get('items')[0].get('id')
|
||||
if profiles.get("items"):
|
||||
return profiles.get("items")[0].get("id")
|
||||
|
||||
return None
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
from ckan import model
|
||||
|
||||
def setup():
|
||||
connection = model.Session.connection()
|
||||
connection.execute("""CREATE TABLE IF NOT EXISTS package_downloads (
|
||||
id integer primary_key,
|
||||
package_id varchar(60),
|
||||
download_visits integer,
|
||||
views_visits integer);""")
|
||||
|
||||
|
|
@ -1,275 +0,0 @@
|
|||
import ast
|
||||
import logging
|
||||
import urllib
|
||||
import commands
|
||||
import paste.deploy.converters as converters
|
||||
from ckan.lib.base import c
|
||||
import ckan.lib.helpers as h
|
||||
import ckan.plugins as p
|
||||
from routes.mapper import SubMapper
|
||||
from pylons import config
|
||||
from ckan.controllers.package import PackageController
|
||||
|
||||
import urllib2
|
||||
import importlib
|
||||
import hashlib
|
||||
|
||||
import threading
|
||||
import Queue
|
||||
|
||||
log = logging.getLogger('ckanext.googleanalytics')
|
||||
|
||||
|
||||
def _post_analytics(
|
||||
user, event_type, request_obj_type, request_function, request_id):
|
||||
|
||||
if config.get('googleanalytics.id'):
|
||||
data_dict = {
|
||||
"v": 1,
|
||||
"tid": config.get('googleanalytics.id'),
|
||||
"cid": hashlib.md5(c.user).hexdigest(),
|
||||
# customer id should be obfuscated
|
||||
"t": "event",
|
||||
"dh": c.environ['HTTP_HOST'],
|
||||
"dp": c.environ['PATH_INFO'],
|
||||
"dr": c.environ.get('HTTP_REFERER', ''),
|
||||
"ec": event_type,
|
||||
"ea": request_obj_type + request_function,
|
||||
"el": request_id,
|
||||
}
|
||||
GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
|
||||
|
||||
|
||||
def wrap_resource_download(func):
|
||||
|
||||
def func_wrapper(cls, id, resource_id, filename=None):
|
||||
_post_analytics(
|
||||
c.user,
|
||||
"CKAN Resource Download Request",
|
||||
"Resource",
|
||||
"Download",
|
||||
resource_id
|
||||
)
|
||||
|
||||
return func(cls, id, resource_id, filename=None)
|
||||
|
||||
return func_wrapper
|
||||
|
||||
|
||||
class GoogleAnalyticsException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class AnalyticsPostThread(threading.Thread):
|
||||
"""Threaded Url POST"""
|
||||
def __init__(self, queue):
|
||||
threading.Thread.__init__(self)
|
||||
self.queue = queue
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
# grabs host from queue
|
||||
data_dict = self.queue.get()
|
||||
|
||||
data = urllib.urlencode(data_dict)
|
||||
log.debug("Sending API event to Google Analytics: " + data)
|
||||
# send analytics
|
||||
urllib2.urlopen(
|
||||
"http://www.google-analytics.com/collect",
|
||||
data,
|
||||
# timeout in seconds
|
||||
# https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
|
||||
10)
|
||||
|
||||
# signals to queue job is done
|
||||
self.queue.task_done()
|
||||
|
||||
|
||||
class GoogleAnalyticsPlugin(p.SingletonPlugin):
    '''CKAN plugin that adds Google Analytics tracking to a site.

    It reads the ``googleanalytics.*`` settings, exposes the
    ``googleanalytics_header`` template helper, routes selected API URLs to
    this extension's controllers and wraps resource downloads so that they
    are reported as events.
    '''
    p.implements(p.IConfigurable, inherit=True)
    p.implements(p.IRoutes, inherit=True)
    p.implements(p.IConfigurer, inherit=True)
    p.implements(p.ITemplateHelpers)

    # Events waiting to be sent; consumed by the AnalyticsPostThread workers
    # started in configure().
    analytics_queue = Queue.Queue()

    def configure(self, config):
        '''Load config settings for this extension from config file.

        See IConfigurable.

        :raises GoogleAnalyticsException: if ``googleanalytics.id`` is not
            present in the config
        '''
        if 'googleanalytics.id' not in config:
            msg = "Missing googleanalytics.id in config"
            raise GoogleAnalyticsException(msg)
        self.googleanalytics_id = config['googleanalytics.id']
        self.googleanalytics_domain = config.get(
            'googleanalytics.domain', 'auto')
        self.googleanalytics_fields = ast.literal_eval(config.get(
            'googleanalytics.fields', '{}'))

        googleanalytics_linked_domains = config.get(
            'googleanalytics.linked_domains', ''
        )
        # Test the stripped value so entries made only of whitespace
        # ("a, ,b") are dropped instead of surviving as ''.
        self.googleanalytics_linked_domains = [
            x.strip()
            for x in googleanalytics_linked_domains.split(',')
            if x.strip()
        ]

        if self.googleanalytics_linked_domains:
            self.googleanalytics_fields['allowLinker'] = 'true'

        self.googleanalytics_javascript_url = h.url_for_static(
            '/scripts/ckanext-googleanalytics.js')

        # If resource_prefix is not in config file then write the default value
        # to the config dict, otherwise templates seem to get 'true' when they
        # try to read resource_prefix from config.
        if 'googleanalytics_resource_prefix' not in config:
            config['googleanalytics_resource_prefix'] = (
                commands.DEFAULT_RESOURCE_URL_TAG)
        self.googleanalytics_resource_prefix = config[
            'googleanalytics_resource_prefix']

        self.show_downloads = converters.asbool(
            config.get('googleanalytics.show_downloads', True))
        self.track_events = converters.asbool(
            config.get('googleanalytics.track_events', False))
        self.enable_user_id = converters.asbool(
            config.get('googleanalytics.enable_user_id', False))

        if not converters.asbool(config.get('ckan.legacy_templates', 'false')):
            p.toolkit.add_resource('fanstatic_library', 'ckanext-googleanalytics')

        # spawn a pool of 5 threads, and pass them queue instance
        for _ in range(5):
            t = AnalyticsPostThread(self.analytics_queue)
            t.setDaemon(True)
            t.start()

    def update_config(self, config):
        '''Change the CKAN (Pylons) environment configuration.

        See IConfigurer.

        '''
        if converters.asbool(config.get('ckan.legacy_templates', 'false')):
            p.toolkit.add_template_directory(config, 'legacy_templates')
            p.toolkit.add_public_directory(config, 'legacy_public')
        else:
            p.toolkit.add_template_directory(config, 'templates')

    def before_map(self, map):
        '''Add new routes that this extension's controllers handle.

        See IRoutes.

        :returns: the mapper that was passed in, with the routes connected
        '''
        # Helpers to reduce code clutter
        GET = dict(method=['GET'])
        PUT = dict(method=['PUT'])
        POST = dict(method=['POST'])
        DELETE = dict(method=['DELETE'])
        GET_POST = dict(method=['GET', 'POST'])
        # intercept API calls that we want to capture analytics on
        register_list = [
            'package',
            'dataset',
            'resource',
            'tag',
            'group',
            'related',
            'revision',
            'licenses',
            'rating',
            'user',
            'activity'
        ]
        register_list_str = '|'.join(register_list)
        # /api ver 3 or none
        with SubMapper(map, controller='ckanext.googleanalytics.controller:GAApiController', path_prefix='/api{ver:/3|}',
                       ver='/3') as m:
            m.connect('/action/{logic_function}', action='action',
                      conditions=GET_POST)

        # /api ver 1, 2, 3 or none
        with SubMapper(map, controller='ckanext.googleanalytics.controller:GAApiController', path_prefix='/api{ver:/1|/2|/3|}',
                       ver='/1') as m:
            m.connect('/search/{register}', action='search')

        # /api/rest ver 1, 2 or none
        with SubMapper(map, controller='ckanext.googleanalytics.controller:GAApiController', path_prefix='/api{ver:/1|/2|}',
                       ver='/1', requirements=dict(register=register_list_str)
                       ) as m:

            m.connect('/rest/{register}', action='list', conditions=GET)
            m.connect('/rest/{register}', action='create', conditions=POST)
            m.connect('/rest/{register}/{id}', action='show', conditions=GET)
            m.connect('/rest/{register}/{id}', action='update', conditions=PUT)
            m.connect('/rest/{register}/{id}', action='update', conditions=POST)
            m.connect('/rest/{register}/{id}', action='delete', conditions=DELETE)

        return map

    def after_map(self, map):
        '''Add new routes that this extension's controllers handle.

        See IRoutes.

        :returns: the mapper that was passed in, with the routes connected
        '''
        self.modify_resource_download_route(map)
        map.redirect("/analytics/package/top", "/analytics/dataset/top")
        map.connect(
            'analytics', '/analytics/dataset/top',
            controller='ckanext.googleanalytics.controller:GAController',
            action='view'
        )
        return map

    def get_helpers(self):
        '''Return the CKAN 2.0 template helper functions this plugin provides.

        See ITemplateHelpers.

        '''
        return {'googleanalytics_header': self.googleanalytics_header}

    def googleanalytics_header(self):
        '''Render the googleanalytics_header snippet for CKAN 2.0 templates.

        This is a template helper function that renders the
        googleanalytics_header jinja snippet. To be called from the jinja
        templates in this extension, see ITemplateHelpers.

        '''
        # Work on a per-call copy. Writing userId into the plugin's shared
        # dict kept the last logged-in user's id there, so it was also
        # rendered into pages requested later by anonymous visitors.
        fields = dict(self.googleanalytics_fields)
        if self.enable_user_id and c.user:
            fields['userId'] = str(c.userobj.id)

        data = {
            'googleanalytics_id': self.googleanalytics_id,
            'googleanalytics_domain': self.googleanalytics_domain,
            'googleanalytics_fields': str(fields),
            'googleanalytics_linked_domains': self.googleanalytics_linked_domains
        }
        return p.toolkit.render_snippet(
            'googleanalytics/snippets/googleanalytics_header.html', data)

    def modify_resource_download_route(self, map):
        '''Modifies resource_download method in related controller
        to attach GA tracking code.

        Nothing is changed when the mapper has no ``_routenames`` entry in
        its ``__dict__``.
        '''

        if '_routenames' in map.__dict__:
            if 'resource_download' in map.__dict__['_routenames']:
                # A route named resource_download exists: wrap the method on
                # the controller class that route points at
                # ("module.path:ClassName").
                route_data = map.__dict__['_routenames']['resource_download'].__dict__
                route_controller = route_data['defaults']['controller'].split(
                    ':')
                module = importlib.import_module(route_controller[0])
                controller_class = getattr(module, route_controller[1])
                controller_class.resource_download = wrap_resource_download(
                    controller_class.resource_download)
            else:
                # If no custom uploader applied, use the default one
                PackageController.resource_download = wrap_resource_download(
                    PackageController.resource_download)
|
|
@ -0,0 +1,163 @@
|
|||
from __future__ import absolute_import
|
||||
import ast
|
||||
import logging
|
||||
import urllib
|
||||
import ckanext.googleanalytics.commands as commands
|
||||
import paste.deploy.converters as converters
|
||||
import ckan.lib.helpers as h
|
||||
import ckan.plugins as p
|
||||
import ckan.plugins.toolkit as tk
|
||||
import urllib2
|
||||
from ckan.exceptions import CkanVersionException
|
||||
import threading
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
tk.requires_ckan_version("2.9")
|
||||
except CkanVersionException:
|
||||
from ckanext.googleanalytics.plugin.paster_plugin import GAMixinPlugin
|
||||
else:
|
||||
from ckanext.googleanalytics.plugin.flask_plugin import GAMixinPlugin
|
||||
|
||||
|
||||
class GoogleAnalyticsException(Exception):
    """Error raised by the Google Analytics extension."""
|
||||
|
||||
|
||||
class AnalyticsPostThread(threading.Thread):
    """Worker thread that POSTs queued events to Google Analytics.

    Each item taken from the queue is a dict of request parameters; it is
    url-encoded and sent to the ``/collect`` endpoint.
    """

    def __init__(self, queue):
        """
        :param queue: queue object providing ``get()`` and ``task_done()``,
            from which the event dicts are read
        """
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Send queued events forever, one at a time.

        A failed send is logged and dropped so that the worker keeps
        running; ``task_done()`` is called for every item either way.
        """
        while True:
            # blocks until an event dict is available
            data_dict = self.queue.get()
            try:
                data = urllib.urlencode(data_dict)
                log.debug("Sending API event to Google Analytics: " + data)
                # send analytics
                response = urllib2.urlopen(
                    "http://www.google-analytics.com/collect",
                    data,
                    # timeout in seconds
                    # https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
                    10,
                )
                # release the connection; the response body is not needed
                response.close()
            except Exception:
                # Tracking is best-effort. Without this handler a single
                # network error ended the thread, and once every worker had
                # died the queue just kept growing.
                log.exception("Failed to send API event to Google Analytics")
            finally:
                # signals to queue job is done, even when sending failed
                self.queue.task_done()
|
||||
|
||||
|
||||
class GoogleAnalyticsPlugin(GAMixinPlugin, p.SingletonPlugin):
    """CKAN plugin that adds Google Analytics tracking to a site.

    Settings and the ``googleanalytics_header`` template helper live here.
    Routing comes from whichever ``GAMixinPlugin`` was imported for the
    running CKAN version; ``configure`` also reads ``analytics_queue`` from
    it.
    """

    p.implements(p.IConfigurable, inherit=True)
    p.implements(p.IConfigurer, inherit=True)
    p.implements(p.ITemplateHelpers)

    def configure(self, config):
        """Load config settings for this extension from config file.

        See IConfigurable.

        :raises GoogleAnalyticsException: if ``googleanalytics.id`` is not
            present in the config
        """
        if "googleanalytics.id" not in config:
            msg = "Missing googleanalytics.id in config"
            raise GoogleAnalyticsException(msg)
        self.googleanalytics_id = config["googleanalytics.id"]
        self.googleanalytics_domain = config.get(
            "googleanalytics.domain", "auto"
        )
        self.googleanalytics_fields = ast.literal_eval(
            config.get("googleanalytics.fields", "{}")
        )

        googleanalytics_linked_domains = config.get(
            "googleanalytics.linked_domains", ""
        )
        # Test the stripped value so entries made only of whitespace
        # ("a, ,b") are dropped instead of surviving as "".
        self.googleanalytics_linked_domains = [
            x.strip()
            for x in googleanalytics_linked_domains.split(",")
            if x.strip()
        ]

        if self.googleanalytics_linked_domains:
            self.googleanalytics_fields["allowLinker"] = "true"

        self.googleanalytics_javascript_url = h.url_for_static(
            "/scripts/ckanext-googleanalytics.js"
        )

        # If resource_prefix is not in config file then write the default value
        # to the config dict, otherwise templates seem to get 'true' when they
        # try to read resource_prefix from config.
        if "googleanalytics_resource_prefix" not in config:
            config[
                "googleanalytics_resource_prefix"
            ] = commands.DEFAULT_RESOURCE_URL_TAG
        self.googleanalytics_resource_prefix = config[
            "googleanalytics_resource_prefix"
        ]

        self.show_downloads = converters.asbool(
            config.get("googleanalytics.show_downloads", True)
        )
        self.track_events = converters.asbool(
            config.get("googleanalytics.track_events", False)
        )
        self.enable_user_id = converters.asbool(
            config.get("googleanalytics.enable_user_id", False)
        )

        if not converters.asbool(config.get("ckan.legacy_templates", "false")):
            p.toolkit.add_resource(
                "../fanstatic_library", "ckanext-googleanalytics"
            )

        # spawn a pool of 5 threads, and pass them queue instance
        for _ in range(5):
            t = AnalyticsPostThread(self.analytics_queue)
            t.setDaemon(True)
            t.start()

    def update_config(self, config):
        """Change the CKAN (Pylons) environment configuration.

        See IConfigurer.

        """
        if converters.asbool(config.get("ckan.legacy_templates", "false")):
            p.toolkit.add_template_directory(config, "../legacy_templates")
            p.toolkit.add_public_directory(config, "../legacy_public")
        else:
            p.toolkit.add_template_directory(config, "../templates")

    def get_helpers(self):
        """Return the CKAN 2.0 template helper functions this plugin provides.

        See ITemplateHelpers.

        """
        return {"googleanalytics_header": self.googleanalytics_header}

    def googleanalytics_header(self):
        """Render the googleanalytics_header snippet for CKAN 2.0 templates.

        This is a template helper function that renders the
        googleanalytics_header jinja snippet. To be called from the jinja
        templates in this extension, see ITemplateHelpers.

        """
        # Work on a per-call copy. Writing userId into the plugin's shared
        # dict kept the last logged-in user's id there, so it was also
        # rendered into pages requested later by anonymous visitors.
        fields = dict(self.googleanalytics_fields)
        if self.enable_user_id and tk.c.user:
            fields["userId"] = str(tk.c.userobj.id)

        data = {
            "googleanalytics_id": self.googleanalytics_id,
            "googleanalytics_domain": self.googleanalytics_domain,
            "googleanalytics_fields": str(fields),
            "googleanalytics_linked_domains": self.googleanalytics_linked_domains,
        }
        return p.toolkit.render_snippet(
            "googleanalytics/snippets/googleanalytics_header.html", data
        )
|
|
@ -0,0 +1,15 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import Queue
|
||||
|
||||
import ckan.plugins as plugins
|
||||
|
||||
from ckanext.googleanalytics.views import ga
|
||||
|
||||
|
||||
class GAMixinPlugin(plugins.SingletonPlugin):
    """Blueprint-based routing half of the Google Analytics plugin.

    Implements IBlueprint by handing CKAN the ``ga`` blueprint imported
    from ``ckanext.googleanalytics.views``.
    """

    plugins.implements(plugins.IBlueprint)

    # Shared queue that tracking events are put on.
    analytics_queue = Queue.Queue()

    def get_blueprint(self):
        """Return the blueprints this plugin registers.

        See IBlueprint.
        """
        blueprints = [ga]
        return blueprints
|
|
@ -0,0 +1,165 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import Queue
|
||||
|
||||
import hashlib
|
||||
import importlib
|
||||
|
||||
import ckan.plugins as plugins
|
||||
import ckan.plugins.toolkit as tk
|
||||
|
||||
from ckan.controllers.package import PackageController
|
||||
from pylons import config
|
||||
from routes.mapper import SubMapper
|
||||
|
||||
|
||||
class GAMixinPlugin(plugins.SingletonPlugin):
    """Routes-based routing half of the Google Analytics plugin.

    Implements IRoutes: selected API URLs are connected to this extension's
    controllers, and the resource download action is wrapped so downloads
    are tracked.
    """

    plugins.implements(plugins.IRoutes)

    # Shared queue that tracking events are put on.
    analytics_queue = Queue.Queue()

    def before_map(self, map):
        """Connect the API routes whose requests should be tracked.

        See IRoutes.

        :returns: the mapper that was passed in
        """
        api_controller = "ckanext.googleanalytics.controller:GAApiController"

        # Helpers to reduce code clutter
        GET = dict(method=["GET"])
        PUT = dict(method=["PUT"])
        POST = dict(method=["POST"])
        DELETE = dict(method=["DELETE"])
        GET_POST = dict(method=["GET", "POST"])

        # intercept API calls that we want to capture analytics on
        register_list_str = "|".join(
            [
                "package",
                "dataset",
                "resource",
                "tag",
                "group",
                "related",
                "revision",
                "licenses",
                "rating",
                "user",
                "activity",
            ]
        )

        # /api ver 3 or none
        with SubMapper(
            map,
            controller=api_controller,
            path_prefix="/api{ver:/3|}",
            ver="/3",
        ) as action_routes:
            action_routes.connect(
                "/action/{logic_function}",
                action="action",
                conditions=GET_POST,
            )

        # /api ver 1, 2, 3 or none
        with SubMapper(
            map,
            controller=api_controller,
            path_prefix="/api{ver:/1|/2|/3|}",
            ver="/1",
        ) as search_routes:
            search_routes.connect("/search/{register}", action="search")

        # /api/rest ver 1, 2 or none
        rest_table = (
            ("/rest/{register}", "list", GET),
            ("/rest/{register}", "create", POST),
            ("/rest/{register}/{id}", "show", GET),
            ("/rest/{register}/{id}", "update", PUT),
            ("/rest/{register}/{id}", "update", POST),
            ("/rest/{register}/{id}", "delete", DELETE),
        )
        with SubMapper(
            map,
            controller=api_controller,
            path_prefix="/api{ver:/1|/2|}",
            ver="/1",
            requirements=dict(register=register_list_str),
        ) as rest_routes:
            # connected in table order, one route per row
            for path, action_name, conditions in rest_table:
                rest_routes.connect(
                    path, action=action_name, conditions=conditions
                )

        return map

    def after_map(self, map):
        """Wrap resource downloads and connect the analytics page routes.

        See IRoutes.

        :returns: the mapper that was passed in
        """
        self._modify_resource_download_route(map)
        map.redirect("/analytics/package/top", "/analytics/dataset/top")
        map.connect(
            "analytics",
            "/analytics/dataset/top",
            controller="ckanext.googleanalytics.controller:GAController",
            action="view",
        )
        return map

    def _modify_resource_download_route(self, map):
        """Attach GA tracking to the controller's ``resource_download``.

        Nothing is changed when the mapper has no ``_routenames`` entry in
        its ``__dict__``.
        """
        if "_routenames" not in map.__dict__:
            return

        named_routes = map.__dict__["_routenames"]
        if "resource_download" not in named_routes:
            # If no custom uploader applied, use the default one
            PackageController.resource_download = wrap_resource_download(
                PackageController.resource_download
            )
            return

        # The route's controller is given as "module.path:ClassName".
        route_data = named_routes["resource_download"].__dict__
        route_controller = route_data["defaults"]["controller"].split(":")
        module = importlib.import_module(route_controller[0])
        controller_class = getattr(module, route_controller[1])
        controller_class.resource_download = wrap_resource_download(
            controller_class.resource_download
        )
|
||||
|
||||
|
||||
def wrap_resource_download(func):
    """Wrap a controller's ``resource_download`` so each call is tracked.

    :param func: the original ``resource_download`` callable, invoked as
        ``func(cls, id, resource_id, filename=...)``
    :returns: a replacement callable with the same signature that queues a
        "CKAN Resource Download Request" event and then delegates to
        ``func``, returning its result
    """

    def func_wrapper(cls, id, resource_id, filename=None):
        _post_analytics(
            tk.c.user,
            "CKAN Resource Download Request",
            "Resource",
            "Download",
            resource_id,
        )

        # Forward the filename the caller supplied; it used to be replaced
        # by a hard-coded None, so the wrapped action never saw it.
        return func(cls, id, resource_id, filename=filename)

    return func_wrapper
|
||||
|
||||
|
||||
def _post_analytics(
    user, event_type, request_obj_type, request_function, request_id
):
    """Queue a Google Analytics "event" hit describing the current request.

    Nothing is queued when ``googleanalytics.id`` is missing or empty in the
    config. The hit is only placed on ``GAMixinPlugin.analytics_queue``; it
    is not sent from here.

    :param user: name of the acting user. Only its MD5 digest is put in the
        hit (as the client id); ``None`` or an empty value is hashed as "".
    :param event_type: value for the event category field ("ec")
    :param request_obj_type: first half of the event action field ("ea")
    :param request_function: second half of the event action field ("ea")
    :param request_id: value for the event label field ("el")
    """
    tracking_id = config.get("googleanalytics.id")
    if not tracking_id:
        return

    # Hash the user handed in by the caller (the argument used to be ignored
    # in favour of tk.c.user). md5() needs bytes: None raises TypeError and
    # a non-ASCII unicode name raises UnicodeEncodeError, so normalise first.
    identity = user or ""
    if not isinstance(identity, bytes):
        identity = identity.encode("utf-8")

    data_dict = {
        "v": 1,
        "tid": tracking_id,
        # customer id should be obfuscated
        "cid": hashlib.md5(identity).hexdigest(),
        "t": "event",
        "dh": tk.c.environ["HTTP_HOST"],
        "dp": tk.c.environ["PATH_INFO"],
        "dr": tk.c.environ.get("HTTP_REFERER", ""),
        "ec": event_type,
        "ea": request_obj_type + request_function,
        "el": request_id,
    }
    GAMixinPlugin.analytics_queue.put(data_dict)
|
|
@ -0,0 +1,89 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from flask import Blueprint
|
||||
import hashlib
|
||||
import ckan.views.api as api
|
||||
import ckan.views.resource as resource
|
||||
import ckan.logic as logic
|
||||
import logging
|
||||
from ckan.common import g
|
||||
import ckan.plugins.toolkit as tk
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
ga = Blueprint(u"google_analytics", "google_analytics",)
|
||||
|
||||
|
||||
def action(logic_function, ver=api.API_MAX_VERSION):
    """Record an API action call in Google Analytics, then run it.

    Tracking is best-effort: any error raised while building the event is
    logged at debug level and the request is still passed to CKAN's own API
    view.

    :param logic_function: name of the action being called
    :param ver: API version, passed through to ``api.action``
    :returns: whatever ``api.action(logic_function, ver)`` returns
    """
    try:
        function = logic.get_action(logic_function)
        side_effect_free = getattr(function, "side_effect_free", False)
        request_data = api._get_request_data(try_url_params=side_effect_free)
        if isinstance(request_data, dict):
            # Event label: "query" takes precedence over "q", which takes
            # precedence over "id". A label only exists for dict payloads,
            # so the event is posted from inside this branch.
            label = request_data.get("id", "")
            if "q" in request_data:
                label = request_data["q"]
            if "query" in request_data:
                label = request_data["query"]
            _post_analytics(
                g.user, "CKAN API Request", logic_function, "", label
            )
    except Exception as e:
        # "except Exception, e" is a syntax error on Python 3; the "as" form
        # works on Python 2.6+ as well.
        log.debug(e)

    return api.action(logic_function, ver)
|
||||
|
||||
|
||||
# Route both the unversioned and the versioned action API through the
# tracking view above.
ga.add_url_rule(
    u"/api/action/<logic_function>",
    methods=[u"GET", u"POST"],
    view_func=action,
)
# The blueprint is created without a url_prefix, so the "/api" part must be
# spelled out here as well. Without it this rule matched "/3/action/..."
# and calls to "/api/3/action/..." were never tracked.
ga.add_url_rule(
    u"/api/<int(min=3, max={0}):ver>/action/<logic_function>".format(
        api.API_MAX_VERSION
    ),
    methods=[u"GET", u"POST"],
    view_func=action,
)
|
||||
|
||||
|
||||
def download(id, resource_id, filename=None, package_type="dataset"):
    """Record a resource download in Google Analytics, then serve it.

    :param id: dataset identifier taken from the URL
    :param resource_id: resource identifier taken from the URL; also used
        as the event label
    :param filename: optional trailing filename from the URL
    :param package_type: package type passed to CKAN's download view
    :returns: whatever ``resource.download`` returns
    """
    _post_analytics(
        user=g.user,
        event_type="CKAN Resource Download Request",
        request_obj_type="Resource",
        request_function="Download",
        request_id=resource_id,
    )
    response = resource.download(package_type, id, resource_id, filename)
    return response


# Both download URL forms (with and without a filename) use the tracking
# view above.
ga.add_url_rule(
    u"/dataset/<id>/resource/<resource_id>/download",
    view_func=download,
)
ga.add_url_rule(
    u"/dataset/<id>/resource/<resource_id>/download/<filename>",
    view_func=download,
)
|
||||
|
||||
|
||||
def _post_analytics(
    user, event_type, request_obj_type, request_function, request_id
):
    """Queue a Google Analytics "event" hit describing the current request.

    Nothing is queued when ``googleanalytics.id`` is missing or empty in the
    config. The hit is only placed on
    ``GoogleAnalyticsPlugin.analytics_queue``; it is not sent from here.

    :param user: name of the acting user. Only its MD5 digest is put in the
        hit (as the client id); ``None`` or an empty value is hashed as "".
    :param event_type: value for the event category field ("ec")
    :param request_obj_type: first half of the event action field ("ea")
    :param request_function: second half of the event action field ("ea")
    :param request_id: value for the event label field ("el")
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid a circular import with the plugin
    # package - confirm.
    from ckanext.googleanalytics.plugin import GoogleAnalyticsPlugin

    tracking_id = tk.config.get("googleanalytics.id")
    if not tracking_id:
        return

    # Hash the user handed in by the caller (the argument used to be ignored
    # in favour of tk.c.user). md5() needs bytes: text raises TypeError on
    # Python 3, and on Python 2 None or a non-ASCII unicode name fails too.
    identity = user or ""
    if not isinstance(identity, bytes):
        identity = identity.encode("utf-8")

    data_dict = {
        "v": 1,
        "tid": tracking_id,
        # customer id should be obfuscated
        "cid": hashlib.md5(identity).hexdigest(),
        "t": "event",
        "dh": tk.request.environ["HTTP_HOST"],
        "dp": tk.request.environ["PATH_INFO"],
        "dr": tk.request.environ.get("HTTP_REFERER", ""),
        "ec": event_type,
        "ea": request_obj_type + request_function,
        "el": request_id,
    }
    GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
|
37
setup.py
37
setup.py
|
@ -1,28 +1,25 @@
|
|||
from setuptools import setup, find_packages
|
||||
|
||||
version = '0.1'
|
||||
version = "0.1"
|
||||
|
||||
setup(
|
||||
name='ckanext-googleanalytics',
|
||||
version=version,
|
||||
description="Add GA tracking and reporting to CKAN instance",
|
||||
long_description="""\
|
||||
name="ckanext-googleanalytics",
|
||||
version=version,
|
||||
description="Add GA tracking and reporting to CKAN instance",
|
||||
long_description="""\
|
||||
""",
|
||||
classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
|
||||
keywords='',
|
||||
author='Seb Bacon',
|
||||
author_email='seb.bacon@gmail.com',
|
||||
url='',
|
||||
license='',
|
||||
packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
|
||||
namespace_packages=['ckanext', 'ckanext.googleanalytics'],
|
||||
include_package_data=True,
|
||||
zip_safe=False,
|
||||
install_requires=[
|
||||
|
||||
],
|
||||
entry_points=\
|
||||
"""
|
||||
classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
|
||||
keywords="",
|
||||
author="Seb Bacon",
|
||||
author_email="seb.bacon@gmail.com",
|
||||
url="",
|
||||
license="",
|
||||
packages=find_packages(exclude=["ez_setup", "examples", "tests"]),
|
||||
namespace_packages=["ckanext", "ckanext.googleanalytics"],
|
||||
include_package_data=True,
|
||||
zip_safe=False,
|
||||
install_requires=[],
|
||||
entry_points="""
|
||||
[ckan.plugins]
|
||||
# Add plugins here, eg
|
||||
googleanalytics=ckanext.googleanalytics.plugin:GoogleAnalyticsPlugin
|
||||
|
|
|
@ -53,9 +53,8 @@ class ReusableServer(BaseHTTPServer.HTTPServer):
|
|||
|
||||
|
||||
def runmockserver():
|
||||
server_address = ('localhost', 6969)
|
||||
httpd = ReusableServer(server_address,
|
||||
MockHandler)
|
||||
server_address = ("localhost", 6969)
|
||||
httpd = ReusableServer(server_address, MockHandler)
|
||||
httpd_thread = threading.Thread(target=httpd.serve_til_quit)
|
||||
httpd_thread.setDaemon(True)
|
||||
httpd_thread.start()
|
||||
|
|
|
@ -15,7 +15,7 @@ import ckanext.googleanalytics.gasnippet as gasnippet
|
|||
|
||||
class MockClient(httplib.HTTPConnection):
|
||||
def request(self, http_request):
|
||||
filters = http_request.uri.query.get('filters')
|
||||
filters = http_request.uri.query.get("filters")
|
||||
path = http_request.uri.path
|
||||
if filters:
|
||||
if "dataset" in filters:
|
||||
|
@ -29,9 +29,9 @@ class MockClient(httplib.HTTPConnection):
|
|||
|
||||
class TestConfig(TestCase):
|
||||
def test_config(self):
|
||||
config = appconfig('config:test.ini', relative_to=conf_dir)
|
||||
config.local_conf['ckan.plugins'] = 'googleanalytics'
|
||||
config.local_conf['googleanalytics.id'] = ''
|
||||
config = appconfig("config:test.ini", relative_to=conf_dir)
|
||||
config.local_conf["ckan.plugins"] = "googleanalytics"
|
||||
config.local_conf["googleanalytics.id"] = ""
|
||||
command = LoadAnalytics("loadanalytics")
|
||||
command.CONFIG = config.local_conf
|
||||
self.assertRaises(Exception, command.run, [])
|
||||
|
@ -42,16 +42,19 @@ class TestLoadCommand(TestCase):
|
|||
def setup_class(cls):
|
||||
InitDB("initdb").run([]) # set up database tables
|
||||
|
||||
config = appconfig('config:test.ini', relative_to=conf_dir)
|
||||
config.local_conf['ckan.plugins'] = 'googleanalytics'
|
||||
config.local_conf['googleanalytics.username'] = 'borf'
|
||||
config.local_conf['googleanalytics.password'] = 'borf'
|
||||
config.local_conf['googleanalytics.id'] = 'UA-borf-1'
|
||||
config.local_conf['googleanalytics.show_downloads'] = 'true'
|
||||
config = appconfig("config:test.ini", relative_to=conf_dir)
|
||||
config.local_conf["ckan.plugins"] = "googleanalytics"
|
||||
config.local_conf["googleanalytics.username"] = "borf"
|
||||
config.local_conf["googleanalytics.password"] = "borf"
|
||||
config.local_conf["googleanalytics.id"] = "UA-borf-1"
|
||||
config.local_conf["googleanalytics.show_downloads"] = "true"
|
||||
cls.config = config.local_conf
|
||||
wsgiapp = make_app(config.global_conf, **config.local_conf)
|
||||
env = {'HTTP_ACCEPT': ('text/html;q=0.9,text/plain;'
|
||||
'q=0.8,image/png,*/*;q=0.5')}
|
||||
env = {
|
||||
"HTTP_ACCEPT": (
|
||||
"text/html;q=0.9,text/plain;" "q=0.8,image/png,*/*;q=0.5"
|
||||
)
|
||||
}
|
||||
cls.app = paste.fixture.TestApp(wsgiapp, extra_environ=env)
|
||||
CreateTestData.create()
|
||||
runmockserver()
|
||||
|
@ -64,14 +67,16 @@ class TestLoadCommand(TestCase):
|
|||
conn.getresponse()
|
||||
|
||||
def test_analytics_snippet(self):
|
||||
response = self.app.get(url_for(controller='tag', action='index'))
|
||||
code = gasnippet.header_code % (self.config['googleanalytics.id'],
|
||||
'auto')
|
||||
response = self.app.get(url_for(controller="tag", action="index"))
|
||||
code = gasnippet.header_code % (
|
||||
self.config["googleanalytics.id"],
|
||||
"auto",
|
||||
)
|
||||
assert code in response.body
|
||||
|
||||
def test_top_packages(self):
|
||||
command = LoadAnalytics("loadanalytics")
|
||||
command.TEST_HOST = MockClient('localhost', 6969)
|
||||
command.TEST_HOST = MockClient("localhost", 6969)
|
||||
command.CONFIG = self.config
|
||||
command.run([])
|
||||
packages = dbutil.get_top_packages()
|
||||
|
@ -81,31 +86,37 @@ class TestLoadCommand(TestCase):
|
|||
|
||||
def test_download_count_inserted(self):
|
||||
command = LoadAnalytics("loadanalytics")
|
||||
command.TEST_HOST = MockClient('localhost', 6969)
|
||||
command.TEST_HOST = MockClient("localhost", 6969)
|
||||
command.CONFIG = self.config
|
||||
command.run([])
|
||||
response = self.app.get(url_for(
|
||||
controller='package', action='read', id='annakarenina'
|
||||
))
|
||||
response = self.app.get(
|
||||
url_for(controller="package", action="read", id="annakarenina")
|
||||
)
|
||||
assert "[downloaded 4 times]" in response.body
|
||||
|
||||
def test_js_inserted_resource_view(self):
|
||||
from nose import SkipTest
|
||||
|
||||
raise SkipTest("Test won't work until CKAN 1.5.2")
|
||||
|
||||
from ckan.logic.action import get
|
||||
from ckan import model
|
||||
context = {'model': model, 'ignore_auth': True}
|
||||
data = {'id': 'annakarenina'}
|
||||
|
||||
context = {"model": model, "ignore_auth": True}
|
||||
data = {"id": "annakarenina"}
|
||||
pkg = get.package_show(context, data)
|
||||
resource_id = pkg['resources'][0]['id']
|
||||
resource_id = pkg["resources"][0]["id"]
|
||||
|
||||
command = LoadAnalytics("loadanalytics")
|
||||
command.TEST_HOST = MockClient('localhost', 6969)
|
||||
command.TEST_HOST = MockClient("localhost", 6969)
|
||||
command.CONFIG = self.config
|
||||
command.run([])
|
||||
response = self.app.get(url_for(
|
||||
controller='package', action='resource_read', id='annakarenina',
|
||||
resource_id=resource_id
|
||||
))
|
||||
response = self.app.get(
|
||||
url_for(
|
||||
controller="package",
|
||||
action="resource_read",
|
||||
id="annakarenina",
|
||||
resource_id=resource_id,
|
||||
)
|
||||
)
|
||||
assert 'onclick="javascript: _gaq.push(' in response.body
|
||||
|
|
Loading…
Reference in New Issue