proof of concept for accessing analytics data from CKAN

This commit is contained in:
Seb Bacon 2011-02-11 11:21:19 +00:00
parent 3f060228d3
commit 4d18d1c13d
4 changed files with 164 additions and 1 deletions

40
README.txt Normal file
View File

@ -0,0 +1,40 @@
A CKAN extension for doing things with Google Analytics:
* It sticks tracking code in your templates for you
* It provides a way for your controllers to access data from GA
The second item is all hard-coded, rough-and-ready, proof-of-concept
at the moment.
Installation
============
1. Install the extension as usual, e.g.
::
$ pip install -e hg+https://bitbucket.org/sebbacon/ckanext-googleanalytics#egg=ckanext-googleanalytics
2. Edit your development.ini (or similar) with:
::
googleanalytics.id = UA-1010101-1
googleanalytics.username = googleaccount@gmail.com
googleanalytics.password = googlepassword
# the following *must* match profile name in GA dashboard
googleanalytics.profile_name = mydomain.com/
3. Look at some stats within CKAN
Once your GA account has gathered some data, you can see some basic
information about the most popular packages at:
http://localhost:5000/analytics/package/top
TODO
====
* Turn the access-package-data-from-analytics-within-ckan
functionality into something resembling an API
* Understand the standard way to do caching in CKAN

View File

@ -1,9 +1,11 @@
import logging
log = logging.getLogger(__name__)
import os
from genshi.filters import Transformer
from genshi import HTML
from ckan.plugins import implements, SingletonPlugin
from ckan.plugins import IGenshiStreamFilter, IConfigurable
from ckan.plugins import IGenshiStreamFilter, IConfigurable, IRoutes
from ckan.plugins import IConfigurer
from gasnippet import gacode
@ -14,6 +16,8 @@ class GoogleAnalyticsException(Exception):
class GoogleAnalyticsPlugin(SingletonPlugin):
implements(IConfigurable, inherit=True)
implements(IGenshiStreamFilter, inherit=True)
implements(IRoutes, inherit=True)
implements(IConfigurer, inherit=True)
def configure(self, config):
self.config = config
@ -35,3 +39,17 @@ class GoogleAnalyticsPlugin(SingletonPlugin):
code = HTML(gacode % ga_id)
stream = stream | Transformer('head').append(code)
return stream
def after_map(self, map):
    """Add the analytics route to the given route mapper and return it.

    Connects the named route ``analytics`` at ``/analytics/package/top``
    to the ``view`` action of ``GAController``.
    """
    route_options = {
        'controller': 'ckanext.googleanalytics.controller:GAController',
        'action': 'view',
    }
    map.connect('analytics', '/analytics/package/top', **route_options)
    return map
def update_config(self, config):
    """Prepend this extension's template directory to the template paths.

    Updates ``config['extra_template_paths']`` (a comma-separated string)
    in place so that it starts with the ``ckanext/googleanalytics/templates``
    directory located relative to this file, followed by whatever was
    already configured.

    :param config: mapping holding the application configuration; it is
        modified in place.
    """
    here = os.path.dirname(__file__)
    rootdir = os.path.dirname(os.path.dirname(here))
    template_dir = os.path.join(rootdir, 'ckanext',
                                'googleanalytics', 'templates')
    existing = config.get('extra_template_paths', '')
    # Only join with a comma when something is already configured;
    # otherwise the value would end in a stray "," and therefore carry an
    # empty path entry.
    paths = [template_dir, existing] if existing else [template_dir]
    config['extra_template_paths'] = ','.join(paths)

View File

@ -0,0 +1,81 @@
from datetime import datetime
from datetime import timedelta
from pylons import config, request
from beaker import cache
from ckan.lib.base import *
from ckan.authz import Authorizer
from gdata.analytics import client
from ckan import model
from ckanext.googleanalytics import GoogleAnalyticsException
# URL path prefix used both to filter Google Analytics page paths and to
# strip the prefix off a page path to obtain the package name.
PACKAGE_URL = '/package/' # XXX get from routes...
class GAController(BaseController):
    """Controller that shows the most visited packages according to
    Google Analytics."""

    def view(self):
        """Render ``index.html`` with the top packages on the template context.

        The result of :meth:`get_top_packages` is stored as
        ``c.top_packages`` before rendering.
        """
        # get package objects corresponding to popular GA content
        c.top_packages = self.get_top_packages()
        return render('index.html')

    def __str__(self):
        # XXX hack to provide consistent cache key; what's the
        # canonical way of doing caching like this in CKAN right now?
        return "analyticscontroller"

    def get_top_packages(self):
        """Return ``(package, visits)`` pairs for popular package pages.

        Only the first ten rows returned by :meth:`_get_ga_data` are
        considered.  A row is skipped when the part of its page path after
        ``PACKAGE_URL`` is empty or contains a further ``/``, or when the
        authorized package query finds no package with that name.

        :returns: list of ``(package object, visits)`` tuples, in the
            order the rows were returned.
        """
        packages_data = self._get_ga_data()
        items = []
        authorizer = Authorizer()
        q = authorizer.authorized_query(None, model.Package)
        for page_path, visits in packages_data[:10]:
            url_frag = page_path[len(PACKAGE_URL):]
            if not url_frag or "/" in url_frag:
                continue
            # The page path comes from visitor-controlled URLs, so compare
            # it as a bound value rather than interpolating it into SQL
            # text.  A single first() replaces the former count() + first()
            # pair of queries.
            package = q.filter(model.Package.name == url_frag).first()
            if package is None:
                continue
            items.append((package, visits))
        return items

    @cache.cache(expire=3600)
    def _get_ga_data(self):
        """Fetch page-path visit figures for package pages from GA.

        Logs in with the ``googleanalytics.username`` and
        ``googleanalytics.password`` settings, looks up the profile whose
        title equals ``googleanalytics.profile_name`` and queries the last
        14 days of data for page paths starting with ``PACKAGE_URL``.

        :returns: list of ``(page path, ga:visits value)`` tuples, one per
            feed entry that carries a ``ga:pagePath`` dimension.
        :raises GoogleAnalyticsException: if no profile with the
            configured name is found in the account feed.
        """
        SOURCE_APP_NAME = "CKAN Google Analytics Plugin"
        username = config.get('googleanalytics.username')
        password = config.get('googleanalytics.password')
        profile_name = config.get('googleanalytics.profile_name')

        my_client = client.AnalyticsClient(source=SOURCE_APP_NAME)
        my_client.ClientLogin(username,
                              password,
                              SOURCE_APP_NAME)

        # Find the table id of the profile named in the configuration.
        account_query = client.AccountFeedQuery({'max-results': '300'})
        feed = my_client.GetAccountFeed(account_query)
        table_id = None
        for entry in feed.entry:
            if entry.title.text == profile_name:
                table_id = entry.table_id.text
                break
        if not table_id:
            msg = "Couldn't find a profile called '%s'" % profile_name
            raise GoogleAnalyticsException(msg)

        # Query window: the 14 days up to and including today.
        now = datetime.now()
        to_date = now.strftime("%Y-%m-%d")
        from_date = now - timedelta(14)
        from_date = from_date.strftime("%Y-%m-%d")
        # NOTE(review): the dimensions include ga:source and ga:medium, so
        # the same page path can presumably appear in several rows, and the
        # sort key (ga:newVisits) differs from the reported metric
        # (ga:visits) -- confirm whether that is intended.
        query = client.DataFeedQuery({'ids': '%s' % table_id,
                                      'start-date': from_date,
                                      'end-date': to_date,
                                      'dimensions': 'ga:source,ga:medium,ga:pagePath',
                                      'metrics': 'ga:visits,ga:visitors,ga:newVisits',
                                      'sort': '-ga:newVisits',
                                      'filters': 'ga:pagePath=~^%s' % PACKAGE_URL,
                                      'max-results': '50'
                                      })
        feed = my_client.GetDataFeed(query)

        packages = []
        for entry in feed.entry:
            # Reset per entry so a row without a ga:pagePath dimension can
            # neither reuse the previous row's path nor hit an unbound name.
            page_path = None
            for dim in entry.dimension:
                if dim.name == "ga:pagePath":
                    page_path = dim.value
            if page_path is None:
                continue
            visits = entry.get_metric('ga:visits').value
            packages.append((page_path, visits))
        return packages

View File

@ -0,0 +1,24 @@
<html xmlns:py="http://genshi.edgewall.org/"
xmlns:i18n="http://genshi.edgewall.org/i18n"
xmlns:xi="http://www.w3.org/2001/XInclude"
py:strip="">
<!--! Genshi template that lists the entries of c.top_packages. -->
<py:def function="page_title">Google analytics for CKAN</py:def>
<!--! Match template for the "content" element; presumably that element comes from layout.html, included below (confirm). -->
<div py:match="content">
<h3>Most visited packages in last 14 days:</h3>
<ul>
<!--! Each entry of c.top_packages is unpacked as (package, count). -->
<py:for each="package, count in c.top_packages">
<!--! Link text is the package title, falling back to its name; the link targets the package controller's read action. -->
<li>${count}: ${h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name))}
</li>
</py:for>
</ul>
</div>
<xi:include href="layout.html" />
</html>