big refactor
This commit is contained in:
parent
1f0f25a26f
commit
b69d89bf18
17
README.txt
17
README.txt
|
@ -25,13 +25,28 @@ Installation
|
|||
# the following *must* match profile name in GA dashboard
|
||||
googleanalytics.profile_name = mydomain.com/
|
||||
|
||||
3. Wait a day or so for some stats to be recorded in Google
|
||||
|
||||
3. Look at some stats within CKAN
|
||||
4. Import Google stats by running the following command from
|
||||
``src/ckanext-googleanalytics``::
|
||||
|
||||
paster loadanalytics --config=../ckan/development.ini
|
||||
|
||||
(Of course, pointing config at your specific site config)
|
||||
|
||||
5. Look at some stats within CKAN
|
||||
|
||||
Once your GA account has gathered some data, you can see some basic
|
||||
information about the most popular packages at:
|
||||
http://localhost:5000/analytics/package/top
|
||||
|
||||
By default the only data that is injected into the public-facing
|
||||
website is on the package page, where the number of downloads is
|
||||
displayed next to each resource.
|
||||
|
||||
6. Consider putting the import command as a daily cron job, or
|
||||
remember to run it by hand!
|
||||
|
||||
TODO
|
||||
====
|
||||
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
import logging
|
||||
import datetime
|
||||
from pylons import config
|
||||
from ckan.lib.cli import CkanCommand
|
||||
from gdata.analytics import client
|
||||
import ckan.model as model
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
import dbutil
|
||||
|
||||
log = logging.getLogger('ckanext.googleanalytics')
|
||||
PACKAGE_URL = '/package/' # XXX get from routes...
|
||||
DEFAULT_RESOURCE_URL_TAG = '/downloads/'
|
||||
|
||||
|
||||
class LoadAnalytics(CkanCommand):
    """Parse data from Google Analytics API and store it in a local
    database
    """
    # The class docstring above is runtime data: its first line becomes
    # ``summary`` and the whole text becomes ``usage``.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 0
    min_args = 0

    def command(self):
        """Run the import: load config, connect to GA, fetch and store."""
        self._load_config()
        self.resource_url_tag = config.get(
            'googleanalytics.resource_prefix', DEFAULT_RESOURCE_URL_TAG)
        self.setup_ga_connection()
        # funny dance we need to do to make sure we've got a
        # configured session
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)
        self.parse_and_save()

    def parse_and_save(self):
        """Fetch the GA figures, store them, and log how many were saved."""
        fetched = self.get_ga_data()
        self.save_ga_data(fetched)
        log.info("Saved %s records from google" % len(fetched))

    def save_ga_data(self, packages_data):
        """Write per-path visit counts into the local stats tables.

        ``packages_data`` maps a page path to a dict that may carry
        ``'recent'`` and ``'ever'`` counts; an absent count is stored as 0.
        Paths beginning with ``self.resource_url_tag`` are looked up as
        resources by URL, all other paths as packages by name.  Paths
        that cannot be resolved are logged and skipped.
        """
        dbutil.init_tables()
        prefix = self.resource_url_tag
        for path, visits in packages_data.items():
            recent_count = visits.get('recent', 0)
            total_count = visits.get('ever', 0)

            if not path.startswith(prefix):
                # Package page: whatever follows PACKAGE_URL must be a
                # bare package name, with no further path segments.
                package_name = path[len(PACKAGE_URL):]
                if "/" in package_name:
                    log.warning("%s not a valid package name" % package_name)
                    continue
                package = model.Package.by_name(package_name)
                if not package:
                    log.warning("Couldn't find package %s" % package_name)
                    continue
                dbutil.update_package_visits(package.id,
                                             recent_count,
                                             total_count)
                log.info("Updated %s with %s visits" % (package.id, visits))
                continue

            # Resource download: the remainder of the path is the URL.
            resource_url = path[len(prefix):]
            resource_query = model.Session.query(
                model.Resource).autoflush(True)
            resource = resource_query.filter_by(url=resource_url).first()
            if not resource:
                log.warning("Couldn't find resource %s" % resource_url)
                continue
            dbutil.update_resource_visits(resource.id,
                                          recent_count,
                                          total_count)
            log.info("Updated %s with %s visits" % (resource.id, visits))
        model.Session.commit()

    def setup_ga_connection(self):
        """Log in to Google Analytics and locate the configured profile.

        Reads ``googleanalytics.username``, ``googleanalytics.password``
        and ``googleanalytics.profile_name`` from the config.  On success
        stores the client on ``self.client`` and the matching profile's
        table id on ``self.table_id``.

        Raises ``Exception`` when any of the three settings is missing or
        when no account entry has a title equal to the profile name.
        """
        SOURCE_APP_NAME = "CKAN Google Analytics Plugin"
        username = config.get('googleanalytics.username')
        password = config.get('googleanalytics.password')
        profile_name = config.get('googleanalytics.profile_name')
        if not (username and password and profile_name):
            raise Exception("No googleanalytics profile info in config")

        ga_client = client.AnalyticsClient(source=SOURCE_APP_NAME)
        ga_client.ClientLogin(username, password, SOURCE_APP_NAME)
        accounts = ga_client.GetAccountFeed(
            client.AccountFeedQuery({'max-results': '300'}))

        table_id = None
        for account in accounts.entry:
            if account.title.text == profile_name:
                table_id = account.table_id.text
                break
        if not table_id:
            raise Exception(
                "Couldn't find a profile called '%s'" % profile_name)

        self.table_id = table_id
        self.client = ga_client

    def ga_query(self, query_filter=None, from_date=None):
        """Return the GA data feed for page paths from ``from_date`` to today.

        ``query_filter`` is passed through as the GA ``filters`` value and
        ``from_date`` as ``start-date``; the end date is always today.
        Requires ``self.table_id`` and ``self.client`` to be set already.
        """
        to_date = datetime.datetime.now().strftime("%Y-%m-%d")
        metrics = 'ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews'
        parameters = {
            'ids': '%s' % self.table_id,
            'start-date': from_date,
            'end-date': to_date,
            'dimensions': 'ga:pagePath',
            'metrics': metrics,
            'sort': '-ga:newVisits',
            'filters': query_filter,
            'max-results': '10000',
        }
        return self.client.GetDataFeed(client.DataFeedQuery(parameters))

    def get_ga_data(self, query_filter=None):
        """Return a dictionary like
        {'identifier': {'recent':3, 'ever':6}}

        The identifier is the GA page path.  'recent' covers the last 14
        days and 'ever' everything since 2005-01-01; the count is the
        ``ga:uniquePageviews`` metric.  Both package paths and resource
        download paths are queried.

        NOTE: ``query_filter`` is accepted but not used by this method.
        """
        today = datetime.datetime.now()
        two_weeks_ago = (today - datetime.timedelta(14)).strftime("%Y-%m-%d")
        beginning = datetime.date(2005, 1, 1)
        start_dates = {'recent': two_weeks_ago, 'ever': beginning}
        path_filters = ['ga:pagePath=~^%s' % PACKAGE_URL,
                        'ga:pagePath=~^%s' % self.resource_url_tag]

        results = {}
        for label, start in start_dates.items():
            for path_filter in path_filters:
                feed = self.ga_query(query_filter=path_filter,
                                     from_date=start)
                for entry in feed.entry:
                    for dim in entry.dimension:
                        if dim.name != "ga:pagePath":
                            continue
                        views = entry.get_metric(
                            'ga:uniquePageviews').value or 0
                        results.setdefault(dim.value, {})[label] = views
        return results
|
||||
|
|
@ -1,20 +1,15 @@
|
|||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from pylons import config, request
|
||||
from beaker import cache
|
||||
import logging
|
||||
from ckan.lib.base import *
|
||||
from ckan.authz import Authorizer
|
||||
from gdata.analytics import client
|
||||
from ckan import model
|
||||
from ckan.model.authz import PSEUDO_USER__VISITOR
|
||||
from ckanext.googleanalytics import GoogleAnalyticsException
|
||||
import dbutil
|
||||
|
||||
PACKAGE_URL = '/package/' # XXX get from routes...
|
||||
|
||||
log = logging.getLogger('ckanext.googleanalytics')
|
||||
|
||||
|
||||
class GAController(BaseController):
|
||||
def view(self):
|
||||
# get package objects corresponding to popular GA content
|
||||
self.parse_ga_data()
|
||||
c.top_packages = self.get_top_packages()
|
||||
return render('index.html')
|
||||
|
||||
|
@ -24,61 +19,5 @@ class GAController(BaseController):
|
|||
return "analyticscontroller"
|
||||
|
||||
def get_top_packages(self):
|
||||
packages_data = self._get_ga_data()
|
||||
items = []
|
||||
authorizer = Authorizer()
|
||||
q = authorizer.authorized_query(PSEUDO_USER__VISITOR, model.Package)
|
||||
for package, visits in packages_data[:10]:
|
||||
url_frag = package[len(PACKAGE_URL):]
|
||||
if "/" in url_frag:
|
||||
continue
|
||||
item = q.filter("name = '%s'" % url_frag)
|
||||
if not item.count():
|
||||
continue
|
||||
items.append((item.first(), visits))
|
||||
items = dbutil.get_top_packages()
|
||||
return items
|
||||
|
||||
@cache.cache(expire=3600)
|
||||
def _get_ga_data(self):
|
||||
SOURCE_APP_NAME = "CKAN Google Analytics Plugin"
|
||||
username = config.get('googleanalytics.username')
|
||||
password = config.get('googleanalytics.password')
|
||||
profile_name = config.get('googleanalytics.profile_name')
|
||||
if not username or not password or not profile_name:
|
||||
return []
|
||||
my_client = client.AnalyticsClient(source=SOURCE_APP_NAME)
|
||||
my_client.ClientLogin(username,
|
||||
password,
|
||||
SOURCE_APP_NAME)
|
||||
account_query = client.AccountFeedQuery({'max-results': '300'})
|
||||
feed = my_client.GetAccountFeed(account_query)
|
||||
table_id = None
|
||||
for entry in feed.entry:
|
||||
if entry.title.text == profile_name:
|
||||
table_id = entry.table_id.text
|
||||
break
|
||||
if not table_id:
|
||||
msg = "Couldn't find a profile called '%s'" % profile_name
|
||||
raise GoogleAnalyticsException(msg)
|
||||
now = datetime.now()
|
||||
to_date = now.strftime("%Y-%m-%d")
|
||||
from_date = now - timedelta(14)
|
||||
from_date = from_date.strftime("%Y-%m-%d")
|
||||
query = client.DataFeedQuery({'ids': '%s' % table_id,
|
||||
'start-date': from_date,
|
||||
'end-date': to_date,
|
||||
'dimensions': 'ga:pagePath',
|
||||
'metrics': 'ga:visits,ga:visitors,ga:newVisits',
|
||||
'sort': '-ga:newVisits',
|
||||
'filters': 'ga:pagePath=~^%s' % PACKAGE_URL,
|
||||
'max-results': '50'
|
||||
})
|
||||
feed = my_client.GetDataFeed(query)
|
||||
packages = []
|
||||
for entry in feed.entry:
|
||||
for dim in entry.dimension:
|
||||
if dim.name == "ga:pagePath":
|
||||
package = dim.value
|
||||
newVisits = entry.get_metric('ga:visits').value
|
||||
packages.append((package, newVisits))
|
||||
return packages
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
import ckan.model as model
|
||||
from ckan.authz import Authorizer
|
||||
from ckan.model.authz import PSEUDO_USER__VISITOR
|
||||
from ckan.lib.base import *
|
||||
|
||||
|
||||
def init_tables():
    """Create the ``package_stats`` and ``resource_stats`` tables.

    Each table has a ``varchar(60)`` primary key (``package_id`` or
    ``resource_id``) plus integer ``visits_recently`` and ``visits_ever``
    columns.  A failure whose message contains "already exists" is
    treated as "table is already there" and ignored; any other error is
    re-raised.  The session is committed after each attempt.
    """
    tables = (('package_stats', 'package_id'),
              ('resource_stats', 'resource_id'))
    for table_name, key_column in tables:
        try:
            connection = model.Session.connection()
            # Table and column names are the constants above, never
            # caller-supplied values, so interpolating them is safe.
            connection.execute("""CREATE TABLE %s (
                %s varchar(60) primary key,
                visits_recently integer,
                visits_ever integer);""" % (table_name, key_column))
        except Exception as e:
            # Guard against exceptions with no args, and format the first
            # arg rather than assuming it is a string, so an unexpected
            # error surfaces as itself instead of IndexError/TypeError.
            detail = e.args[0] if e.args else ""
            if "already exists" not in "%s" % (detail,):
                raise
        model.Session.commit()
|
||||
|
||||
|
||||
def update_resource_visits(resource_id, recently, ever):
    """Store the visit counters for one resource in ``resource_stats``.

    Updates the existing row for ``resource_id`` when there is one,
    otherwise inserts a new row.

    NOTE: all three values are interpolated directly into the SQL text,
    so callers must only pass trusted values.
    """
    connection = model.Session.connection()
    existing = connection.execute(
        """SELECT count(resource_id) FROM resource_stats
        WHERE resource_id = '%s'""" % resource_id).fetchone()

    if not existing[0]:
        statement = """INSERT INTO resource_stats
            (resource_id, visits_recently, visits_ever) VALUES
            ('%s', %s, %s)""" % (resource_id, recently, ever)
    else:
        statement = """UPDATE resource_stats SET visits_recently = %s,
            visits_ever = %s
            WHERE resource_id = '%s'""" % (recently, ever, resource_id)
    connection.execute(statement)
|
||||
|
||||
|
||||
def get_resource_visits_for_url(url):
    """Return the all-time visit count recorded for a resource URL.

    Joins ``resource_stats`` to ``resource`` on the resource id and
    matches ``resource.url`` against ``url``.  Returns the ``visits_ever``
    value of the first matching row, or ``""`` when there is no match or
    the stored count is falsy (0 or NULL).
    """
    # Imported here so the fix does not depend on a module-level import.
    from sqlalchemy import text

    if url is None:
        # No URL to look up; formatting None into the query would search
        # for the literal string 'None'.
        return ""
    connection = model.Session.connection()
    # The URL originates from page content, so it is sent as a bound
    # parameter: a quote character in it can no longer break or alter
    # the statement.
    row = connection.execute(
        text("""SELECT visits_ever FROM resource_stats, resource
        WHERE resource_id = resource.id
        AND resource.url = :url"""), url=url).fetchone()
    if row is None:
        return ""
    return row[0] or ""
|
||||
|
||||
|
||||
def update_package_visits(package_id, recently, ever):
    """Store the visit counters for one package in ``package_stats``.

    Updates the existing row for ``package_id`` when there is one,
    otherwise inserts a new row.

    NOTE: all three values are interpolated directly into the SQL text,
    so callers must only pass trusted values.
    """
    connection = model.Session.connection()
    count = connection.execute(
        """SELECT count(package_id) FROM package_stats
        WHERE package_id = '%s'""" % package_id).fetchone()
    if count[0]:
        # Write both counters, mirroring the INSERT below.  The previous
        # statement had two placeholders for three values (TypeError at
        # format time) and named a ``visits`` column that the INSERT
        # never populates.
        connection.execute(
            """UPDATE package_stats SET visits_recently = %s,
            visits_ever = %s
            WHERE package_id = '%s'""" % (recently, ever, package_id)
            )
    else:
        connection.execute(
            """INSERT INTO package_stats
            (package_id, visits_recently, visits_ever) VALUES
            ('%s', %s, %s)""" % (package_id, recently, ever))
|
||||
|
||||
|
||||
def get_top_packages(limit=20):
    """Return ``(package, recent_visits)`` pairs, most visited first.

    Takes at most ``limit`` rows from ``package_stats`` ordered by
    ``visits_recently`` descending, then resolves each id through a
    package query authorized for ``PSEUDO_USER__VISITOR``.  Ids that the
    authorized query does not return are dropped, so the result may hold
    fewer than ``limit`` items.
    """
    items = []
    authorizer = Authorizer()
    q = authorizer.authorized_query(PSEUDO_USER__VISITOR,
                                    model.Package)
    connection = model.Session.connection()
    res = connection.execute("""SELECT package_id, visits_recently
        FROM package_stats
        ORDER BY visits_recently DESC;""").fetchmany(limit)
    for package_id, visits in res:
        # first() returns None when nothing matches, so a separate
        # count() query per package is unnecessary.
        package = q.filter("package.id = '%s'" % package_id).first()
        if package is None:
            continue
        items.append((package, visits))
    return items
|
|
@ -0,0 +1,11 @@
|
|||
from ckan import model
|
||||
|
||||
def setup():
    """Create the ``package_downloads`` table if it does not exist.

    Columns: integer primary key ``id``, ``package_id`` varchar(60), and
    integer ``download_visits`` and ``views_visits``.
    """
    connection = model.Session.connection()
    # "primary key" is two words in SQL; the former "primary_key" is not
    # a valid column constraint.
    connection.execute("""CREATE TABLE IF NOT EXISTS package_downloads (
        id integer primary key,
        package_id varchar(60),
        download_visits integer,
        views_visits integer);""")
|
||||
|
||||
|
|
@ -1,15 +1,19 @@
|
|||
import logging
|
||||
import urllib
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
import os
|
||||
from genshi.filters import Transformer
|
||||
from genshi import HTML
|
||||
from genshi.core import START, TEXT
|
||||
from genshi.filters.transform import INSIDE
|
||||
from pylons import config
|
||||
from ckan.plugins import implements, SingletonPlugin
|
||||
from ckan.plugins import IGenshiStreamFilter, IConfigurable, IRoutes
|
||||
from ckan.plugins import IConfigurer
|
||||
from ckan import model
|
||||
from gasnippet import gacode
|
||||
from commands import DEFAULT_RESOURCE_URL_TAG
|
||||
import dbutil
|
||||
|
||||
log = logging.getLogger('ckanext.googleanalytics')
|
||||
|
||||
|
||||
class GoogleAnalyticsException(Exception):
|
||||
|
@ -34,16 +38,36 @@ class GoogleAnalyticsPlugin(SingletonPlugin):
|
|||
ga_id = self.config['googleanalytics.id']
|
||||
code = HTML(gacode % ga_id)
|
||||
stream = stream | Transformer('head').append(code)
|
||||
resource_url = config.get('googleanalytics.resource_prefix',
|
||||
DEFAULT_RESOURCE_URL_TAG)
|
||||
|
||||
# add download tracking link
|
||||
def js_attr(name, event):
|
||||
attrs = event[1][1]
|
||||
link = '/downloads/%s' % urllib.quote(attrs.get('href'))
|
||||
link = '%s%s' % (resource_url,
|
||||
urllib.quote(attrs.get('href')))
|
||||
js = "javascript: _gaq.push(['_trackPageview', '%s']);" % link
|
||||
return js
|
||||
|
||||
# add some stats
|
||||
def download_adder(stream):
    """Append a download count after the text of each marked link.

    Consumes ``(mark, (kind, data, pos))`` events.  On a marked START
    event the element's ``href`` is looked up with
    ``dbutil.get_resource_visits_for_url``.  While that count is truthy,
    TEXT events marked INSIDE are yielded with the count markup appended;
    every other event is yielded unchanged.
    """
    # The span needs an attribute name; a bare <span="..."> is not valid
    # markup.
    download_html = ' <span class="downloads-count">(%s downloads)</span>'
    count = None
    for mark, (kind, data, pos) in stream:
        if mark and kind == START:
            href = data[1].get('href')
            count = dbutil.get_resource_visits_for_url(href)
        if count and kind == TEXT and mark == INSIDE:
            yield mark, (kind,
                         data + download_html % count,
                         pos)
        else:
            yield mark, (kind, data, pos)
|
||||
|
||||
# perform the stream transform
|
||||
stream = stream | Transformer(
|
||||
'//div[@id="package"]//td/a')\
|
||||
.attr('onclick', js_attr)
|
||||
.apply(download_adder).attr('onclick', js_attr)
|
||||
|
||||
return stream
|
||||
|
||||
|
|
Loading…
Reference in New Issue