big refactor
This commit is contained in:
parent
1f0f25a26f
commit
b69d89bf18
17
README.txt
17
README.txt
|
@ -25,13 +25,28 @@ Installation
|
||||||
# the following *must* match profile name in GA dashboard
|
# the following *must* match profile name in GA dashboard
|
||||||
googleanalytics.profile_name = mydomain.com/
|
googleanalytics.profile_name = mydomain.com/
|
||||||
|
|
||||||
|
3. Wait a day or so for some stats to be recorded in Google
|
||||||
|
|
||||||
3. Look at some stats within CKAN
|
4. Import Google stats by running the following command from
|
||||||
|
``src/ckanext-googleanalytics``::
|
||||||
|
|
||||||
|
paster loadanalytics --config=../ckan/development.ini
|
||||||
|
|
||||||
|
(Of course, pointing config at your specific site config)
|
||||||
|
|
||||||
|
5. Look at some stats within CKAN
|
||||||
|
|
||||||
Once your GA account has gathered some data, you can see some basic
|
Once your GA account has gathered some data, you can see some basic
|
||||||
information about the most popular packages at:
|
information about the most popular packages at:
|
||||||
http://localhost:5000/analytics/package/top
|
http://localhost:5000/analytics/package/top
|
||||||
|
|
||||||
|
By default the only data that is injected into the public-facing
|
||||||
|
website is on the package page, where the number of downloads is
|
||||||
|
displayed next to each resource.
|
||||||
|
|
||||||
|
6. Consider putting the import command as a daily cron job, or
|
||||||
|
remember to run it by hand!
|
||||||
|
|
||||||
TODO
|
TODO
|
||||||
====
|
====
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,132 @@
|
||||||
|
import logging
|
||||||
|
import datetime
|
||||||
|
from pylons import config
|
||||||
|
from ckan.lib.cli import CkanCommand
|
||||||
|
from gdata.analytics import client
|
||||||
|
import ckan.model as model
|
||||||
|
from sqlalchemy.orm import sessionmaker
|
||||||
|
|
||||||
|
import dbutil
|
||||||
|
|
||||||
|
log = logging.getLogger('ckanext.googleanalytics')
|
||||||
|
PACKAGE_URL = '/package/' # XXX get from routes...
|
||||||
|
DEFAULT_RESOURCE_URL_TAG = '/downloads/'
|
||||||
|
|
||||||
|
|
||||||
|
class LoadAnalytics(CkanCommand):
    """Parse data from Google Analytics API and store it in a local
    database
    """
    # NOTE: ``summary`` and ``usage`` are derived from the class docstring
    # above at class-creation time, so that docstring is runtime text.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 0
    min_args = 0

    # Date format used for the 'start-date' / 'end-date' query parameters.
    _GA_DATE_FORMAT = "%Y-%m-%d"

    def command(self):
        """Load the config, connect to Google Analytics and import stats.

        Reads ``googleanalytics.resource_prefix`` from the config (falling
        back to ``DEFAULT_RESOURCE_URL_TAG``), sets up the GA client, rebinds
        the model session to the configured engine and then runs
        :meth:`parse_and_save`.
        """
        self._load_config()
        self.resource_url_tag = config.get(
            'googleanalytics.resource_prefix',
            DEFAULT_RESOURCE_URL_TAG)
        self.setup_ga_connection()
        # funny dance we need to do to make sure we've got a
        # configured session
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)
        self.parse_and_save()

    def parse_and_save(self):
        """Fetch visit counts from Google Analytics and store them."""
        packages_data = self.get_ga_data()
        self.save_ga_data(packages_data)
        log.info("Saved %s records from google", len(packages_data))

    def save_ga_data(self, packages_data):
        """Store visit counts for packages and resources.

        :param packages_data: dictionary as returned by :meth:`get_ga_data`,
            mapping a page path to ``{'recent': ..., 'ever': ...}``.

        Paths starting with ``self.resource_url_tag`` are treated as
        resource downloads and matched against ``Resource.url``; all other
        paths are treated as ``PACKAGE_URL`` + package name.  Entries that
        cannot be matched are logged and skipped.
        """
        dbutil.init_tables()
        for identifier, visits in packages_data.items():
            recently = visits.get('recent', 0)
            ever = visits.get('ever', 0)
            if identifier.startswith(self.resource_url_tag):
                resource_url = identifier[len(self.resource_url_tag):]
                resource = model.Session.query(model.Resource)\
                    .autoflush(True)\
                    .filter_by(url=resource_url).first()
                if not resource:
                    log.warning("Couldn't find resource %s", resource_url)
                    continue
                dbutil.update_resource_visits(resource.id, recently, ever)
                log.info("Updated %s with %s visits", resource.id, visits)
            else:
                package_name = identifier[len(PACKAGE_URL):]
                # anything with a further path segment is a sub-page of a
                # package (or something else entirely), not a package name
                if "/" in package_name:
                    log.warning("%s not a valid package name", package_name)
                    continue
                item = model.Package.by_name(package_name)
                if not item:
                    log.warning("Couldn't find package %s", package_name)
                    continue
                dbutil.update_package_visits(item.id, recently, ever)
                log.info("Updated %s with %s visits", item.id, visits)
        model.Session.commit()

    def setup_ga_connection(self):
        """Log in to Google Analytics and locate the configured profile.

        Reads ``googleanalytics.username``, ``googleanalytics.password`` and
        ``googleanalytics.profile_name`` from the config.  On success sets
        ``self.client`` and ``self.table_id``.

        :raises Exception: if any of the three settings is missing, or if
            no account feed entry has a title equal to the profile name.
        """
        SOURCE_APP_NAME = "CKAN Google Analytics Plugin"
        username = config.get('googleanalytics.username')
        password = config.get('googleanalytics.password')
        profile_name = config.get('googleanalytics.profile_name')
        if not (username and password and profile_name):
            raise Exception("No googleanalytics profile info in config")

        my_client = client.AnalyticsClient(source=SOURCE_APP_NAME)
        my_client.ClientLogin(username, password, SOURCE_APP_NAME)
        account_query = client.AccountFeedQuery({'max-results': '300'})
        feed = my_client.GetAccountFeed(account_query)

        table_id = None
        for entry in feed.entry:
            if entry.title.text == profile_name:
                table_id = entry.table_id.text
                break
        if not table_id:
            msg = "Couldn't find a profile called '%s'" % profile_name
            raise Exception(msg)

        self.table_id = table_id
        self.client = my_client

    def ga_query(self, query_filter=None, from_date=None):
        """Run a data feed query from ``from_date`` until today.

        :param query_filter: value for the query's ``filters`` parameter.
        :param from_date: start date, either a ``YYYY-MM-DD`` string or a
            ``date``/``datetime`` object (which is formatted here).
        :returns: the feed returned by ``self.client.GetDataFeed``.

        Requires :meth:`setup_ga_connection` to have been called first.
        """
        to_date = datetime.datetime.now().strftime(self._GA_DATE_FORMAT)
        if hasattr(from_date, 'strftime'):
            from_date = from_date.strftime(self._GA_DATE_FORMAT)
        metrics = 'ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews'
        query = client.DataFeedQuery({
            'ids': '%s' % self.table_id,
            'start-date': from_date,
            'end-date': to_date,
            'dimensions': 'ga:pagePath',
            'metrics': metrics,
            'sort': '-ga:newVisits',
            'filters': query_filter,
            'max-results': '10000'
        })
        return self.client.GetDataFeed(query)

    def get_ga_data(self, query_filter=None):
        """Return a dictionary like
        {'identifier': {'recent':3, 'ever':6}}

        'recent' covers the last 14 days; 'ever' starts at 2005-01-01.
        Both package pages (``PACKAGE_URL``) and resource downloads
        (``self.resource_url_tag``) are queried.

        NOTE(review): ``query_filter`` is accepted but not used; it is kept
        only so the signature stays unchanged.
        """
        now = datetime.datetime.now()
        recent_date = (now - datetime.timedelta(14)).strftime(
            self._GA_DATE_FORMAT)
        # format the floor date too, so both start dates are plain strings
        floor_date = datetime.date(2005, 1, 1).strftime(self._GA_DATE_FORMAT)
        dates = {'recent': recent_date, 'ever': floor_date}
        queries = ['ga:pagePath=~^%s' % PACKAGE_URL,
                   'ga:pagePath=~^%s' % self.resource_url_tag]

        packages = {}
        for date_name, date in dates.items():
            for query in queries:
                feed = self.ga_query(query_filter=query, from_date=date)
                for entry in feed.entry:
                    for dim in entry.dimension:
                        if dim.name == "ga:pagePath":
                            package = dim.value
                            count = entry.get_metric(
                                'ga:uniquePageviews').value or 0
                            packages.setdefault(
                                package, {})[date_name] = count
        return packages
|
||||||
|
|
|
@ -1,20 +1,15 @@
|
||||||
from datetime import datetime
|
import logging
|
||||||
from datetime import timedelta
|
|
||||||
from pylons import config, request
|
|
||||||
from beaker import cache
|
|
||||||
from ckan.lib.base import *
|
from ckan.lib.base import *
|
||||||
from ckan.authz import Authorizer
|
import dbutil
|
||||||
from gdata.analytics import client
|
|
||||||
from ckan import model
|
|
||||||
from ckan.model.authz import PSEUDO_USER__VISITOR
|
|
||||||
from ckanext.googleanalytics import GoogleAnalyticsException
|
|
||||||
|
|
||||||
PACKAGE_URL = '/package/' # XXX get from routes...
|
|
||||||
|
log = logging.getLogger('ckanext.googleanalytics')
|
||||||
|
|
||||||
|
|
||||||
class GAController(BaseController):
|
class GAController(BaseController):
|
||||||
def view(self):
|
def view(self):
|
||||||
# get package objects corresponding to popular GA content
|
# get package objects corresponding to popular GA content
|
||||||
|
self.parse_ga_data()
|
||||||
c.top_packages = self.get_top_packages()
|
c.top_packages = self.get_top_packages()
|
||||||
return render('index.html')
|
return render('index.html')
|
||||||
|
|
||||||
|
@ -24,61 +19,5 @@ class GAController(BaseController):
|
||||||
return "analyticscontroller"
|
return "analyticscontroller"
|
||||||
|
|
||||||
def get_top_packages(self):
|
def get_top_packages(self):
|
||||||
packages_data = self._get_ga_data()
|
items = dbutil.get_top_packages()
|
||||||
items = []
|
|
||||||
authorizer = Authorizer()
|
|
||||||
q = authorizer.authorized_query(PSEUDO_USER__VISITOR, model.Package)
|
|
||||||
for package, visits in packages_data[:10]:
|
|
||||||
url_frag = package[len(PACKAGE_URL):]
|
|
||||||
if "/" in url_frag:
|
|
||||||
continue
|
|
||||||
item = q.filter("name = '%s'" % url_frag)
|
|
||||||
if not item.count():
|
|
||||||
continue
|
|
||||||
items.append((item.first(), visits))
|
|
||||||
return items
|
return items
|
||||||
|
|
||||||
@cache.cache(expire=3600)
|
|
||||||
def _get_ga_data(self):
|
|
||||||
SOURCE_APP_NAME = "CKAN Google Analytics Plugin"
|
|
||||||
username = config.get('googleanalytics.username')
|
|
||||||
password = config.get('googleanalytics.password')
|
|
||||||
profile_name = config.get('googleanalytics.profile_name')
|
|
||||||
if not username or not password or not profile_name:
|
|
||||||
return []
|
|
||||||
my_client = client.AnalyticsClient(source=SOURCE_APP_NAME)
|
|
||||||
my_client.ClientLogin(username,
|
|
||||||
password,
|
|
||||||
SOURCE_APP_NAME)
|
|
||||||
account_query = client.AccountFeedQuery({'max-results': '300'})
|
|
||||||
feed = my_client.GetAccountFeed(account_query)
|
|
||||||
table_id = None
|
|
||||||
for entry in feed.entry:
|
|
||||||
if entry.title.text == profile_name:
|
|
||||||
table_id = entry.table_id.text
|
|
||||||
break
|
|
||||||
if not table_id:
|
|
||||||
msg = "Couldn't find a profile called '%s'" % profile_name
|
|
||||||
raise GoogleAnalyticsException(msg)
|
|
||||||
now = datetime.now()
|
|
||||||
to_date = now.strftime("%Y-%m-%d")
|
|
||||||
from_date = now - timedelta(14)
|
|
||||||
from_date = from_date.strftime("%Y-%m-%d")
|
|
||||||
query = client.DataFeedQuery({'ids': '%s' % table_id,
|
|
||||||
'start-date': from_date,
|
|
||||||
'end-date': to_date,
|
|
||||||
'dimensions': 'ga:pagePath',
|
|
||||||
'metrics': 'ga:visits,ga:visitors,ga:newVisits',
|
|
||||||
'sort': '-ga:newVisits',
|
|
||||||
'filters': 'ga:pagePath=~^%s' % PACKAGE_URL,
|
|
||||||
'max-results': '50'
|
|
||||||
})
|
|
||||||
feed = my_client.GetDataFeed(query)
|
|
||||||
packages = []
|
|
||||||
for entry in feed.entry:
|
|
||||||
for dim in entry.dimension:
|
|
||||||
if dim.name == "ga:pagePath":
|
|
||||||
package = dim.value
|
|
||||||
newVisits = entry.get_metric('ga:visits').value
|
|
||||||
packages.append((package, newVisits))
|
|
||||||
return packages
|
|
||||||
|
|
|
@ -0,0 +1,88 @@
|
||||||
|
import ckan.model as model
|
||||||
|
from ckan.authz import Authorizer
|
||||||
|
from ckan.model.authz import PSEUDO_USER__VISITOR
|
||||||
|
from ckan.lib.base import *
|
||||||
|
|
||||||
|
|
||||||
|
def _create_table_ignoring_existing(ddl):
    """Execute a CREATE TABLE statement, tolerating an existing table.

    Any error whose message does not contain "already exists" is re-raised.
    The session is committed afterwards in either case.
    """
    try:
        connection = model.Session.connection()
        connection.execute(ddl)
    except Exception as e:
        # str(e) rather than e.args[0]: args may be empty or non-string
        if "already exists" not in str(e):
            raise
    model.Session.commit()


def init_tables():
    """Create the ``package_stats`` and ``resource_stats`` tables.

    Safe to call repeatedly: a table that already exists is left alone.
    """
    _create_table_ignoring_existing("""CREATE TABLE package_stats (
        package_id varchar(60) primary key,
        visits_recently integer,
        visits_ever integer);""")
    _create_table_ignoring_existing("""CREATE TABLE resource_stats (
        resource_id varchar(60) primary key,
        visits_recently integer,
        visits_ever integer);""")
|
||||||
|
|
||||||
|
|
||||||
|
def update_resource_visits(resource_id, recently, ever):
    """Insert or update the visit counts stored for one resource.

    :param resource_id: id of the resource (key of ``resource_stats``).
    :param recently: value to store in ``visits_recently``.
    :param ever: value to store in ``visits_ever``.

    The caller is responsible for committing the session.
    """
    conn = model.Session.connection()
    existing = conn.execute(
        """SELECT count(resource_id) FROM resource_stats
        WHERE resource_id = '%s'""" % resource_id).fetchone()
    if existing[0]:
        # a row is already there: overwrite both counters
        statement = """UPDATE resource_stats SET visits_recently = %s,
        visits_ever = %s
        WHERE resource_id = '%s'""" % (recently, ever, resource_id)
    else:
        # first time this resource is seen
        statement = """INSERT INTO resource_stats
        (resource_id, visits_recently, visits_ever) VALUES
        ('%s', %s, %s)""" % (resource_id, recently, ever)
    conn.execute(statement)
|
||||||
|
|
||||||
|
|
||||||
|
def get_resource_visits_for_url(url):
    """Return ``visits_ever`` for the resource with the given URL.

    :param url: resource URL to look up in the ``resource`` table.
    :returns: the stored ``visits_ever`` value, or ``""`` when there is no
        matching row or the stored value is falsy (0 / NULL).

    The URL is passed as a bound parameter rather than formatted into the
    SQL, so URLs containing quotes neither break the query nor allow SQL
    injection.
    """
    # local import: this module does not import sqlalchemy at the top
    from sqlalchemy import text

    connection = model.Session.connection()
    row = connection.execute(
        text("""SELECT visits_ever FROM resource_stats, resource
        WHERE resource_id = resource.id
        AND resource.url = :url"""),
        {"url": url}).fetchone()
    if row and row[0]:
        return row[0]
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def update_package_visits(package_id, recently, ever):
    """Insert or update the visit counts stored for one package.

    :param package_id: id of the package (key of ``package_stats``).
    :param recently: value to store in ``visits_recently``.
    :param ever: value to store in ``visits_ever``.

    The caller is responsible for committing the session.
    """
    connection = model.Session.connection()
    count = connection.execute(
        """SELECT count(package_id) FROM package_stats
        WHERE package_id = '%s'""" % package_id).fetchone()
    if count[0]:
        # Update both counters.  The table has visits_recently and
        # visits_ever (there is no plain ``visits`` column), and the
        # statement must have one placeholder per supplied value.
        connection.execute(
            """UPDATE package_stats SET visits_recently = %s,
            visits_ever = %s
            WHERE package_id = '%s'""" % (recently, ever, package_id)
        )
    else:
        connection.execute(
            """INSERT INTO package_stats
            (package_id, visits_recently, visits_ever) VALUES
            ('%s', %s, %s)""" % (package_id, recently, ever))
|
||||||
|
|
||||||
|
|
||||||
|
def get_top_packages(limit=20):
    """Return the most recently visited packages visible to visitors.

    :param limit: maximum number of ``package_stats`` rows to consider.
    :returns: list of ``(package, visits_recently)`` tuples ordered by
        ``visits_recently`` descending.  Rows whose package is not returned
        by the visitor-authorized query are skipped, so the list may be
        shorter than ``limit``.
    """
    items = []
    authorizer = Authorizer()
    q = authorizer.authorized_query(PSEUDO_USER__VISITOR,
                                    model.Package)
    connection = model.Session.connection()
    res = connection.execute("""SELECT package_id, visits_recently
    FROM package_stats
    ORDER BY visits_recently DESC;""").fetchmany(limit)
    for package_id, visits in res:
        # one query per package: first() yields None when nothing matches,
        # so a separate count() round-trip is unnecessary
        package = q.filter("package.id = '%s'" % package_id).first()
        if package is None:
            continue
        items.append((package, visits))
    return items
|
|
@ -0,0 +1,11 @@
|
||||||
|
from ckan import model
|
||||||
|
|
||||||
|
def setup():
    """Create the ``package_downloads`` table if it does not exist yet."""
    connection = model.Session.connection()
    # "primary key" is the SQL constraint; the previous "primary_key" was
    # not a valid constraint keyword.
    connection.execute("""CREATE TABLE IF NOT EXISTS package_downloads (
        id integer primary key,
        package_id varchar(60),
        download_visits integer,
        views_visits integer);""")
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,19 @@
|
||||||
import logging
|
import logging
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
import os
|
import os
|
||||||
from genshi.filters import Transformer
|
from genshi.filters import Transformer
|
||||||
from genshi import HTML
|
from genshi import HTML
|
||||||
|
from genshi.core import START, TEXT
|
||||||
|
from genshi.filters.transform import INSIDE
|
||||||
|
from pylons import config
|
||||||
from ckan.plugins import implements, SingletonPlugin
|
from ckan.plugins import implements, SingletonPlugin
|
||||||
from ckan.plugins import IGenshiStreamFilter, IConfigurable, IRoutes
|
from ckan.plugins import IGenshiStreamFilter, IConfigurable, IRoutes
|
||||||
from ckan.plugins import IConfigurer
|
from ckan.plugins import IConfigurer
|
||||||
from ckan import model
|
|
||||||
from gasnippet import gacode
|
from gasnippet import gacode
|
||||||
|
from commands import DEFAULT_RESOURCE_URL_TAG
|
||||||
|
import dbutil
|
||||||
|
|
||||||
|
log = logging.getLogger('ckanext.googleanalytics')
|
||||||
|
|
||||||
|
|
||||||
class GoogleAnalyticsException(Exception):
|
class GoogleAnalyticsException(Exception):
|
||||||
|
@ -34,16 +38,36 @@ class GoogleAnalyticsPlugin(SingletonPlugin):
|
||||||
ga_id = self.config['googleanalytics.id']
|
ga_id = self.config['googleanalytics.id']
|
||||||
code = HTML(gacode % ga_id)
|
code = HTML(gacode % ga_id)
|
||||||
stream = stream | Transformer('head').append(code)
|
stream = stream | Transformer('head').append(code)
|
||||||
|
resource_url = config.get('googleanalytics.resource_prefix',
|
||||||
|
DEFAULT_RESOURCE_URL_TAG)
|
||||||
|
|
||||||
# add download tracking link
|
# add download tracking link
|
||||||
def js_attr(name, event):
|
def js_attr(name, event):
|
||||||
attrs = event[1][1]
|
attrs = event[1][1]
|
||||||
link = '/downloads/%s' % urllib.quote(attrs.get('href'))
|
link = '%s%s' % (resource_url,
|
||||||
|
urllib.quote(attrs.get('href')))
|
||||||
js = "javascript: _gaq.push(['_trackPageview', '%s']);" % link
|
js = "javascript: _gaq.push(['_trackPageview', '%s']);" % link
|
||||||
return js
|
return js
|
||||||
|
|
||||||
|
# add some stats
|
||||||
|
def download_adder(stream):
    """Append a download count to the text of each matched link.

    Consumes ``(mark, (kind, data, pos))`` tuples from ``stream`` and yields
    them back.  When a marked START event is seen, the element's ``href``
    is looked up with ``dbutil.get_resource_visits_for_url``; subsequent
    TEXT events marked INSIDE get the count appended while it is truthy.
    All other events pass through unchanged.

    NOTE(review): the markup is appended to a TEXT event's data; confirm
    that it is rendered as markup rather than escaped by the serializer.
    """
    # "class" attribute name was missing, producing malformed markup
    download_html = ' <span class="downloads-count">(%s downloads)</span>'
    count = None
    for mark, (kind, data, pos) in stream:
        if mark and kind == START:
            href = data[1].get('href')
            # no href means nothing to look up; avoid a pointless query
            count = dbutil.get_resource_visits_for_url(href) if href else None
        if count and kind == TEXT and mark == INSIDE:
            yield mark, (kind,
                         data + download_html % count,
                         pos)
        else:
            yield mark, (kind, data, pos)
|
||||||
|
|
||||||
|
# perform the stream transform
|
||||||
stream = stream | Transformer(
|
stream = stream | Transformer(
|
||||||
'//div[@id="package"]//td/a')\
|
'//div[@id="package"]//td/a')\
|
||||||
.attr('onclick', js_attr)
|
.apply(download_adder).attr('onclick', js_attr)
|
||||||
|
|
||||||
return stream
|
return stream
|
||||||
|
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -27,5 +27,8 @@ setup(
|
||||||
[ckan.plugins]
|
[ckan.plugins]
|
||||||
# Add plugins here, eg
|
# Add plugins here, eg
|
||||||
googleanalytics=ckanext.googleanalytics.plugin:GoogleAnalyticsPlugin
|
googleanalytics=ckanext.googleanalytics.plugin:GoogleAnalyticsPlugin
|
||||||
|
|
||||||
|
[paste.paster_command]
|
||||||
|
loadanalytics = ckanext.googleanalytics.commands:LoadAnalytics
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue