133 lines
5.5 KiB
Python
133 lines
5.5 KiB
Python
|
import logging
|
||
|
import datetime
|
||
|
from pylons import config
|
||
|
from ckan.lib.cli import CkanCommand
|
||
|
from gdata.analytics import client
|
||
|
import ckan.model as model
|
||
|
from sqlalchemy.orm import sessionmaker
|
||
|
|
||
|
import dbutil
|
||
|
|
||
|
# Module-wide logger for the Google Analytics extension.
log = logging.getLogger('ckanext.googleanalytics')

# Path prefix of package pages; used both to build the Analytics
# pagePath filter and to strip the prefix off returned page paths.
PACKAGE_URL = '/package/'  # XXX get from routes...

# Path prefix of resource downloads, used when the
# 'googleanalytics.resource_prefix' config option is not set.
DEFAULT_RESOURCE_URL_TAG = '/downloads/'
|
||
|
|
||
|
|
||
|
class LoadAnalytics(CkanCommand):
    """Parse data from Google Analytics API and store it in a local
    database
    """
    # The class docstring doubles as the command's help text: ``summary``
    # is its first line and ``usage`` is the whole docstring, so the
    # docstring text is runtime data and is deliberately left unchanged.
    # NOTE(review): because the first docstring line wraps, ``summary``
    # ends mid-sentence ("... store it in a local").
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 0
    min_args = 0

    def command(self):
        """Run the command: load config, connect to GA, import the data.

        Reads the resource path prefix from the
        ``googleanalytics.resource_prefix`` config option (falling back to
        ``DEFAULT_RESOURCE_URL_TAG``) and stores it on
        ``self.resource_url_tag`` before fetching and saving the data.
        """
        # presumably inherited from CkanCommand - it is not defined here
        self._load_config()
        self.resource_url_tag = config.get(
            'googleanalytics.resource_prefix', DEFAULT_RESOURCE_URL_TAG)
        self.setup_ga_connection()
        # funny dance we need to do to make sure we've got a
        # configured session
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)
        self.parse_and_save()

    def parse_and_save(self):
        """Fetch the visit counts from Google Analytics and store them."""
        packages_data = self.get_ga_data()
        self.save_ga_data(packages_data)
        log.info("Saved %s records from google", len(packages_data))

    def save_ga_data(self, packages_data):
        """Store visit counts for the packages/resources they belong to.

        ``packages_data`` is a dictionary shaped like the return value of
        ``get_ga_data``: page path -> ``{'recent': n, 'ever': m}``.  A
        missing ``'recent'`` or ``'ever'`` key counts as 0.

        Paths starting with ``self.resource_url_tag`` are treated as
        resource downloads and matched on the resource URL; every other
        path is treated as a package page and matched on the package
        name.  Entries that cannot be matched are logged and skipped.
        """
        dbutil.init_tables()
        for identifier, visits in packages_data.items():
            recently = visits.get('recent', 0)
            ever = visits.get('ever', 0)
            if identifier.startswith(self.resource_url_tag):
                resource_url = identifier[len(self.resource_url_tag):]
                resource = model.Session.query(model.Resource)\
                    .autoflush(True)\
                    .filter_by(url=resource_url).first()
                if not resource:
                    log.warning("Couldn't find resource %s", resource_url)
                    continue
                dbutil.update_resource_visits(resource.id, recently, ever)
                log.info("Updated %s with %s visits", resource.id, visits)
            else:
                # Assumes the path starts with PACKAGE_URL, which holds
                # for everything returned by get_ga_data's filters.
                package_name = identifier[len(PACKAGE_URL):]
                if "/" in package_name:
                    # A sub-page of a package, not the package page itself
                    log.warning("%s not a valid package name", package_name)
                    continue
                item = model.Package.by_name(package_name)
                if not item:
                    log.warning("Couldn't find package %s", package_name)
                    continue
                dbutil.update_package_visits(item.id, recently, ever)
                log.info("Updated %s with %s visits", item.id, visits)
        # Commit once, after every entry has been processed.
        model.Session.commit()

    def setup_ga_connection(self):
        """Log in to Google Analytics and locate the configured profile.

        Reads ``googleanalytics.username``, ``googleanalytics.password``
        and ``googleanalytics.profile_name`` from the config.  On success
        sets ``self.client`` (the logged-in analytics client) and
        ``self.table_id`` (the table id of the account feed entry whose
        title equals the profile name).

        Raises ``Exception`` if any of the three options is missing or if
        no account feed entry matches the profile name.
        """
        SOURCE_APP_NAME = "CKAN Google Analytics Plugin"
        username = config.get('googleanalytics.username')
        password = config.get('googleanalytics.password')
        profile_name = config.get('googleanalytics.profile_name')
        if not (username and password and profile_name):
            raise Exception("No googleanalytics profile info in config")

        my_client = client.AnalyticsClient(source=SOURCE_APP_NAME)
        my_client.ClientLogin(username, password, SOURCE_APP_NAME)
        account_query = client.AccountFeedQuery({'max-results': '300'})
        feed = my_client.GetAccountFeed(account_query)

        # Use the first account entry whose title is the profile name.
        table_id = None
        for entry in feed.entry:
            if entry.title.text == profile_name:
                table_id = entry.table_id.text
                break
        if not table_id:
            msg = "Couldn't find a profile called '%s'" % profile_name
            raise Exception(msg)

        self.table_id = table_id
        self.client = my_client

    def ga_query(self, query_filter=None, from_date=None):
        """Query the data feed for per-page metrics up to today.

        ``query_filter`` is passed through as the feed's ``filters``
        value.  ``from_date`` is the ``start-date``: either a
        ``YYYY-MM-DD`` string or a ``datetime.date``/``datetime.datetime``
        (which is formatted as ``YYYY-MM-DD``).  The end date is always
        the current local date.

        Returns the feed object produced by ``self.client.GetDataFeed``.
        Requires ``setup_ga_connection`` to have set ``self.client`` and
        ``self.table_id``.
        """
        date_format = "%Y-%m-%d"
        # datetime.datetime subclasses datetime.date, so this covers both
        if isinstance(from_date, datetime.date):
            from_date = from_date.strftime(date_format)
        to_date = datetime.datetime.now().strftime(date_format)
        metrics = 'ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews'
        query = client.DataFeedQuery({
            'ids': '%s' % self.table_id,
            'start-date': from_date,
            'end-date': to_date,
            'dimensions': 'ga:pagePath',
            'metrics': metrics,
            'sort': '-ga:newVisits',
            'filters': query_filter,
            'max-results': '10000'
        })
        return self.client.GetDataFeed(query)

    def get_ga_data(self, query_filter=None):
        """Return a dictionary like
        {'identifier': {'recent':3, 'ever':6}}

        The identifier is the page path reported by Google Analytics and
        the counts are the ``ga:uniquePageviews`` metric as integers:
        ``'recent'`` covers the last 14 days, ``'ever'`` everything since
        2005-01-01.  Both package pages (``PACKAGE_URL``) and resource
        downloads (``self.resource_url_tag``) are queried.

        NOTE(review): ``query_filter`` is accepted but not used; it is
        kept so existing callers keep working.
        """
        date_format = "%Y-%m-%d"
        now = datetime.datetime.now()
        recent_date = (now - datetime.timedelta(14)).strftime(date_format)
        # Earliest date queried for the "ever" totals; formatted like
        # recent_date so both start dates reach the API as strings.
        floor_date = datetime.date(2005, 1, 1).strftime(date_format)
        packages = {}
        queries = ['ga:pagePath=~^%s' % PACKAGE_URL,
                   'ga:pagePath=~^%s' % self.resource_url_tag]
        dates = {'recent': recent_date, 'ever': floor_date}
        for date_name, date in dates.items():
            for query in queries:
                feed = self.ga_query(query_filter=query,
                                     from_date=date)
                for entry in feed.entry:
                    for dim in entry.dimension:
                        if dim.name == "ga:pagePath":
                            package = dim.value
                            raw_count = entry.get_metric(
                                'ga:uniquePageviews').value
                            # Always store an int (the metric value is
                            # presumably a numeric string; empty -> 0).
                            count = int(raw_count or 0)
                            packages.setdefault(
                                package, {})[date_name] = count
        return packages
|
||
|
|