"""Paster command that loads page-view counts from Google Analytics.

The command queries the Google Analytics data feed for package and
resource page paths and hands the resulting counts to ``dbutil`` for
storage.
"""
import logging
import datetime

from pylons import config
from ckan.lib.cli import CkanCommand
from gdata.analytics import client
import ckan.model as model
from sqlalchemy.orm import sessionmaker

import dbutil

log = logging.getLogger('ckanext.googleanalytics')
PACKAGE_URL = '/package/'  # XXX get from routes...
DEFAULT_RESOURCE_URL_TAG = '/downloads/'

# Date format used for every date sent to the Analytics data feed.
_DATE_FORMAT = "%Y-%m-%d"


class LoadAnalytics(CkanCommand):
    """Parse data from Google Analytics API and store it in a local database
    """
    # NOTE: ``summary`` and ``usage`` are derived from the class docstring,
    # so the docstring text above is user-visible at runtime.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 0
    min_args = 0

    def command(self):
        """Entry point: load config, connect to Analytics, fetch and save."""
        self._load_config()
        self.resource_url_tag = config.get('googleanalytics.resource_prefix',
                                           DEFAULT_RESOURCE_URL_TAG)
        self.setup_ga_connection()
        # funny dance we need to do to make sure we've got a
        # configured session
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)
        self.parse_and_save()

    def parse_and_save(self):
        """Fetch visit counts from Analytics and store them."""
        packages_data = self.get_ga_data()
        self.save_ga_data(packages_data)
        log.info("Saved %s records from google", len(packages_data))

    def save_ga_data(self, packages_data):
        """Store visit counts for every identifier in ``packages_data``.

        ``packages_data`` maps a page path to a dict that may contain the
        keys ``'recent'`` and ``'ever'``; a missing key counts as 0.
        Paths starting with ``self.resource_url_tag`` are looked up as
        resources by URL, everything else is treated as a package page.
        Identifiers that cannot be resolved are logged and skipped.
        """
        dbutil.init_tables()
        for identifier, visits in packages_data.items():
            recently = visits.get('recent', 0)
            ever = visits.get('ever', 0)
            if identifier.startswith(self.resource_url_tag):
                resource_url = identifier[len(self.resource_url_tag):]
                resource = (model.Session.query(model.Resource)
                            .autoflush(True)
                            .filter_by(url=resource_url)
                            .first())
                if not resource:
                    log.warning("Couldn't find resource %s", resource_url)
                    continue
                dbutil.update_resource_visits(resource.id, recently, ever)
                log.info("Updated %s with %s visits", resource.id, visits)
            else:
                # Assumes the path starts with PACKAGE_URL, which holds for
                # the filters used by get_ga_data().
                package_name = identifier[len(PACKAGE_URL):]
                if "/" in package_name:
                    # Sub-pages of a package are not package names.
                    log.warning("%s not a valid package name", package_name)
                    continue
                item = model.Package.by_name(package_name)
                if not item:
                    log.warning("Couldn't find package %s", package_name)
                    continue
                dbutil.update_package_visits(item.id, recently, ever)
                log.info("Updated %s with %s visits", item.id, visits)
        model.Session.commit()

    def setup_ga_connection(self):
        """Log in to Google Analytics and locate the configured profile.

        Reads ``googleanalytics.username``, ``googleanalytics.password`` and
        ``googleanalytics.profile_name`` from the config. On success sets
        ``self.client`` and ``self.table_id``.

        Raises ``Exception`` when any of the three settings is missing or
        when no account feed entry has a title equal to the profile name.
        """
        SOURCE_APP_NAME = "CKAN Google Analytics Plugin"
        username = config.get('googleanalytics.username')
        password = config.get('googleanalytics.password')
        profile_name = config.get('googleanalytics.profile_name')
        if not username or not password or not profile_name:
            raise Exception("No googleanalytics profile info in config")
        my_client = client.AnalyticsClient(source=SOURCE_APP_NAME)
        my_client.ClientLogin(username, password, SOURCE_APP_NAME)
        account_query = client.AccountFeedQuery({'max-results': '300'})
        feed = my_client.GetAccountFeed(account_query)
        table_id = None
        for entry in feed.entry:
            if entry.title.text == profile_name:
                table_id = entry.table_id.text
                break
        if not table_id:
            msg = "Couldn't find a profile called '%s'" % profile_name
            raise Exception(msg)
        self.table_id = table_id
        self.client = my_client

    def ga_query(self, query_filter=None, from_date=None):
        """Run a data feed query grouped by page path and return the feed.

        ``query_filter`` is sent as the ``filters`` parameter when given.
        ``from_date`` is sent as ``start-date``; the end date is today.
        """
        now = datetime.datetime.now()
        to_date = now.strftime(_DATE_FORMAT)
        metrics = 'ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews'
        params = {
            'ids': '%s' % self.table_id,
            'start-date': from_date,
            'end-date': to_date,
            'dimensions': 'ga:pagePath',
            'metrics': metrics,
            'sort': '-ga:newVisits',
            # NOTE(review): results beyond this limit are not fetched;
            # there is no paging here.
            'max-results': '10000',
        }
        # Only send a filter when one was supplied; a None value would
        # otherwise end up in the request (presumably stringified by the
        # gdata query encoder -- confirm).
        if query_filter is not None:
            params['filters'] = query_filter
        query = client.DataFeedQuery(params)
        return self.client.GetDataFeed(query)

    def get_ga_data(self, query_filter=None):
        """Return a dictionary like
        {'identifier': {'recent':3, 'ever':6}}

        'recent' covers the last 14 days, 'ever' starts at 2005-01-01.
        Counts are the ``ga:uniquePageviews`` metric converted to ``int``.
        """
        # NOTE(review): ``query_filter`` is accepted but not used; the
        # filters are always the package and resource path prefixes. The
        # parameter is kept so existing callers keep working.
        now = datetime.datetime.now()
        recent_date = (now - datetime.timedelta(14)).strftime(_DATE_FORMAT)
        # Format the floor date like the recent date so both reach the
        # query as plain strings.
        floor_date = datetime.date(2005, 1, 1).strftime(_DATE_FORMAT)
        packages = {}
        queries = ['ga:pagePath=~^%s' % PACKAGE_URL,
                   'ga:pagePath=~^%s' % self.resource_url_tag]
        dates = {'recent': recent_date, 'ever': floor_date}
        for date_name, date in dates.items():
            for query in queries:
                feed = self.ga_query(query_filter=query, from_date=date)
                for entry in feed.entry:
                    for dim in entry.dimension:
                        if dim.name == "ga:pagePath":
                            package = dim.value
                            metric = entry.get_metric('ga:uniquePageviews')
                            # Normalise to int so a present value and the
                            # 0 fallback have the same type.
                            count = int(metric.value or 0)
                            packages.setdefault(package, {})[date_name] = count
        return packages