Fixes to make sure that data collection for reports is working
Make sure we handle the changes to the API so that the dataset and resource information is collected.
This commit is contained in:
parent
2283571562
commit
459ff608a6
|
@ -3,7 +3,7 @@ CKAN Google Analytics Extension
|
||||||
|
|
||||||
**Status:** Production
|
**Status:** Production
|
||||||
|
|
||||||
**CKAN Version:** 1.5.*
|
**CKAN Version:** >= 1.5.*
|
||||||
|
|
||||||
|
|
||||||
Overview
|
Overview
|
||||||
|
@ -95,7 +95,7 @@ Installation
|
||||||
7. Import Google stats by running the following command from
|
7. Import Google stats by running the following command from
|
||||||
``src/ckanext-googleanalytics``::
|
``src/ckanext-googleanalytics``::
|
||||||
|
|
||||||
paster loadanalytics token.dat 2012-10-10 --config=../ckan/development.ini
|
paster loadanalytics token.dat --config=../ckan/development.ini
|
||||||
|
|
||||||
(Of course, pointing config at your specific site config and token.dat at the
|
(Of course, pointing config at your specific site config and token.dat at the
|
||||||
oauth file generated from the authorization step)
|
oauth file generated from the authorization step)
|
||||||
|
|
|
@ -15,7 +15,7 @@ log = logging.getLogger('ckanext.googleanalytics')
|
||||||
PACKAGE_URL = '/dataset/' # XXX get from routes...
|
PACKAGE_URL = '/dataset/' # XXX get from routes...
|
||||||
DEFAULT_RESOURCE_URL_TAG = '/downloads/'
|
DEFAULT_RESOURCE_URL_TAG = '/downloads/'
|
||||||
|
|
||||||
RESOURCE_URL_REGEX = re.compile('/dataset/[a-z0-9-_]+/resource/[a-z0-9-_]+')
|
RESOURCE_URL_REGEX = re.compile('/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)')
|
||||||
DATASET_EDIT_REGEX = re.compile('/dataset/edit/([a-z0-9-_]+)')
|
DATASET_EDIT_REGEX = re.compile('/dataset/edit/([a-z0-9-_]+)')
|
||||||
|
|
||||||
class GetAuthToken(CkanCommand):
|
class GetAuthToken(CkanCommand):
|
||||||
|
@ -262,12 +262,14 @@ class LoadAnalytics(CkanCommand):
|
||||||
raise Exception('Unable to create a service')
|
raise Exception('Unable to create a service')
|
||||||
self.profile_id = get_profile_id(self.service)
|
self.profile_id = get_profile_id(self.service)
|
||||||
|
|
||||||
if len(self.args):
|
if len(self.args) > 1:
|
||||||
if len(self.args) > 1 and self.args[1].lower() != 'internal':
|
if len(self.args) > 2 and self.args[1].lower() != 'internal':
|
||||||
raise Exception('Illegal argument %s' % self.args[1])
|
raise Exception('Illegal argument %s' % self.args[1])
|
||||||
self.bulk_import()
|
self.bulk_import()
|
||||||
else:
|
else:
|
||||||
packages_data = self.get_ga_data()
|
query = 'ga:pagePath=~%s,ga:pagePath=~%s' % \
|
||||||
|
(PACKAGE_URL, self.resource_url_tag)
|
||||||
|
packages_data = self.get_ga_data(query_filter=query)
|
||||||
self.save_ga_data(packages_data)
|
self.save_ga_data(packages_data)
|
||||||
log.info("Saved %s records from google" % len(packages_data))
|
log.info("Saved %s records from google" % len(packages_data))
|
||||||
|
|
||||||
|
@ -277,10 +279,11 @@ class LoadAnalytics(CkanCommand):
|
||||||
for identifier, visits in packages_data.items():
|
for identifier, visits in packages_data.items():
|
||||||
recently = visits.get('recent', 0)
|
recently = visits.get('recent', 0)
|
||||||
ever = visits.get('ever', 0)
|
ever = visits.get('ever', 0)
|
||||||
if identifier.startswith(self.resource_url_tag):
|
matches = RESOURCE_URL_REGEX.match(identifier)
|
||||||
|
if matches:
|
||||||
resource_url = identifier[len(self.resource_url_tag):]
|
resource_url = identifier[len(self.resource_url_tag):]
|
||||||
resource = model.Session.query(model.Resource).autoflush(True)\
|
resource = model.Session.query(model.Resource).autoflush(True)\
|
||||||
.filter_by(url=resource_url).first()
|
.filter_by(id=matches.group(1)).first()
|
||||||
if not resource:
|
if not resource:
|
||||||
log.warning("Couldn't find resource %s" % resource_url)
|
log.warning("Couldn't find resource %s" % resource_url)
|
||||||
continue
|
continue
|
||||||
|
@ -307,26 +310,8 @@ class LoadAnalytics(CkanCommand):
|
||||||
username = self.CONFIG.get('googleanalytics.username')
|
username = self.CONFIG.get('googleanalytics.username')
|
||||||
password = self.CONFIG.get('googleanalytics.password')
|
password = self.CONFIG.get('googleanalytics.password')
|
||||||
ga_id = self.CONFIG.get('googleanalytics.id')
|
ga_id = self.CONFIG.get('googleanalytics.id')
|
||||||
if not username or not password or not ga_id:
|
if not ga_id:
|
||||||
raise Exception("No googleanalytics profile info in config")
|
raise Exception("No googleanalytics profile info in config")
|
||||||
if self.TEST_HOST:
|
|
||||||
my_client = client.AnalyticsClient(source=SOURCE_APP_NAME,
|
|
||||||
http_client=self.TEST_HOST)
|
|
||||||
else:
|
|
||||||
my_client = client.AnalyticsClient(source=SOURCE_APP_NAME)
|
|
||||||
#my_client.ClientLogin(username, password, SOURCE_APP_NAME)
|
|
||||||
#account_query = client.AccountFeedQuery({'max-results': '300'})
|
|
||||||
#feed = my_client.GetAccountFeed(account_query)
|
|
||||||
#table_id = None
|
|
||||||
#for entry in feed.entry:
|
|
||||||
# if entry.get_property("ga:webPropertyId").value == ga_id:
|
|
||||||
# table_id = entry.table_id.text
|
|
||||||
# break
|
|
||||||
#if not table_id:
|
|
||||||
# msg = "Couldn't find a profile with id '%s'" % ga_id
|
|
||||||
# raise Exception(msg)
|
|
||||||
#self.table_id = table_id
|
|
||||||
#self.client = my_client
|
|
||||||
|
|
||||||
def ga_query(self, query_filter=None, from_date=None, to_date=None,
|
def ga_query(self, query_filter=None, from_date=None, to_date=None,
|
||||||
start_index=1, max_results=10000, metrics=None, sort=None):
|
start_index=1, max_results=10000, metrics=None, sort=None):
|
||||||
|
@ -335,10 +320,14 @@ class LoadAnalytics(CkanCommand):
|
||||||
if not to_date:
|
if not to_date:
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
to_date = now.strftime("%Y-%m-%d")
|
to_date = now.strftime("%Y-%m-%d")
|
||||||
|
if isinstance(from_date,datetime.date):
|
||||||
|
from_date = from_date.strftime("%Y-%m-%d")
|
||||||
if not metrics:
|
if not metrics:
|
||||||
metrics = 'ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews'
|
metrics = 'ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews'
|
||||||
if not sort:
|
if not sort:
|
||||||
sort = '-ga:newVisits'
|
sort = '-ga:uniquePageviews'
|
||||||
|
|
||||||
|
print '%s -> %s' % (from_date, to_date)
|
||||||
|
|
||||||
results = self.service.data().ga().get(ids='ga:' + self.profile_id,
|
results = self.service.data().ga().get(ids='ga:' + self.profile_id,
|
||||||
start_date=from_date,
|
start_date=from_date,
|
||||||
|
@ -365,17 +354,26 @@ class LoadAnalytics(CkanCommand):
|
||||||
recent_date = recent_date.strftime("%Y-%m-%d")
|
recent_date = recent_date.strftime("%Y-%m-%d")
|
||||||
floor_date = datetime.date(2005, 1, 1)
|
floor_date = datetime.date(2005, 1, 1)
|
||||||
packages = {}
|
packages = {}
|
||||||
queries = ['ga:pagePath=~^%s' % PACKAGE_URL,
|
queries = ['ga:pagePath=~%s' % PACKAGE_URL] #,
|
||||||
'ga:pagePath=~^%s' % self.resource_url_tag]
|
#'ga:pagePath=~%s' % self.resource_url_tag]
|
||||||
dates = {'recent': recent_date, 'ever': floor_date}
|
dates = {'recent': recent_date, 'ever': floor_date}
|
||||||
for date_name, date in dates.items():
|
for date_name, date in dates.iteritems():
|
||||||
for query in queries:
|
for query in queries:
|
||||||
results = self.ga_query(query_filter=query,
|
results = self.ga_query(query_filter=query,
|
||||||
|
metrics='ga:uniquePageviews',
|
||||||
from_date=date)
|
from_date=date)
|
||||||
|
if 'rows' in results:
|
||||||
for result in results.get('rows'):
|
for result in results.get('rows'):
|
||||||
package = result[0]
|
package = result[0]
|
||||||
|
if not package.startswith(PACKAGE_URL):
|
||||||
package = '/' + '/'.join(package.split('/')[2:])
|
package = '/' + '/'.join(package.split('/')[2:])
|
||||||
|
|
||||||
count = result[1]
|
count = result[1]
|
||||||
packages.setdefault(package, {})[date_name] = count
|
# Make sure we add the different representations of the same
|
||||||
|
# dataset /mysite.com & /www.mysite.com ...
|
||||||
|
val = 0
|
||||||
|
if package in packages and date_name in packages[package]:
|
||||||
|
val += packages[package][date_name]
|
||||||
|
packages.setdefault(package, {})[date_name] = int(count) + val
|
||||||
return packages
|
return packages
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue