2.9 support
This commit is contained in:
parent
a365fcdf6d
commit
a5be073c04
|
@ -1,9 +1,11 @@
|
||||||
# this is a namespace package
|
# this is a namespace package
|
||||||
try:
|
try:
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
|
|
||||||
pkg_resources.declare_namespace(__name__)
|
pkg_resources.declare_namespace(__name__)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import pkgutil
|
import pkgutil
|
||||||
|
|
||||||
__path__ = pkgutil.extend_path(__path__, __name__)
|
__path__ = pkgutil.extend_path(__path__, __name__)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# this is a namespace package
|
# this is a namespace package
|
||||||
try:
|
try:
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
|
|
||||||
pkg_resources.declare_namespace(__name__)
|
pkg_resources.declare_namespace(__name__)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import pkgutil
|
import pkgutil
|
||||||
|
|
||||||
__path__ = pkgutil.extend_path(__path__, __name__)
|
__path__ = pkgutil.extend_path(__path__, __name__)
|
||||||
|
|
|
@ -10,18 +10,19 @@ import ckan.model as model
|
||||||
|
|
||||||
import dbutil
|
import dbutil
|
||||||
|
|
||||||
log = logging.getLogger('ckanext.googleanalytics')
|
log = logging.getLogger("ckanext.googleanalytics")
|
||||||
PACKAGE_URL = '/dataset/' # XXX get from routes...
|
PACKAGE_URL = "/dataset/" # XXX get from routes...
|
||||||
DEFAULT_RESOURCE_URL_TAG = '/downloads/'
|
DEFAULT_RESOURCE_URL_TAG = "/downloads/"
|
||||||
|
|
||||||
RESOURCE_URL_REGEX = re.compile('/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)')
|
RESOURCE_URL_REGEX = re.compile("/dataset/[a-z0-9-_]+/resource/([a-z0-9-_]+)")
|
||||||
DATASET_EDIT_REGEX = re.compile('/dataset/edit/([a-z0-9-_]+)')
|
DATASET_EDIT_REGEX = re.compile("/dataset/edit/([a-z0-9-_]+)")
|
||||||
|
|
||||||
|
|
||||||
class InitDB(CkanCommand):
|
class InitDB(CkanCommand):
|
||||||
"""Initialise the local stats database tables
|
"""Initialise the local stats database tables
|
||||||
"""
|
"""
|
||||||
summary = __doc__.split('\n')[0]
|
|
||||||
|
summary = __doc__.split("\n")[0]
|
||||||
usage = __doc__
|
usage = __doc__
|
||||||
max_args = 0
|
max_args = 0
|
||||||
min_args = 0
|
min_args = 0
|
||||||
|
@ -44,7 +45,8 @@ class LoadAnalytics(CkanCommand):
|
||||||
date specifies start date for retrieving
|
date specifies start date for retrieving
|
||||||
analytics data YYYY-MM-DD format
|
analytics data YYYY-MM-DD format
|
||||||
"""
|
"""
|
||||||
summary = __doc__.split('\n')[0]
|
|
||||||
|
summary = __doc__.split("\n")[0]
|
||||||
usage = __doc__
|
usage = __doc__
|
||||||
max_args = 3
|
max_args = 3
|
||||||
min_args = 1
|
min_args = 1
|
||||||
|
@ -57,8 +59,8 @@ class LoadAnalytics(CkanCommand):
|
||||||
self.CONFIG = pylonsconfig
|
self.CONFIG = pylonsconfig
|
||||||
|
|
||||||
self.resource_url_tag = self.CONFIG.get(
|
self.resource_url_tag = self.CONFIG.get(
|
||||||
'googleanalytics_resource_prefix',
|
"googleanalytics_resource_prefix", DEFAULT_RESOURCE_URL_TAG
|
||||||
DEFAULT_RESOURCE_URL_TAG)
|
)
|
||||||
|
|
||||||
# funny dance we need to do to make sure we've got a
|
# funny dance we need to do to make sure we've got a
|
||||||
# configured session
|
# configured session
|
||||||
|
@ -69,41 +71,44 @@ class LoadAnalytics(CkanCommand):
|
||||||
def internal_save(self, packages_data, summary_date):
|
def internal_save(self, packages_data, summary_date):
|
||||||
engine = model.meta.engine
|
engine = model.meta.engine
|
||||||
# clear out existing data before adding new
|
# clear out existing data before adding new
|
||||||
sql = '''DELETE FROM tracking_summary
|
sql = (
|
||||||
WHERE tracking_date='%s'; ''' % summary_date
|
"""DELETE FROM tracking_summary
|
||||||
|
WHERE tracking_date='%s'; """
|
||||||
|
% summary_date
|
||||||
|
)
|
||||||
engine.execute(sql)
|
engine.execute(sql)
|
||||||
|
|
||||||
for url, count in packages_data.iteritems():
|
for url, count in packages_data.iteritems():
|
||||||
# If it matches the resource then we should mark it as a resource.
|
# If it matches the resource then we should mark it as a resource.
|
||||||
# For resources we don't currently find the package ID.
|
# For resources we don't currently find the package ID.
|
||||||
if RESOURCE_URL_REGEX.match(url):
|
if RESOURCE_URL_REGEX.match(url):
|
||||||
tracking_type = 'resource'
|
tracking_type = "resource"
|
||||||
else:
|
else:
|
||||||
tracking_type = 'page'
|
tracking_type = "page"
|
||||||
|
|
||||||
sql = '''INSERT INTO tracking_summary
|
sql = """INSERT INTO tracking_summary
|
||||||
(url, count, tracking_date, tracking_type)
|
(url, count, tracking_date, tracking_type)
|
||||||
VALUES (%s, %s, %s, %s);'''
|
VALUES (%s, %s, %s, %s);"""
|
||||||
engine.execute(sql, url, count, summary_date, tracking_type)
|
engine.execute(sql, url, count, summary_date, tracking_type)
|
||||||
|
|
||||||
# get ids for dataset urls
|
# get ids for dataset urls
|
||||||
sql = '''UPDATE tracking_summary t
|
sql = """UPDATE tracking_summary t
|
||||||
SET package_id = COALESCE(
|
SET package_id = COALESCE(
|
||||||
(SELECT id FROM package p WHERE t.url = %s || p.name)
|
(SELECT id FROM package p WHERE t.url = %s || p.name)
|
||||||
,'~~not~found~~')
|
,'~~not~found~~')
|
||||||
WHERE t.package_id IS NULL AND tracking_type = 'page';'''
|
WHERE t.package_id IS NULL AND tracking_type = 'page';"""
|
||||||
engine.execute(sql, PACKAGE_URL)
|
engine.execute(sql, PACKAGE_URL)
|
||||||
|
|
||||||
# get ids for dataset edit urls which aren't captured otherwise
|
# get ids for dataset edit urls which aren't captured otherwise
|
||||||
sql = '''UPDATE tracking_summary t
|
sql = """UPDATE tracking_summary t
|
||||||
SET package_id = COALESCE(
|
SET package_id = COALESCE(
|
||||||
(SELECT id FROM package p WHERE t.url = %s || p.name)
|
(SELECT id FROM package p WHERE t.url = %s || p.name)
|
||||||
,'~~not~found~~')
|
,'~~not~found~~')
|
||||||
WHERE t.package_id = '~~not~found~~' AND tracking_type = 'page';'''
|
WHERE t.package_id = '~~not~found~~' AND tracking_type = 'page';"""
|
||||||
engine.execute(sql, '%sedit/' % PACKAGE_URL)
|
engine.execute(sql, "%sedit/" % PACKAGE_URL)
|
||||||
|
|
||||||
# update summary totals for resources
|
# update summary totals for resources
|
||||||
sql = '''UPDATE tracking_summary t1
|
sql = """UPDATE tracking_summary t1
|
||||||
SET running_total = (
|
SET running_total = (
|
||||||
SELECT sum(count)
|
SELECT sum(count)
|
||||||
FROM tracking_summary t2
|
FROM tracking_summary t2
|
||||||
|
@ -116,11 +121,11 @@ class LoadAnalytics(CkanCommand):
|
||||||
WHERE t1.url = t2.url
|
WHERE t1.url = t2.url
|
||||||
AND t2.tracking_date <= t1.tracking_date AND t2.tracking_date >= t1.tracking_date - 14
|
AND t2.tracking_date <= t1.tracking_date AND t2.tracking_date >= t1.tracking_date - 14
|
||||||
) + t1.count
|
) + t1.count
|
||||||
WHERE t1.running_total = 0 AND tracking_type = 'resource';'''
|
WHERE t1.running_total = 0 AND tracking_type = 'resource';"""
|
||||||
engine.execute(sql)
|
engine.execute(sql)
|
||||||
|
|
||||||
# update summary totals for pages
|
# update summary totals for pages
|
||||||
sql = '''UPDATE tracking_summary t1
|
sql = """UPDATE tracking_summary t1
|
||||||
SET running_total = (
|
SET running_total = (
|
||||||
SELECT sum(count)
|
SELECT sum(count)
|
||||||
FROM tracking_summary t2
|
FROM tracking_summary t2
|
||||||
|
@ -135,23 +140,23 @@ class LoadAnalytics(CkanCommand):
|
||||||
) + t1.count
|
) + t1.count
|
||||||
WHERE t1.running_total = 0 AND tracking_type = 'page'
|
WHERE t1.running_total = 0 AND tracking_type = 'page'
|
||||||
AND t1.package_id IS NOT NULL
|
AND t1.package_id IS NOT NULL
|
||||||
AND t1.package_id != '~~not~found~~';'''
|
AND t1.package_id != '~~not~found~~';"""
|
||||||
engine.execute(sql)
|
engine.execute(sql)
|
||||||
|
|
||||||
def bulk_import(self):
|
def bulk_import(self):
|
||||||
if len(self.args) == 3:
|
if len(self.args) == 3:
|
||||||
# Get summeries from specified date
|
# Get summeries from specified date
|
||||||
start_date = datetime.datetime.strptime(self.args[2], '%Y-%m-%d')
|
start_date = datetime.datetime.strptime(self.args[2], "%Y-%m-%d")
|
||||||
else:
|
else:
|
||||||
# No date given. See when we last have data for and get data
|
# No date given. See when we last have data for and get data
|
||||||
# from 2 days before then in case new data is available.
|
# from 2 days before then in case new data is available.
|
||||||
# If no date here then use 2010-01-01 as the start date
|
# If no date here then use 2010-01-01 as the start date
|
||||||
engine = model.meta.engine
|
engine = model.meta.engine
|
||||||
sql = '''SELECT tracking_date from tracking_summary
|
sql = """SELECT tracking_date from tracking_summary
|
||||||
ORDER BY tracking_date DESC LIMIT 1;'''
|
ORDER BY tracking_date DESC LIMIT 1;"""
|
||||||
result = engine.execute(sql).fetchall()
|
result = engine.execute(sql).fetchall()
|
||||||
if result:
|
if result:
|
||||||
start_date = result[0]['tracking_date']
|
start_date = result[0]["tracking_date"]
|
||||||
start_date += datetime.timedelta(-2)
|
start_date += datetime.timedelta(-2)
|
||||||
# convert date to datetime
|
# convert date to datetime
|
||||||
combine = datetime.datetime.combine
|
combine = datetime.datetime.combine
|
||||||
|
@ -161,14 +166,15 @@ class LoadAnalytics(CkanCommand):
|
||||||
end_date = datetime.datetime.now()
|
end_date = datetime.datetime.now()
|
||||||
while start_date < end_date:
|
while start_date < end_date:
|
||||||
stop_date = start_date + datetime.timedelta(1)
|
stop_date = start_date + datetime.timedelta(1)
|
||||||
packages_data = self.get_ga_data_new(start_date=start_date,
|
packages_data = self.get_ga_data_new(
|
||||||
end_date=stop_date)
|
start_date=start_date, end_date=stop_date
|
||||||
|
)
|
||||||
self.internal_save(packages_data, start_date)
|
self.internal_save(packages_data, start_date)
|
||||||
# sleep to rate limit requests
|
# sleep to rate limit requests
|
||||||
time.sleep(0.25)
|
time.sleep(0.25)
|
||||||
start_date = stop_date
|
start_date = stop_date
|
||||||
log.info('%s received %s' % (len(packages_data), start_date))
|
log.info("%s received %s" % (len(packages_data), start_date))
|
||||||
print '%s received %s' % (len(packages_data), start_date)
|
print "%s received %s" % (len(packages_data), start_date)
|
||||||
|
|
||||||
def get_ga_data_new(self, start_date=None, end_date=None):
|
def get_ga_data_new(self, start_date=None, end_date=None):
|
||||||
"""Get raw data from Google Analtyics for packages and
|
"""Get raw data from Google Analtyics for packages and
|
||||||
|
@ -182,32 +188,41 @@ class LoadAnalytics(CkanCommand):
|
||||||
end_date = end_date.strftime("%Y-%m-%d")
|
end_date = end_date.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
packages = {}
|
packages = {}
|
||||||
query = 'ga:pagePath=~%s,ga:pagePath=~%s' % \
|
query = "ga:pagePath=~%s,ga:pagePath=~%s" % (
|
||||||
(PACKAGE_URL, self.resource_url_tag)
|
PACKAGE_URL,
|
||||||
metrics = 'ga:uniquePageviews'
|
self.resource_url_tag,
|
||||||
sort = '-ga:uniquePageviews'
|
)
|
||||||
|
metrics = "ga:uniquePageviews"
|
||||||
|
sort = "-ga:uniquePageviews"
|
||||||
|
|
||||||
start_index = 1
|
start_index = 1
|
||||||
max_results = 10000
|
max_results = 10000
|
||||||
# data retrival is chunked
|
# data retrival is chunked
|
||||||
completed = False
|
completed = False
|
||||||
while not completed:
|
while not completed:
|
||||||
results = self.service.data().ga().get(ids='ga:%s' % self.profile_id,
|
results = (
|
||||||
filters=query,
|
self.service.data()
|
||||||
dimensions='ga:pagePath',
|
.ga()
|
||||||
start_date=start_date,
|
.get(
|
||||||
start_index=start_index,
|
ids="ga:%s" % self.profile_id,
|
||||||
max_results=max_results,
|
filters=query,
|
||||||
metrics=metrics,
|
dimensions="ga:pagePath",
|
||||||
sort=sort,
|
start_date=start_date,
|
||||||
end_date=end_date).execute()
|
start_index=start_index,
|
||||||
result_count = len(results.get('rows', []))
|
max_results=max_results,
|
||||||
|
metrics=metrics,
|
||||||
|
sort=sort,
|
||||||
|
end_date=end_date,
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
result_count = len(results.get("rows", []))
|
||||||
if result_count < max_results:
|
if result_count < max_results:
|
||||||
completed = True
|
completed = True
|
||||||
|
|
||||||
for result in results.get('rows', []):
|
for result in results.get("rows", []):
|
||||||
package = result[0]
|
package = result[0]
|
||||||
package = '/' + '/'.join(package.split('/')[2:])
|
package = "/" + "/".join(package.split("/")[2:])
|
||||||
count = result[1]
|
count = result[1]
|
||||||
packages[package] = int(count)
|
packages[package] = int(count)
|
||||||
|
|
||||||
|
@ -219,25 +234,27 @@ class LoadAnalytics(CkanCommand):
|
||||||
|
|
||||||
def parse_and_save(self):
|
def parse_and_save(self):
|
||||||
"""Grab raw data from Google Analytics and save to the database"""
|
"""Grab raw data from Google Analytics and save to the database"""
|
||||||
from ga_auth import (init_service, get_profile_id)
|
from ga_auth import init_service, get_profile_id
|
||||||
|
|
||||||
tokenfile = self.args[0]
|
tokenfile = self.args[0]
|
||||||
if not os.path.exists(tokenfile):
|
if not os.path.exists(tokenfile):
|
||||||
raise Exception('Cannot find the token file %s' % self.args[0])
|
raise Exception("Cannot find the token file %s" % self.args[0])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.service = init_service(self.args[0])
|
self.service = init_service(self.args[0])
|
||||||
except TypeError as e:
|
except TypeError as e:
|
||||||
raise Exception('Unable to create a service: {0}'.format(e))
|
raise Exception("Unable to create a service: {0}".format(e))
|
||||||
self.profile_id = get_profile_id(self.service)
|
self.profile_id = get_profile_id(self.service)
|
||||||
|
|
||||||
if len(self.args) > 1:
|
if len(self.args) > 1:
|
||||||
if len(self.args) > 2 and self.args[1].lower() != 'internal':
|
if len(self.args) > 2 and self.args[1].lower() != "internal":
|
||||||
raise Exception('Illegal argument %s' % self.args[1])
|
raise Exception("Illegal argument %s" % self.args[1])
|
||||||
self.bulk_import()
|
self.bulk_import()
|
||||||
else:
|
else:
|
||||||
query = 'ga:pagePath=~%s,ga:pagePath=~%s' % \
|
query = "ga:pagePath=~%s,ga:pagePath=~%s" % (
|
||||||
(PACKAGE_URL, self.resource_url_tag)
|
PACKAGE_URL,
|
||||||
|
self.resource_url_tag,
|
||||||
|
)
|
||||||
packages_data = self.get_ga_data(query_filter=query)
|
packages_data = self.get_ga_data(query_filter=query)
|
||||||
self.save_ga_data(packages_data)
|
self.save_ga_data(packages_data)
|
||||||
log.info("Saved %s records from google" % len(packages_data))
|
log.info("Saved %s records from google" % len(packages_data))
|
||||||
|
@ -246,20 +263,24 @@ class LoadAnalytics(CkanCommand):
|
||||||
"""Save tuples of packages_data to the database
|
"""Save tuples of packages_data to the database
|
||||||
"""
|
"""
|
||||||
for identifier, visits in packages_data.items():
|
for identifier, visits in packages_data.items():
|
||||||
recently = visits.get('recent', 0)
|
recently = visits.get("recent", 0)
|
||||||
ever = visits.get('ever', 0)
|
ever = visits.get("ever", 0)
|
||||||
matches = RESOURCE_URL_REGEX.match(identifier)
|
matches = RESOURCE_URL_REGEX.match(identifier)
|
||||||
if matches:
|
if matches:
|
||||||
resource_url = identifier[len(self.resource_url_tag):]
|
resource_url = identifier[len(self.resource_url_tag) :]
|
||||||
resource = model.Session.query(model.Resource).autoflush(True)\
|
resource = (
|
||||||
.filter_by(id=matches.group(1)).first()
|
model.Session.query(model.Resource)
|
||||||
|
.autoflush(True)
|
||||||
|
.filter_by(id=matches.group(1))
|
||||||
|
.first()
|
||||||
|
)
|
||||||
if not resource:
|
if not resource:
|
||||||
log.warning("Couldn't find resource %s" % resource_url)
|
log.warning("Couldn't find resource %s" % resource_url)
|
||||||
continue
|
continue
|
||||||
dbutil.update_resource_visits(resource.id, recently, ever)
|
dbutil.update_resource_visits(resource.id, recently, ever)
|
||||||
log.info("Updated %s with %s visits" % (resource.id, visits))
|
log.info("Updated %s with %s visits" % (resource.id, visits))
|
||||||
else:
|
else:
|
||||||
package_name = identifier[len(PACKAGE_URL):]
|
package_name = identifier[len(PACKAGE_URL) :]
|
||||||
if "/" in package_name:
|
if "/" in package_name:
|
||||||
log.warning("%s not a valid package name" % package_name)
|
log.warning("%s not a valid package name" % package_name)
|
||||||
continue
|
continue
|
||||||
|
@ -271,8 +292,16 @@ class LoadAnalytics(CkanCommand):
|
||||||
log.info("Updated %s with %s visits" % (item.id, visits))
|
log.info("Updated %s with %s visits" % (item.id, visits))
|
||||||
model.Session.commit()
|
model.Session.commit()
|
||||||
|
|
||||||
def ga_query(self, query_filter=None, from_date=None, to_date=None,
|
def ga_query(
|
||||||
start_index=1, max_results=10000, metrics=None, sort=None):
|
self,
|
||||||
|
query_filter=None,
|
||||||
|
from_date=None,
|
||||||
|
to_date=None,
|
||||||
|
start_index=1,
|
||||||
|
max_results=10000,
|
||||||
|
metrics=None,
|
||||||
|
sort=None,
|
||||||
|
):
|
||||||
"""Execute a query against Google Analytics
|
"""Execute a query against Google Analytics
|
||||||
"""
|
"""
|
||||||
if not to_date:
|
if not to_date:
|
||||||
|
@ -281,22 +310,28 @@ class LoadAnalytics(CkanCommand):
|
||||||
if isinstance(from_date, datetime.date):
|
if isinstance(from_date, datetime.date):
|
||||||
from_date = from_date.strftime("%Y-%m-%d")
|
from_date = from_date.strftime("%Y-%m-%d")
|
||||||
if not metrics:
|
if not metrics:
|
||||||
metrics = 'ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews'
|
metrics = "ga:visits,ga:visitors,ga:newVisits,ga:uniquePageviews"
|
||||||
if not sort:
|
if not sort:
|
||||||
sort = '-ga:uniquePageviews'
|
sort = "-ga:uniquePageviews"
|
||||||
|
|
||||||
print '%s -> %s' % (from_date, to_date)
|
print "%s -> %s" % (from_date, to_date)
|
||||||
|
|
||||||
results = self.service.data().ga().get(ids='ga:' + self.profile_id,
|
results = (
|
||||||
start_date=from_date,
|
self.service.data()
|
||||||
end_date=to_date,
|
.ga()
|
||||||
dimensions='ga:pagePath',
|
.get(
|
||||||
metrics=metrics,
|
ids="ga:" + self.profile_id,
|
||||||
sort=sort,
|
start_date=from_date,
|
||||||
start_index=start_index,
|
end_date=to_date,
|
||||||
filters=query_filter,
|
dimensions="ga:pagePath",
|
||||||
max_results=max_results
|
metrics=metrics,
|
||||||
).execute()
|
sort=sort,
|
||||||
|
start_index=start_index,
|
||||||
|
filters=query_filter,
|
||||||
|
max_results=max_results,
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def get_ga_data(self, query_filter=None, start_date=None, end_date=None):
|
def get_ga_data(self, query_filter=None, start_date=None, end_date=None):
|
||||||
|
@ -312,25 +347,31 @@ class LoadAnalytics(CkanCommand):
|
||||||
recent_date = recent_date.strftime("%Y-%m-%d")
|
recent_date = recent_date.strftime("%Y-%m-%d")
|
||||||
floor_date = datetime.date(2005, 1, 1)
|
floor_date = datetime.date(2005, 1, 1)
|
||||||
packages = {}
|
packages = {}
|
||||||
queries = ['ga:pagePath=~%s' % PACKAGE_URL]
|
queries = ["ga:pagePath=~%s" % PACKAGE_URL]
|
||||||
dates = {'recent': recent_date, 'ever': floor_date}
|
dates = {"recent": recent_date, "ever": floor_date}
|
||||||
for date_name, date in dates.iteritems():
|
for date_name, date in dates.iteritems():
|
||||||
for query in queries:
|
for query in queries:
|
||||||
results = self.ga_query(query_filter=query,
|
results = self.ga_query(
|
||||||
metrics='ga:uniquePageviews',
|
query_filter=query,
|
||||||
from_date=date)
|
metrics="ga:uniquePageviews",
|
||||||
if 'rows' in results:
|
from_date=date,
|
||||||
for result in results.get('rows'):
|
)
|
||||||
|
if "rows" in results:
|
||||||
|
for result in results.get("rows"):
|
||||||
package = result[0]
|
package = result[0]
|
||||||
if not package.startswith(PACKAGE_URL):
|
if not package.startswith(PACKAGE_URL):
|
||||||
package = '/' + '/'.join(package.split('/')[2:])
|
package = "/" + "/".join(package.split("/")[2:])
|
||||||
|
|
||||||
count = result[1]
|
count = result[1]
|
||||||
# Make sure we add the different representations of the same
|
# Make sure we add the different representations of the same
|
||||||
# dataset /mysite.com & /www.mysite.com ...
|
# dataset /mysite.com & /www.mysite.com ...
|
||||||
val = 0
|
val = 0
|
||||||
if package in packages and date_name in packages[package]:
|
if (
|
||||||
|
package in packages
|
||||||
|
and date_name in packages[package]
|
||||||
|
):
|
||||||
val += packages[package][date_name]
|
val += packages[package][date_name]
|
||||||
packages.setdefault(package, {})[date_name] = \
|
packages.setdefault(package, {})[date_name] = (
|
||||||
int(count) + val
|
int(count) + val
|
||||||
|
)
|
||||||
return packages
|
return packages
|
||||||
|
|
|
@ -11,32 +11,33 @@ from paste.util.multidict import MultiDict
|
||||||
|
|
||||||
from ckan.controllers.api import ApiController
|
from ckan.controllers.api import ApiController
|
||||||
|
|
||||||
log = logging.getLogger('ckanext.googleanalytics')
|
log = logging.getLogger("ckanext.googleanalytics")
|
||||||
|
|
||||||
|
|
||||||
class GAController(BaseController):
|
class GAController(BaseController):
|
||||||
def view(self):
|
def view(self):
|
||||||
# get package objects corresponding to popular GA content
|
# get package objects corresponding to popular GA content
|
||||||
c.top_resources = dbutil.get_top_resources(limit=10)
|
c.top_resources = dbutil.get_top_resources(limit=10)
|
||||||
return render('summary.html')
|
return render("summary.html")
|
||||||
|
|
||||||
|
|
||||||
class GAApiController(ApiController):
|
class GAApiController(ApiController):
|
||||||
# intercept API calls to record via google analytics
|
# intercept API calls to record via google analytics
|
||||||
def _post_analytics(
|
def _post_analytics(
|
||||||
self, user, request_obj_type, request_function, request_id):
|
self, user, request_obj_type, request_function, request_id
|
||||||
if config.get('googleanalytics.id'):
|
):
|
||||||
|
if config.get("googleanalytics.id"):
|
||||||
data_dict = {
|
data_dict = {
|
||||||
"v": 1,
|
"v": 1,
|
||||||
"tid": config.get('googleanalytics.id'),
|
"tid": config.get("googleanalytics.id"),
|
||||||
"cid": hashlib.md5(user).hexdigest(),
|
"cid": hashlib.md5(user).hexdigest(),
|
||||||
# customer id should be obfuscated
|
# customer id should be obfuscated
|
||||||
"t": "event",
|
"t": "event",
|
||||||
"dh": c.environ['HTTP_HOST'],
|
"dh": c.environ["HTTP_HOST"],
|
||||||
"dp": c.environ['PATH_INFO'],
|
"dp": c.environ["PATH_INFO"],
|
||||||
"dr": c.environ.get('HTTP_REFERER', ''),
|
"dr": c.environ.get("HTTP_REFERER", ""),
|
||||||
"ec": "CKAN API Request",
|
"ec": "CKAN API Request",
|
||||||
"ea": request_obj_type+request_function,
|
"ea": request_obj_type + request_function,
|
||||||
"el": request_id,
|
"el": request_id,
|
||||||
}
|
}
|
||||||
plugin.GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
|
plugin.GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
|
||||||
|
@ -44,66 +45,72 @@ class GAApiController(ApiController):
|
||||||
def action(self, logic_function, ver=None):
|
def action(self, logic_function, ver=None):
|
||||||
try:
|
try:
|
||||||
function = logic.get_action(logic_function)
|
function = logic.get_action(logic_function)
|
||||||
side_effect_free = getattr(function, 'side_effect_free', False)
|
side_effect_free = getattr(function, "side_effect_free", False)
|
||||||
request_data = self._get_request_data(
|
request_data = self._get_request_data(
|
||||||
try_url_params=side_effect_free)
|
try_url_params=side_effect_free
|
||||||
|
)
|
||||||
if isinstance(request_data, dict):
|
if isinstance(request_data, dict):
|
||||||
id = request_data.get('id', '')
|
id = request_data.get("id", "")
|
||||||
if 'q' in request_data:
|
if "q" in request_data:
|
||||||
id = request_data['q']
|
id = request_data["q"]
|
||||||
if 'query' in request_data:
|
if "query" in request_data:
|
||||||
id = request_data['query']
|
id = request_data["query"]
|
||||||
self._post_analytics(c.user, logic_function, '', id)
|
self._post_analytics(c.user, logic_function, "", id)
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
log.debug(e)
|
log.debug(e)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return ApiController.action(self, logic_function, ver)
|
return ApiController.action(self, logic_function, ver)
|
||||||
|
|
||||||
def list(self, ver=None, register=None,
|
def list(self, ver=None, register=None, subregister=None, id=None):
|
||||||
subregister=None, id=None):
|
self._post_analytics(
|
||||||
self._post_analytics(c.user,
|
c.user,
|
||||||
register +
|
register + ("_" + str(subregister) if subregister else ""),
|
||||||
("_"+str(subregister) if subregister else ""),
|
"list",
|
||||||
"list",
|
id,
|
||||||
id)
|
)
|
||||||
return ApiController.list(self, ver, register, subregister, id)
|
return ApiController.list(self, ver, register, subregister, id)
|
||||||
|
|
||||||
def show(self, ver=None, register=None,
|
def show(
|
||||||
subregister=None, id=None, id2=None):
|
self, ver=None, register=None, subregister=None, id=None, id2=None
|
||||||
self._post_analytics(c.user,
|
):
|
||||||
register +
|
self._post_analytics(
|
||||||
("_"+str(subregister) if subregister else ""),
|
c.user,
|
||||||
"show",
|
register + ("_" + str(subregister) if subregister else ""),
|
||||||
id)
|
"show",
|
||||||
|
id,
|
||||||
|
)
|
||||||
return ApiController.show(self, ver, register, subregister, id, id2)
|
return ApiController.show(self, ver, register, subregister, id, id2)
|
||||||
|
|
||||||
def update(self, ver=None, register=None,
|
def update(
|
||||||
subregister=None, id=None, id2=None):
|
self, ver=None, register=None, subregister=None, id=None, id2=None
|
||||||
self._post_analytics(c.user,
|
):
|
||||||
register +
|
self._post_analytics(
|
||||||
("_"+str(subregister) if subregister else ""),
|
c.user,
|
||||||
"update",
|
register + ("_" + str(subregister) if subregister else ""),
|
||||||
id)
|
"update",
|
||||||
|
id,
|
||||||
|
)
|
||||||
return ApiController.update(self, ver, register, subregister, id, id2)
|
return ApiController.update(self, ver, register, subregister, id, id2)
|
||||||
|
|
||||||
def delete(self, ver=None, register=None,
|
def delete(
|
||||||
subregister=None, id=None, id2=None):
|
self, ver=None, register=None, subregister=None, id=None, id2=None
|
||||||
self._post_analytics(c.user,
|
):
|
||||||
register +
|
self._post_analytics(
|
||||||
("_"+str(subregister) if subregister else ""),
|
c.user,
|
||||||
"delete",
|
register + ("_" + str(subregister) if subregister else ""),
|
||||||
id)
|
"delete",
|
||||||
|
id,
|
||||||
|
)
|
||||||
return ApiController.delete(self, ver, register, subregister, id, id2)
|
return ApiController.delete(self, ver, register, subregister, id, id2)
|
||||||
|
|
||||||
def search(self, ver=None, register=None):
|
def search(self, ver=None, register=None):
|
||||||
id = None
|
id = None
|
||||||
try:
|
try:
|
||||||
params = MultiDict(self._get_search_params(request.params))
|
params = MultiDict(self._get_search_params(request.params))
|
||||||
if 'q' in params.keys():
|
if "q" in params.keys():
|
||||||
id = params['q']
|
id = params["q"]
|
||||||
if 'query' in params.keys():
|
if "query" in params.keys():
|
||||||
id = params['query']
|
id = params["query"]
|
||||||
except ValueError, e:
|
except ValueError, e:
|
||||||
log.debug(str(e))
|
log.debug(str(e))
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -3,6 +3,7 @@ from sqlalchemy.sql import select, text
|
||||||
from sqlalchemy import func
|
from sqlalchemy import func
|
||||||
|
|
||||||
import ckan.model as model
|
import ckan.model as model
|
||||||
|
|
||||||
# from ckan.model.authz import PSEUDO_USER__VISITOR
|
# from ckan.model.authz import PSEUDO_USER__VISITOR
|
||||||
from ckan.lib.base import *
|
from ckan.lib.base import *
|
||||||
|
|
||||||
|
@ -11,16 +12,20 @@ cached_tables = {}
|
||||||
|
|
||||||
def init_tables():
|
def init_tables():
|
||||||
metadata = MetaData()
|
metadata = MetaData()
|
||||||
package_stats = Table('package_stats', metadata,
|
package_stats = Table(
|
||||||
Column('package_id', String(60),
|
"package_stats",
|
||||||
primary_key=True),
|
metadata,
|
||||||
Column('visits_recently', Integer),
|
Column("package_id", String(60), primary_key=True),
|
||||||
Column('visits_ever', Integer))
|
Column("visits_recently", Integer),
|
||||||
resource_stats = Table('resource_stats', metadata,
|
Column("visits_ever", Integer),
|
||||||
Column('resource_id', String(60),
|
)
|
||||||
primary_key=True),
|
resource_stats = Table(
|
||||||
Column('visits_recently', Integer),
|
"resource_stats",
|
||||||
Column('visits_ever', Integer))
|
metadata,
|
||||||
|
Column("resource_id", String(60), primary_key=True),
|
||||||
|
Column("visits_recently", Integer),
|
||||||
|
Column("visits_ever", Integer),
|
||||||
|
)
|
||||||
metadata.create_all(model.meta.engine)
|
metadata.create_all(model.meta.engine)
|
||||||
|
|
||||||
|
|
||||||
|
@ -35,63 +40,68 @@ def get_table(name):
|
||||||
|
|
||||||
def _update_visits(table_name, item_id, recently, ever):
|
def _update_visits(table_name, item_id, recently, ever):
|
||||||
stats = get_table(table_name)
|
stats = get_table(table_name)
|
||||||
id_col_name = "%s_id" % table_name[:-len("_stats")]
|
id_col_name = "%s_id" % table_name[: -len("_stats")]
|
||||||
id_col = getattr(stats.c, id_col_name)
|
id_col = getattr(stats.c, id_col_name)
|
||||||
s = select([func.count(id_col)],
|
s = select([func.count(id_col)], id_col == item_id)
|
||||||
id_col == item_id)
|
|
||||||
connection = model.Session.connection()
|
connection = model.Session.connection()
|
||||||
count = connection.execute(s).fetchone()
|
count = connection.execute(s).fetchone()
|
||||||
if count and count[0]:
|
if count and count[0]:
|
||||||
connection.execute(stats.update()\
|
connection.execute(
|
||||||
.where(id_col == item_id)\
|
stats.update()
|
||||||
.values(visits_recently=recently,
|
.where(id_col == item_id)
|
||||||
visits_ever=ever))
|
.values(visits_recently=recently, visits_ever=ever)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
values = {id_col_name: item_id,
|
values = {
|
||||||
'visits_recently': recently,
|
id_col_name: item_id,
|
||||||
'visits_ever': ever}
|
"visits_recently": recently,
|
||||||
connection.execute(stats.insert()\
|
"visits_ever": ever,
|
||||||
.values(**values))
|
}
|
||||||
|
connection.execute(stats.insert().values(**values))
|
||||||
|
|
||||||
|
|
||||||
def update_resource_visits(resource_id, recently, ever):
|
def update_resource_visits(resource_id, recently, ever):
|
||||||
return _update_visits("resource_stats",
|
return _update_visits("resource_stats", resource_id, recently, ever)
|
||||||
resource_id,
|
|
||||||
recently,
|
|
||||||
ever)
|
|
||||||
|
|
||||||
|
|
||||||
def update_package_visits(package_id, recently, ever):
|
def update_package_visits(package_id, recently, ever):
|
||||||
return _update_visits("package_stats",
|
return _update_visits("package_stats", package_id, recently, ever)
|
||||||
package_id,
|
|
||||||
recently,
|
|
||||||
ever)
|
|
||||||
|
|
||||||
|
|
||||||
def get_resource_visits_for_url(url):
|
def get_resource_visits_for_url(url):
|
||||||
connection = model.Session.connection()
|
connection = model.Session.connection()
|
||||||
count = connection.execute(
|
count = connection.execute(
|
||||||
text("""SELECT visits_ever FROM resource_stats, resource
|
text(
|
||||||
|
"""SELECT visits_ever FROM resource_stats, resource
|
||||||
WHERE resource_id = resource.id
|
WHERE resource_id = resource.id
|
||||||
AND resource.url = :url"""), url=url).fetchone()
|
AND resource.url = :url"""
|
||||||
|
),
|
||||||
|
url=url,
|
||||||
|
).fetchone()
|
||||||
return count and count[0] or ""
|
return count and count[0] or ""
|
||||||
|
|
||||||
|
|
||||||
""" get_top_packages is broken, and needs to be rewritten to work with
|
""" get_top_packages is broken, and needs to be rewritten to work with
|
||||||
CKAN 2.*. This is because ckan.authz has been removed in CKAN 2.*
|
CKAN 2.*. This is because ckan.authz has been removed in CKAN 2.*
|
||||||
|
|
||||||
See commit ffa86c010d5d25fa1881c6b915e48f3b44657612
|
See commit ffa86c010d5d25fa1881c6b915e48f3b44657612
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def get_top_packages(limit=20):
|
def get_top_packages(limit=20):
|
||||||
items = []
|
items = []
|
||||||
# caveat emptor: the query below will not filter out private
|
# caveat emptor: the query below will not filter out private
|
||||||
# or deleted datasets (TODO)
|
# or deleted datasets (TODO)
|
||||||
q = model.Session.query(model.Package)
|
q = model.Session.query(model.Package)
|
||||||
connection = model.Session.connection()
|
connection = model.Session.connection()
|
||||||
package_stats = get_table('package_stats')
|
package_stats = get_table("package_stats")
|
||||||
s = select([package_stats.c.package_id,
|
s = select(
|
||||||
package_stats.c.visits_recently,
|
[
|
||||||
package_stats.c.visits_ever])\
|
package_stats.c.package_id,
|
||||||
.order_by(package_stats.c.visits_recently.desc())
|
package_stats.c.visits_recently,
|
||||||
|
package_stats.c.visits_ever,
|
||||||
|
]
|
||||||
|
).order_by(package_stats.c.visits_recently.desc())
|
||||||
res = connection.execute(s).fetchmany(limit)
|
res = connection.execute(s).fetchmany(limit)
|
||||||
for package_id, recent, ever in res:
|
for package_id, recent, ever in res:
|
||||||
item = q.filter("package.id = '%s'" % package_id)
|
item = q.filter("package.id = '%s'" % package_id)
|
||||||
|
@ -104,15 +114,19 @@ def get_top_packages(limit=20):
|
||||||
def get_top_resources(limit=20):
|
def get_top_resources(limit=20):
|
||||||
items = []
|
items = []
|
||||||
connection = model.Session.connection()
|
connection = model.Session.connection()
|
||||||
resource_stats = get_table('resource_stats')
|
resource_stats = get_table("resource_stats")
|
||||||
s = select([resource_stats.c.resource_id,
|
s = select(
|
||||||
resource_stats.c.visits_recently,
|
[
|
||||||
resource_stats.c.visits_ever])\
|
resource_stats.c.resource_id,
|
||||||
.order_by(resource_stats.c.visits_recently.desc())
|
resource_stats.c.visits_recently,
|
||||||
|
resource_stats.c.visits_ever,
|
||||||
|
]
|
||||||
|
).order_by(resource_stats.c.visits_recently.desc())
|
||||||
res = connection.execute(s).fetchmany(limit)
|
res = connection.execute(s).fetchmany(limit)
|
||||||
for resource_id, recent, ever in res:
|
for resource_id, recent, ever in res:
|
||||||
item = model.Session.query(model.Resource)\
|
item = model.Session.query(model.Resource).filter(
|
||||||
.filter("resource.id = '%s'" % resource_id)
|
"resource.id = '%s'" % resource_id
|
||||||
|
)
|
||||||
if not item.count():
|
if not item.count():
|
||||||
continue
|
continue
|
||||||
items.append((item.first(), recent, ever))
|
items.append((item.first(), recent, ever))
|
||||||
|
|
|
@ -10,10 +10,9 @@ def _prepare_credentials(credentials_filename):
|
||||||
Either returns the user's oauth credentials or uses the credentials
|
Either returns the user's oauth credentials or uses the credentials
|
||||||
file to generate a token (by forcing the user to login in the browser)
|
file to generate a token (by forcing the user to login in the browser)
|
||||||
"""
|
"""
|
||||||
scope = ['https://www.googleapis.com/auth/analytics.readonly']
|
scope = ["https://www.googleapis.com/auth/analytics.readonly"]
|
||||||
credentials = ServiceAccountCredentials.from_json_keyfile_name(
|
credentials = ServiceAccountCredentials.from_json_keyfile_name(
|
||||||
credentials_filename,
|
credentials_filename, scopes=scope
|
||||||
scopes=scope
|
|
||||||
)
|
)
|
||||||
return credentials
|
return credentials
|
||||||
|
|
||||||
|
@ -29,7 +28,7 @@ def init_service(credentials_file):
|
||||||
credentials = _prepare_credentials(credentials_file)
|
credentials = _prepare_credentials(credentials_file)
|
||||||
http = credentials.authorize(http) # authorize the http object
|
http = credentials.authorize(http) # authorize the http object
|
||||||
|
|
||||||
return build('analytics', 'v3', http=http)
|
return build("analytics", "v3", http=http)
|
||||||
|
|
||||||
|
|
||||||
def get_profile_id(service):
|
def get_profile_id(service):
|
||||||
|
@ -42,23 +41,31 @@ def get_profile_id(service):
|
||||||
"""
|
"""
|
||||||
accounts = service.management().accounts().list().execute()
|
accounts = service.management().accounts().list().execute()
|
||||||
|
|
||||||
if not accounts.get('items'):
|
if not accounts.get("items"):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
accountName = config.get('googleanalytics.account')
|
accountName = config.get("googleanalytics.account")
|
||||||
webPropertyId = config.get('googleanalytics.id')
|
webPropertyId = config.get("googleanalytics.id")
|
||||||
for acc in accounts.get('items'):
|
for acc in accounts.get("items"):
|
||||||
if acc.get('name') == accountName:
|
if acc.get("name") == accountName:
|
||||||
accountId = acc.get('id')
|
accountId = acc.get("id")
|
||||||
|
|
||||||
# TODO: check, whether next line is doing something useful.
|
# TODO: check, whether next line is doing something useful.
|
||||||
webproperties = service.management().webproperties().list(
|
webproperties = (
|
||||||
accountId=accountId).execute()
|
service.management()
|
||||||
|
.webproperties()
|
||||||
|
.list(accountId=accountId)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
profiles = service.management().profiles().list(
|
profiles = (
|
||||||
accountId=accountId, webPropertyId=webPropertyId).execute()
|
service.management()
|
||||||
|
.profiles()
|
||||||
|
.list(accountId=accountId, webPropertyId=webPropertyId)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
if profiles.get('items'):
|
if profiles.get("items"):
|
||||||
return profiles.get('items')[0].get('id')
|
return profiles.get("items")[0].get("id")
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
from ckan import model
|
|
||||||
|
|
||||||
def setup():
|
|
||||||
connection = model.Session.connection()
|
|
||||||
connection.execute("""CREATE TABLE IF NOT EXISTS package_downloads (
|
|
||||||
id integer primary_key,
|
|
||||||
package_id varchar(60),
|
|
||||||
download_visits integer,
|
|
||||||
views_visits integer);""")
|
|
||||||
|
|
||||||
|
|
|
@ -1,275 +0,0 @@
|
||||||
import ast
|
|
||||||
import logging
|
|
||||||
import urllib
|
|
||||||
import commands
|
|
||||||
import paste.deploy.converters as converters
|
|
||||||
from ckan.lib.base import c
|
|
||||||
import ckan.lib.helpers as h
|
|
||||||
import ckan.plugins as p
|
|
||||||
from routes.mapper import SubMapper
|
|
||||||
from pylons import config
|
|
||||||
from ckan.controllers.package import PackageController
|
|
||||||
|
|
||||||
import urllib2
|
|
||||||
import importlib
|
|
||||||
import hashlib
|
|
||||||
|
|
||||||
import threading
|
|
||||||
import Queue
|
|
||||||
|
|
||||||
log = logging.getLogger('ckanext.googleanalytics')
|
|
||||||
|
|
||||||
|
|
||||||
def _post_analytics(
|
|
||||||
user, event_type, request_obj_type, request_function, request_id):
|
|
||||||
|
|
||||||
if config.get('googleanalytics.id'):
|
|
||||||
data_dict = {
|
|
||||||
"v": 1,
|
|
||||||
"tid": config.get('googleanalytics.id'),
|
|
||||||
"cid": hashlib.md5(c.user).hexdigest(),
|
|
||||||
# customer id should be obfuscated
|
|
||||||
"t": "event",
|
|
||||||
"dh": c.environ['HTTP_HOST'],
|
|
||||||
"dp": c.environ['PATH_INFO'],
|
|
||||||
"dr": c.environ.get('HTTP_REFERER', ''),
|
|
||||||
"ec": event_type,
|
|
||||||
"ea": request_obj_type + request_function,
|
|
||||||
"el": request_id,
|
|
||||||
}
|
|
||||||
GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
|
|
||||||
|
|
||||||
|
|
||||||
def wrap_resource_download(func):
|
|
||||||
|
|
||||||
def func_wrapper(cls, id, resource_id, filename=None):
|
|
||||||
_post_analytics(
|
|
||||||
c.user,
|
|
||||||
"CKAN Resource Download Request",
|
|
||||||
"Resource",
|
|
||||||
"Download",
|
|
||||||
resource_id
|
|
||||||
)
|
|
||||||
|
|
||||||
return func(cls, id, resource_id, filename=None)
|
|
||||||
|
|
||||||
return func_wrapper
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleAnalyticsException(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class AnalyticsPostThread(threading.Thread):
|
|
||||||
"""Threaded Url POST"""
|
|
||||||
def __init__(self, queue):
|
|
||||||
threading.Thread.__init__(self)
|
|
||||||
self.queue = queue
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
while True:
|
|
||||||
# grabs host from queue
|
|
||||||
data_dict = self.queue.get()
|
|
||||||
|
|
||||||
data = urllib.urlencode(data_dict)
|
|
||||||
log.debug("Sending API event to Google Analytics: " + data)
|
|
||||||
# send analytics
|
|
||||||
urllib2.urlopen(
|
|
||||||
"http://www.google-analytics.com/collect",
|
|
||||||
data,
|
|
||||||
# timeout in seconds
|
|
||||||
# https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
|
|
||||||
10)
|
|
||||||
|
|
||||||
# signals to queue job is done
|
|
||||||
self.queue.task_done()
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleAnalyticsPlugin(p.SingletonPlugin):
|
|
||||||
p.implements(p.IConfigurable, inherit=True)
|
|
||||||
p.implements(p.IRoutes, inherit=True)
|
|
||||||
p.implements(p.IConfigurer, inherit=True)
|
|
||||||
p.implements(p.ITemplateHelpers)
|
|
||||||
|
|
||||||
analytics_queue = Queue.Queue()
|
|
||||||
|
|
||||||
def configure(self, config):
|
|
||||||
'''Load config settings for this extension from config file.
|
|
||||||
|
|
||||||
See IConfigurable.
|
|
||||||
|
|
||||||
'''
|
|
||||||
if 'googleanalytics.id' not in config:
|
|
||||||
msg = "Missing googleanalytics.id in config"
|
|
||||||
raise GoogleAnalyticsException(msg)
|
|
||||||
self.googleanalytics_id = config['googleanalytics.id']
|
|
||||||
self.googleanalytics_domain = config.get(
|
|
||||||
'googleanalytics.domain', 'auto')
|
|
||||||
self.googleanalytics_fields = ast.literal_eval(config.get(
|
|
||||||
'googleanalytics.fields', '{}'))
|
|
||||||
|
|
||||||
googleanalytics_linked_domains = config.get(
|
|
||||||
'googleanalytics.linked_domains', ''
|
|
||||||
)
|
|
||||||
self.googleanalytics_linked_domains = [
|
|
||||||
x.strip() for x in googleanalytics_linked_domains.split(',') if x
|
|
||||||
]
|
|
||||||
|
|
||||||
if self.googleanalytics_linked_domains:
|
|
||||||
self.googleanalytics_fields['allowLinker'] = 'true'
|
|
||||||
|
|
||||||
self.googleanalytics_javascript_url = h.url_for_static(
|
|
||||||
'/scripts/ckanext-googleanalytics.js')
|
|
||||||
|
|
||||||
# If resource_prefix is not in config file then write the default value
|
|
||||||
# to the config dict, otherwise templates seem to get 'true' when they
|
|
||||||
# try to read resource_prefix from config.
|
|
||||||
if 'googleanalytics_resource_prefix' not in config:
|
|
||||||
config['googleanalytics_resource_prefix'] = (
|
|
||||||
commands.DEFAULT_RESOURCE_URL_TAG)
|
|
||||||
self.googleanalytics_resource_prefix = config[
|
|
||||||
'googleanalytics_resource_prefix']
|
|
||||||
|
|
||||||
self.show_downloads = converters.asbool(
|
|
||||||
config.get('googleanalytics.show_downloads', True))
|
|
||||||
self.track_events = converters.asbool(
|
|
||||||
config.get('googleanalytics.track_events', False))
|
|
||||||
self.enable_user_id = converters.asbool(
|
|
||||||
config.get('googleanalytics.enable_user_id', False))
|
|
||||||
|
|
||||||
if not converters.asbool(config.get('ckan.legacy_templates', 'false')):
|
|
||||||
p.toolkit.add_resource('fanstatic_library', 'ckanext-googleanalytics')
|
|
||||||
|
|
||||||
# spawn a pool of 5 threads, and pass them queue instance
|
|
||||||
for i in range(5):
|
|
||||||
t = AnalyticsPostThread(self.analytics_queue)
|
|
||||||
t.setDaemon(True)
|
|
||||||
t.start()
|
|
||||||
|
|
||||||
|
|
||||||
def update_config(self, config):
|
|
||||||
'''Change the CKAN (Pylons) environment configuration.
|
|
||||||
|
|
||||||
See IConfigurer.
|
|
||||||
|
|
||||||
'''
|
|
||||||
if converters.asbool(config.get('ckan.legacy_templates', 'false')):
|
|
||||||
p.toolkit.add_template_directory(config, 'legacy_templates')
|
|
||||||
p.toolkit.add_public_directory(config, 'legacy_public')
|
|
||||||
else:
|
|
||||||
p.toolkit.add_template_directory(config, 'templates')
|
|
||||||
|
|
||||||
def before_map(self, map):
|
|
||||||
'''Add new routes that this extension's controllers handle.
|
|
||||||
|
|
||||||
See IRoutes.
|
|
||||||
|
|
||||||
'''
|
|
||||||
# Helpers to reduce code clutter
|
|
||||||
GET = dict(method=['GET'])
|
|
||||||
PUT = dict(method=['PUT'])
|
|
||||||
POST = dict(method=['POST'])
|
|
||||||
DELETE = dict(method=['DELETE'])
|
|
||||||
GET_POST = dict(method=['GET', 'POST'])
|
|
||||||
# intercept API calls that we want to capture analytics on
|
|
||||||
register_list = [
|
|
||||||
'package',
|
|
||||||
'dataset',
|
|
||||||
'resource',
|
|
||||||
'tag',
|
|
||||||
'group',
|
|
||||||
'related',
|
|
||||||
'revision',
|
|
||||||
'licenses',
|
|
||||||
'rating',
|
|
||||||
'user',
|
|
||||||
'activity'
|
|
||||||
]
|
|
||||||
register_list_str = '|'.join(register_list)
|
|
||||||
# /api ver 3 or none
|
|
||||||
with SubMapper(map, controller='ckanext.googleanalytics.controller:GAApiController', path_prefix='/api{ver:/3|}',
|
|
||||||
ver='/3') as m:
|
|
||||||
m.connect('/action/{logic_function}', action='action',
|
|
||||||
conditions=GET_POST)
|
|
||||||
|
|
||||||
# /api ver 1, 2, 3 or none
|
|
||||||
with SubMapper(map, controller='ckanext.googleanalytics.controller:GAApiController', path_prefix='/api{ver:/1|/2|/3|}',
|
|
||||||
ver='/1') as m:
|
|
||||||
m.connect('/search/{register}', action='search')
|
|
||||||
|
|
||||||
# /api/rest ver 1, 2 or none
|
|
||||||
with SubMapper(map, controller='ckanext.googleanalytics.controller:GAApiController', path_prefix='/api{ver:/1|/2|}',
|
|
||||||
ver='/1', requirements=dict(register=register_list_str)
|
|
||||||
) as m:
|
|
||||||
|
|
||||||
m.connect('/rest/{register}', action='list', conditions=GET)
|
|
||||||
m.connect('/rest/{register}', action='create', conditions=POST)
|
|
||||||
m.connect('/rest/{register}/{id}', action='show', conditions=GET)
|
|
||||||
m.connect('/rest/{register}/{id}', action='update', conditions=PUT)
|
|
||||||
m.connect('/rest/{register}/{id}', action='update', conditions=POST)
|
|
||||||
m.connect('/rest/{register}/{id}', action='delete', conditions=DELETE)
|
|
||||||
|
|
||||||
return map
|
|
||||||
|
|
||||||
def after_map(self, map):
|
|
||||||
'''Add new routes that this extension's controllers handle.
|
|
||||||
|
|
||||||
See IRoutes.
|
|
||||||
|
|
||||||
'''
|
|
||||||
self.modify_resource_download_route(map)
|
|
||||||
map.redirect("/analytics/package/top", "/analytics/dataset/top")
|
|
||||||
map.connect(
|
|
||||||
'analytics', '/analytics/dataset/top',
|
|
||||||
controller='ckanext.googleanalytics.controller:GAController',
|
|
||||||
action='view'
|
|
||||||
)
|
|
||||||
return map
|
|
||||||
|
|
||||||
def get_helpers(self):
|
|
||||||
'''Return the CKAN 2.0 template helper functions this plugin provides.
|
|
||||||
|
|
||||||
See ITemplateHelpers.
|
|
||||||
|
|
||||||
'''
|
|
||||||
return {'googleanalytics_header': self.googleanalytics_header}
|
|
||||||
|
|
||||||
def googleanalytics_header(self):
|
|
||||||
'''Render the googleanalytics_header snippet for CKAN 2.0 templates.
|
|
||||||
|
|
||||||
This is a template helper function that renders the
|
|
||||||
googleanalytics_header jinja snippet. To be called from the jinja
|
|
||||||
templates in this extension, see ITemplateHelpers.
|
|
||||||
|
|
||||||
'''
|
|
||||||
|
|
||||||
if self.enable_user_id and c.user:
|
|
||||||
self.googleanalytics_fields['userId'] = str(c.userobj.id)
|
|
||||||
|
|
||||||
data = {
|
|
||||||
'googleanalytics_id': self.googleanalytics_id,
|
|
||||||
'googleanalytics_domain': self.googleanalytics_domain,
|
|
||||||
'googleanalytics_fields': str(self.googleanalytics_fields),
|
|
||||||
'googleanalytics_linked_domains': self.googleanalytics_linked_domains
|
|
||||||
}
|
|
||||||
return p.toolkit.render_snippet(
|
|
||||||
'googleanalytics/snippets/googleanalytics_header.html', data)
|
|
||||||
|
|
||||||
def modify_resource_download_route(self, map):
|
|
||||||
'''Modifies resource_download method in related controller
|
|
||||||
to attach GA tracking code.
|
|
||||||
'''
|
|
||||||
|
|
||||||
if '_routenames' in map.__dict__:
|
|
||||||
if 'resource_download' in map.__dict__['_routenames']:
|
|
||||||
route_data = map.__dict__['_routenames']['resource_download'].__dict__
|
|
||||||
route_controller = route_data['defaults']['controller'].split(
|
|
||||||
':')
|
|
||||||
module = importlib.import_module(route_controller[0])
|
|
||||||
controller_class = getattr(module, route_controller[1])
|
|
||||||
controller_class.resource_download = wrap_resource_download(
|
|
||||||
controller_class.resource_download)
|
|
||||||
else:
|
|
||||||
# If no custom uploader applied, use the default one
|
|
||||||
PackageController.resource_download = wrap_resource_download(
|
|
||||||
PackageController.resource_download)
|
|
|
@ -0,0 +1,163 @@
|
||||||
|
from __future__ import absolute_import
|
||||||
|
import ast
|
||||||
|
import logging
|
||||||
|
import urllib
|
||||||
|
import ckanext.googleanalytics.commands as commands
|
||||||
|
import paste.deploy.converters as converters
|
||||||
|
import ckan.lib.helpers as h
|
||||||
|
import ckan.plugins as p
|
||||||
|
import ckan.plugins.toolkit as tk
|
||||||
|
import urllib2
|
||||||
|
from ckan.exceptions import CkanVersionException
|
||||||
|
import threading
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
try:
|
||||||
|
tk.requires_ckan_version("2.9")
|
||||||
|
except CkanVersionException:
|
||||||
|
from ckanext.googleanalytics.plugin.paster_plugin import GAMixinPlugin
|
||||||
|
else:
|
||||||
|
from ckanext.googleanalytics.plugin.flask_plugin import GAMixinPlugin
|
||||||
|
|
||||||
|
|
||||||
|
class GoogleAnalyticsException(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class AnalyticsPostThread(threading.Thread):
|
||||||
|
"""Threaded Url POST"""
|
||||||
|
|
||||||
|
def __init__(self, queue):
|
||||||
|
threading.Thread.__init__(self)
|
||||||
|
self.queue = queue
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
while True:
|
||||||
|
# grabs host from queue
|
||||||
|
data_dict = self.queue.get()
|
||||||
|
|
||||||
|
data = urllib.urlencode(data_dict)
|
||||||
|
log.debug("Sending API event to Google Analytics: " + data)
|
||||||
|
# send analytics
|
||||||
|
urllib2.urlopen(
|
||||||
|
"http://www.google-analytics.com/collect",
|
||||||
|
data,
|
||||||
|
# timeout in seconds
|
||||||
|
# https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
|
||||||
|
10,
|
||||||
|
)
|
||||||
|
|
||||||
|
# signals to queue job is done
|
||||||
|
self.queue.task_done()
|
||||||
|
|
||||||
|
|
||||||
|
class GoogleAnalyticsPlugin(GAMixinPlugin, p.SingletonPlugin):
|
||||||
|
p.implements(p.IConfigurable, inherit=True)
|
||||||
|
p.implements(p.IConfigurer, inherit=True)
|
||||||
|
p.implements(p.ITemplateHelpers)
|
||||||
|
|
||||||
|
def configure(self, config):
|
||||||
|
"""Load config settings for this extension from config file.
|
||||||
|
|
||||||
|
See IConfigurable.
|
||||||
|
|
||||||
|
"""
|
||||||
|
if "googleanalytics.id" not in config:
|
||||||
|
msg = "Missing googleanalytics.id in config"
|
||||||
|
raise GoogleAnalyticsException(msg)
|
||||||
|
self.googleanalytics_id = config["googleanalytics.id"]
|
||||||
|
self.googleanalytics_domain = config.get(
|
||||||
|
"googleanalytics.domain", "auto"
|
||||||
|
)
|
||||||
|
self.googleanalytics_fields = ast.literal_eval(
|
||||||
|
config.get("googleanalytics.fields", "{}")
|
||||||
|
)
|
||||||
|
|
||||||
|
googleanalytics_linked_domains = config.get(
|
||||||
|
"googleanalytics.linked_domains", ""
|
||||||
|
)
|
||||||
|
self.googleanalytics_linked_domains = [
|
||||||
|
x.strip() for x in googleanalytics_linked_domains.split(",") if x
|
||||||
|
]
|
||||||
|
|
||||||
|
if self.googleanalytics_linked_domains:
|
||||||
|
self.googleanalytics_fields["allowLinker"] = "true"
|
||||||
|
|
||||||
|
self.googleanalytics_javascript_url = h.url_for_static(
|
||||||
|
"/scripts/ckanext-googleanalytics.js"
|
||||||
|
)
|
||||||
|
|
||||||
|
# If resource_prefix is not in config file then write the default value
|
||||||
|
# to the config dict, otherwise templates seem to get 'true' when they
|
||||||
|
# try to read resource_prefix from config.
|
||||||
|
if "googleanalytics_resource_prefix" not in config:
|
||||||
|
config[
|
||||||
|
"googleanalytics_resource_prefix"
|
||||||
|
] = commands.DEFAULT_RESOURCE_URL_TAG
|
||||||
|
self.googleanalytics_resource_prefix = config[
|
||||||
|
"googleanalytics_resource_prefix"
|
||||||
|
]
|
||||||
|
|
||||||
|
self.show_downloads = converters.asbool(
|
||||||
|
config.get("googleanalytics.show_downloads", True)
|
||||||
|
)
|
||||||
|
self.track_events = converters.asbool(
|
||||||
|
config.get("googleanalytics.track_events", False)
|
||||||
|
)
|
||||||
|
self.enable_user_id = converters.asbool(
|
||||||
|
config.get("googleanalytics.enable_user_id", False)
|
||||||
|
)
|
||||||
|
|
||||||
|
if not converters.asbool(config.get("ckan.legacy_templates", "false")):
|
||||||
|
p.toolkit.add_resource(
|
||||||
|
"../fanstatic_library", "ckanext-googleanalytics"
|
||||||
|
)
|
||||||
|
|
||||||
|
# spawn a pool of 5 threads, and pass them queue instance
|
||||||
|
for i in range(5):
|
||||||
|
t = AnalyticsPostThread(self.analytics_queue)
|
||||||
|
t.setDaemon(True)
|
||||||
|
t.start()
|
||||||
|
|
||||||
|
def update_config(self, config):
|
||||||
|
"""Change the CKAN (Pylons) environment configuration.
|
||||||
|
|
||||||
|
See IConfigurer.
|
||||||
|
|
||||||
|
"""
|
||||||
|
if converters.asbool(config.get("ckan.legacy_templates", "false")):
|
||||||
|
p.toolkit.add_template_directory(config, "../legacy_templates")
|
||||||
|
p.toolkit.add_public_directory(config, "../legacy_public")
|
||||||
|
else:
|
||||||
|
p.toolkit.add_template_directory(config, "../templates")
|
||||||
|
|
||||||
|
def get_helpers(self):
|
||||||
|
"""Return the CKAN 2.0 template helper functions this plugin provides.
|
||||||
|
|
||||||
|
See ITemplateHelpers.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return {"googleanalytics_header": self.googleanalytics_header}
|
||||||
|
|
||||||
|
def googleanalytics_header(self):
|
||||||
|
"""Render the googleanalytics_header snippet for CKAN 2.0 templates.
|
||||||
|
|
||||||
|
This is a template helper function that renders the
|
||||||
|
googleanalytics_header jinja snippet. To be called from the jinja
|
||||||
|
templates in this extension, see ITemplateHelpers.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
if self.enable_user_id and tk.c.user:
|
||||||
|
self.googleanalytics_fields["userId"] = str(tk.c.userobj.id)
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"googleanalytics_id": self.googleanalytics_id,
|
||||||
|
"googleanalytics_domain": self.googleanalytics_domain,
|
||||||
|
"googleanalytics_fields": str(self.googleanalytics_fields),
|
||||||
|
"googleanalytics_linked_domains": self.googleanalytics_linked_domains,
|
||||||
|
}
|
||||||
|
return p.toolkit.render_snippet(
|
||||||
|
"googleanalytics/snippets/googleanalytics_header.html", data
|
||||||
|
)
|
|
@ -0,0 +1,15 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import Queue
|
||||||
|
|
||||||
|
import ckan.plugins as plugins
|
||||||
|
|
||||||
|
from ckanext.googleanalytics.views import ga
|
||||||
|
|
||||||
|
|
||||||
|
class GAMixinPlugin(plugins.SingletonPlugin):
|
||||||
|
plugins.implements(plugins.IBlueprint)
|
||||||
|
|
||||||
|
analytics_queue = Queue.Queue()
|
||||||
|
|
||||||
|
def get_blueprint(self):
|
||||||
|
return [ga]
|
|
@ -0,0 +1,165 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import Queue
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
import ckan.plugins as plugins
|
||||||
|
import ckan.plugins.toolkit as tk
|
||||||
|
|
||||||
|
from ckan.controllers.package import PackageController
|
||||||
|
from pylons import config
|
||||||
|
from routes.mapper import SubMapper
|
||||||
|
|
||||||
|
|
||||||
|
class GAMixinPlugin(plugins.SingletonPlugin):
|
||||||
|
plugins.implements(plugins.IRoutes)
|
||||||
|
|
||||||
|
analytics_queue = Queue.Queue()
|
||||||
|
|
||||||
|
def before_map(self, map):
|
||||||
|
"""Add new routes that this extension's controllers handle.
|
||||||
|
|
||||||
|
See IRoutes.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Helpers to reduce code clutter
|
||||||
|
GET = dict(method=["GET"])
|
||||||
|
PUT = dict(method=["PUT"])
|
||||||
|
POST = dict(method=["POST"])
|
||||||
|
DELETE = dict(method=["DELETE"])
|
||||||
|
GET_POST = dict(method=["GET", "POST"])
|
||||||
|
# intercept API calls that we want to capture analytics on
|
||||||
|
register_list = [
|
||||||
|
"package",
|
||||||
|
"dataset",
|
||||||
|
"resource",
|
||||||
|
"tag",
|
||||||
|
"group",
|
||||||
|
"related",
|
||||||
|
"revision",
|
||||||
|
"licenses",
|
||||||
|
"rating",
|
||||||
|
"user",
|
||||||
|
"activity",
|
||||||
|
]
|
||||||
|
register_list_str = "|".join(register_list)
|
||||||
|
# /api ver 3 or none
|
||||||
|
with SubMapper(
|
||||||
|
map,
|
||||||
|
controller="ckanext.googleanalytics.controller:GAApiController",
|
||||||
|
path_prefix="/api{ver:/3|}",
|
||||||
|
ver="/3",
|
||||||
|
) as m:
|
||||||
|
m.connect(
|
||||||
|
"/action/{logic_function}",
|
||||||
|
action="action",
|
||||||
|
conditions=GET_POST,
|
||||||
|
)
|
||||||
|
|
||||||
|
# /api ver 1, 2, 3 or none
|
||||||
|
with SubMapper(
|
||||||
|
map,
|
||||||
|
controller="ckanext.googleanalytics.controller:GAApiController",
|
||||||
|
path_prefix="/api{ver:/1|/2|/3|}",
|
||||||
|
ver="/1",
|
||||||
|
) as m:
|
||||||
|
m.connect("/search/{register}", action="search")
|
||||||
|
|
||||||
|
# /api/rest ver 1, 2 or none
|
||||||
|
with SubMapper(
|
||||||
|
map,
|
||||||
|
controller="ckanext.googleanalytics.controller:GAApiController",
|
||||||
|
path_prefix="/api{ver:/1|/2|}",
|
||||||
|
ver="/1",
|
||||||
|
requirements=dict(register=register_list_str),
|
||||||
|
) as m:
|
||||||
|
|
||||||
|
m.connect("/rest/{register}", action="list", conditions=GET)
|
||||||
|
m.connect("/rest/{register}", action="create", conditions=POST)
|
||||||
|
m.connect("/rest/{register}/{id}", action="show", conditions=GET)
|
||||||
|
m.connect("/rest/{register}/{id}", action="update", conditions=PUT)
|
||||||
|
m.connect(
|
||||||
|
"/rest/{register}/{id}", action="update", conditions=POST
|
||||||
|
)
|
||||||
|
m.connect(
|
||||||
|
"/rest/{register}/{id}", action="delete", conditions=DELETE
|
||||||
|
)
|
||||||
|
|
||||||
|
return map
|
||||||
|
|
||||||
|
def after_map(self, map):
|
||||||
|
"""Add new routes that this extension's controllers handle.
|
||||||
|
|
||||||
|
See IRoutes.
|
||||||
|
|
||||||
|
"""
|
||||||
|
self._modify_resource_download_route(map)
|
||||||
|
map.redirect("/analytics/package/top", "/analytics/dataset/top")
|
||||||
|
map.connect(
|
||||||
|
"analytics",
|
||||||
|
"/analytics/dataset/top",
|
||||||
|
controller="ckanext.googleanalytics.controller:GAController",
|
||||||
|
action="view",
|
||||||
|
)
|
||||||
|
return map
|
||||||
|
|
||||||
|
def _modify_resource_download_route(self, map):
|
||||||
|
"""Modifies resource_download method in related controller
|
||||||
|
to attach GA tracking code.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if "_routenames" in map.__dict__:
|
||||||
|
if "resource_download" in map.__dict__["_routenames"]:
|
||||||
|
route_data = map.__dict__["_routenames"][
|
||||||
|
"resource_download"
|
||||||
|
].__dict__
|
||||||
|
route_controller = route_data["defaults"]["controller"].split(
|
||||||
|
":"
|
||||||
|
)
|
||||||
|
module = importlib.import_module(route_controller[0])
|
||||||
|
controller_class = getattr(module, route_controller[1])
|
||||||
|
controller_class.resource_download = wrap_resource_download(
|
||||||
|
controller_class.resource_download
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# If no custom uploader applied, use the default one
|
||||||
|
PackageController.resource_download = wrap_resource_download(
|
||||||
|
PackageController.resource_download
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def wrap_resource_download(func):
|
||||||
|
def func_wrapper(cls, id, resource_id, filename=None):
|
||||||
|
_post_analytics(
|
||||||
|
tk.c.user,
|
||||||
|
"CKAN Resource Download Request",
|
||||||
|
"Resource",
|
||||||
|
"Download",
|
||||||
|
resource_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
return func(cls, id, resource_id, filename=None)
|
||||||
|
|
||||||
|
return func_wrapper
|
||||||
|
|
||||||
|
|
||||||
|
def _post_analytics(
|
||||||
|
user, event_type, request_obj_type, request_function, request_id
|
||||||
|
):
|
||||||
|
|
||||||
|
if config.get("googleanalytics.id"):
|
||||||
|
data_dict = {
|
||||||
|
"v": 1,
|
||||||
|
"tid": config.get("googleanalytics.id"),
|
||||||
|
"cid": hashlib.md5(tk.c.user).hexdigest(),
|
||||||
|
# customer id should be obfuscated
|
||||||
|
"t": "event",
|
||||||
|
"dh": tk.c.environ["HTTP_HOST"],
|
||||||
|
"dp": tk.c.environ["PATH_INFO"],
|
||||||
|
"dr": tk.c.environ.get("HTTP_REFERER", ""),
|
||||||
|
"ec": event_type,
|
||||||
|
"ea": request_obj_type + request_function,
|
||||||
|
"el": request_id,
|
||||||
|
}
|
||||||
|
GAMixinPlugin.analytics_queue.put(data_dict)
|
|
@ -0,0 +1,89 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from flask import Blueprint
|
||||||
|
import hashlib
|
||||||
|
import ckan.views.api as api
|
||||||
|
import ckan.views.resource as resource
|
||||||
|
import ckan.logic as logic
|
||||||
|
import logging
|
||||||
|
from ckan.common import g
|
||||||
|
import ckan.plugins.toolkit as tk
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
ga = Blueprint(u"google_analytics", "google_analytics",)
|
||||||
|
|
||||||
|
|
||||||
|
def action(logic_function, ver=api.API_MAX_VERSION):
|
||||||
|
try:
|
||||||
|
function = logic.get_action(logic_function)
|
||||||
|
side_effect_free = getattr(function, "side_effect_free", False)
|
||||||
|
request_data = api._get_request_data(try_url_params=side_effect_free)
|
||||||
|
if isinstance(request_data, dict):
|
||||||
|
id = request_data.get("id", "")
|
||||||
|
if "q" in request_data:
|
||||||
|
id = request_data["q"]
|
||||||
|
if "query" in request_data:
|
||||||
|
id = request_data["query"]
|
||||||
|
_post_analytics(g.user, "CKAN API Request", logic_function, "", id)
|
||||||
|
except Exception, e:
|
||||||
|
log.debug(e)
|
||||||
|
pass
|
||||||
|
|
||||||
|
return api.action(logic_function, ver)
|
||||||
|
|
||||||
|
|
||||||
|
ga.add_url_rule(
|
||||||
|
u"/api/action/<logic_function>",
|
||||||
|
methods=[u"GET", u"POST"],
|
||||||
|
view_func=action,
|
||||||
|
)
|
||||||
|
ga.add_url_rule(
|
||||||
|
u"/<int(min=3, max={0}):ver>/action/<logic_function>".format(
|
||||||
|
api.API_MAX_VERSION
|
||||||
|
),
|
||||||
|
methods=[u"GET", u"POST"],
|
||||||
|
view_func=action,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def download(id, resource_id, filename=None, package_type="dataset"):
|
||||||
|
_post_analytics(
|
||||||
|
g.user,
|
||||||
|
"CKAN Resource Download Request",
|
||||||
|
"Resource",
|
||||||
|
"Download",
|
||||||
|
resource_id,
|
||||||
|
)
|
||||||
|
return resource.download(package_type, id, resource_id, filename)
|
||||||
|
|
||||||
|
|
||||||
|
ga.add_url_rule(
|
||||||
|
u"/dataset/<id>/resource/<resource_id>/download", view_func=download
|
||||||
|
)
|
||||||
|
ga.add_url_rule(
|
||||||
|
u"/dataset/<id>/resource/<resource_id>/download/<filename>",
|
||||||
|
view_func=download,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _post_analytics(
    user, event_type, request_obj_type, request_function, request_id
):
    """Queue an analytics event describing the current request.

    Does nothing unless ``googleanalytics.id`` is set in the config.
    Otherwise a dict of event parameters is built from the current request
    environment and put on ``GoogleAnalyticsPlugin.analytics_queue``.

    :param user: currently unused; the client id is derived from
        ``tk.c.user`` (see the note in the body)
    :param event_type: sent as the event category (``ec``)
    :param request_obj_type: first half of the event action (``ea``)
    :param request_function: second half of the event action (``ea``)
    :param request_id: sent as the event label (``el``)
    """
    # Imported at call time rather than at module level -- presumably to
    # avoid a circular import with the plugin module; confirm before moving.
    from ckanext.googleanalytics.plugin import GoogleAnalyticsPlugin

    tracking_id = tk.config.get("googleanalytics.id")
    if not tracking_id:
        return

    # NOTE(review): the ``user`` argument is ignored here; both callers in
    # this module pass ``g.user`` -- confirm it always equals ``tk.c.user``
    # before switching over.
    client = tk.c.user or u""
    # hashlib only accepts bytes: encode text explicitly so this works on
    # Python 3 and with non-ASCII user names on Python 2.
    if not isinstance(client, bytes):
        client = client.encode("utf-8")

    environ = tk.request.environ
    data_dict = {
        "v": 1,
        "tid": tracking_id,
        # customer id should be obfuscated
        "cid": hashlib.md5(client).hexdigest(),
        "t": "event",
        "dh": environ["HTTP_HOST"],
        "dp": environ["PATH_INFO"],
        "dr": environ.get("HTTP_REFERER", ""),
        "ec": event_type,
        "ea": request_obj_type + request_function,
        "el": request_id,
    }
    GoogleAnalyticsPlugin.analytics_queue.put(data_dict)
|
37
setup.py
37
setup.py
|
@ -1,28 +1,25 @@
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
version = '0.1'
|
version = "0.1"
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='ckanext-googleanalytics',
|
name="ckanext-googleanalytics",
|
||||||
version=version,
|
version=version,
|
||||||
description="Add GA tracking and reporting to CKAN instance",
|
description="Add GA tracking and reporting to CKAN instance",
|
||||||
long_description="""\
|
long_description="""\
|
||||||
""",
|
""",
|
||||||
classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
|
classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
|
||||||
keywords='',
|
keywords="",
|
||||||
author='Seb Bacon',
|
author="Seb Bacon",
|
||||||
author_email='seb.bacon@gmail.com',
|
author_email="seb.bacon@gmail.com",
|
||||||
url='',
|
url="",
|
||||||
license='',
|
license="",
|
||||||
packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
|
packages=find_packages(exclude=["ez_setup", "examples", "tests"]),
|
||||||
namespace_packages=['ckanext', 'ckanext.googleanalytics'],
|
namespace_packages=["ckanext", "ckanext.googleanalytics"],
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
install_requires=[
|
install_requires=[],
|
||||||
|
entry_points="""
|
||||||
],
|
|
||||||
entry_points=\
|
|
||||||
"""
|
|
||||||
[ckan.plugins]
|
[ckan.plugins]
|
||||||
# Add plugins here, eg
|
# Add plugins here, eg
|
||||||
googleanalytics=ckanext.googleanalytics.plugin:GoogleAnalyticsPlugin
|
googleanalytics=ckanext.googleanalytics.plugin:GoogleAnalyticsPlugin
|
||||||
|
|
|
@ -53,9 +53,8 @@ class ReusableServer(BaseHTTPServer.HTTPServer):
|
||||||
|
|
||||||
|
|
||||||
def runmockserver():
|
def runmockserver():
|
||||||
server_address = ('localhost', 6969)
|
server_address = ("localhost", 6969)
|
||||||
httpd = ReusableServer(server_address,
|
httpd = ReusableServer(server_address, MockHandler)
|
||||||
MockHandler)
|
|
||||||
httpd_thread = threading.Thread(target=httpd.serve_til_quit)
|
httpd_thread = threading.Thread(target=httpd.serve_til_quit)
|
||||||
httpd_thread.setDaemon(True)
|
httpd_thread.setDaemon(True)
|
||||||
httpd_thread.start()
|
httpd_thread.start()
|
||||||
|
|
|
@ -15,7 +15,7 @@ import ckanext.googleanalytics.gasnippet as gasnippet
|
||||||
|
|
||||||
class MockClient(httplib.HTTPConnection):
|
class MockClient(httplib.HTTPConnection):
|
||||||
def request(self, http_request):
|
def request(self, http_request):
|
||||||
filters = http_request.uri.query.get('filters')
|
filters = http_request.uri.query.get("filters")
|
||||||
path = http_request.uri.path
|
path = http_request.uri.path
|
||||||
if filters:
|
if filters:
|
||||||
if "dataset" in filters:
|
if "dataset" in filters:
|
||||||
|
@ -29,9 +29,9 @@ class MockClient(httplib.HTTPConnection):
|
||||||
|
|
||||||
class TestConfig(TestCase):
|
class TestConfig(TestCase):
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
config = appconfig('config:test.ini', relative_to=conf_dir)
|
config = appconfig("config:test.ini", relative_to=conf_dir)
|
||||||
config.local_conf['ckan.plugins'] = 'googleanalytics'
|
config.local_conf["ckan.plugins"] = "googleanalytics"
|
||||||
config.local_conf['googleanalytics.id'] = ''
|
config.local_conf["googleanalytics.id"] = ""
|
||||||
command = LoadAnalytics("loadanalytics")
|
command = LoadAnalytics("loadanalytics")
|
||||||
command.CONFIG = config.local_conf
|
command.CONFIG = config.local_conf
|
||||||
self.assertRaises(Exception, command.run, [])
|
self.assertRaises(Exception, command.run, [])
|
||||||
|
@ -42,16 +42,19 @@ class TestLoadCommand(TestCase):
|
||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
InitDB("initdb").run([]) # set up database tables
|
InitDB("initdb").run([]) # set up database tables
|
||||||
|
|
||||||
config = appconfig('config:test.ini', relative_to=conf_dir)
|
config = appconfig("config:test.ini", relative_to=conf_dir)
|
||||||
config.local_conf['ckan.plugins'] = 'googleanalytics'
|
config.local_conf["ckan.plugins"] = "googleanalytics"
|
||||||
config.local_conf['googleanalytics.username'] = 'borf'
|
config.local_conf["googleanalytics.username"] = "borf"
|
||||||
config.local_conf['googleanalytics.password'] = 'borf'
|
config.local_conf["googleanalytics.password"] = "borf"
|
||||||
config.local_conf['googleanalytics.id'] = 'UA-borf-1'
|
config.local_conf["googleanalytics.id"] = "UA-borf-1"
|
||||||
config.local_conf['googleanalytics.show_downloads'] = 'true'
|
config.local_conf["googleanalytics.show_downloads"] = "true"
|
||||||
cls.config = config.local_conf
|
cls.config = config.local_conf
|
||||||
wsgiapp = make_app(config.global_conf, **config.local_conf)
|
wsgiapp = make_app(config.global_conf, **config.local_conf)
|
||||||
env = {'HTTP_ACCEPT': ('text/html;q=0.9,text/plain;'
|
env = {
|
||||||
'q=0.8,image/png,*/*;q=0.5')}
|
"HTTP_ACCEPT": (
|
||||||
|
"text/html;q=0.9,text/plain;" "q=0.8,image/png,*/*;q=0.5"
|
||||||
|
)
|
||||||
|
}
|
||||||
cls.app = paste.fixture.TestApp(wsgiapp, extra_environ=env)
|
cls.app = paste.fixture.TestApp(wsgiapp, extra_environ=env)
|
||||||
CreateTestData.create()
|
CreateTestData.create()
|
||||||
runmockserver()
|
runmockserver()
|
||||||
|
@ -64,14 +67,16 @@ class TestLoadCommand(TestCase):
|
||||||
conn.getresponse()
|
conn.getresponse()
|
||||||
|
|
||||||
def test_analytics_snippet(self):
|
def test_analytics_snippet(self):
|
||||||
response = self.app.get(url_for(controller='tag', action='index'))
|
response = self.app.get(url_for(controller="tag", action="index"))
|
||||||
code = gasnippet.header_code % (self.config['googleanalytics.id'],
|
code = gasnippet.header_code % (
|
||||||
'auto')
|
self.config["googleanalytics.id"],
|
||||||
|
"auto",
|
||||||
|
)
|
||||||
assert code in response.body
|
assert code in response.body
|
||||||
|
|
||||||
def test_top_packages(self):
|
def test_top_packages(self):
|
||||||
command = LoadAnalytics("loadanalytics")
|
command = LoadAnalytics("loadanalytics")
|
||||||
command.TEST_HOST = MockClient('localhost', 6969)
|
command.TEST_HOST = MockClient("localhost", 6969)
|
||||||
command.CONFIG = self.config
|
command.CONFIG = self.config
|
||||||
command.run([])
|
command.run([])
|
||||||
packages = dbutil.get_top_packages()
|
packages = dbutil.get_top_packages()
|
||||||
|
@ -81,31 +86,37 @@ class TestLoadCommand(TestCase):
|
||||||
|
|
||||||
def test_download_count_inserted(self):
|
def test_download_count_inserted(self):
|
||||||
command = LoadAnalytics("loadanalytics")
|
command = LoadAnalytics("loadanalytics")
|
||||||
command.TEST_HOST = MockClient('localhost', 6969)
|
command.TEST_HOST = MockClient("localhost", 6969)
|
||||||
command.CONFIG = self.config
|
command.CONFIG = self.config
|
||||||
command.run([])
|
command.run([])
|
||||||
response = self.app.get(url_for(
|
response = self.app.get(
|
||||||
controller='package', action='read', id='annakarenina'
|
url_for(controller="package", action="read", id="annakarenina")
|
||||||
))
|
)
|
||||||
assert "[downloaded 4 times]" in response.body
|
assert "[downloaded 4 times]" in response.body
|
||||||
|
|
||||||
def test_js_inserted_resource_view(self):
|
def test_js_inserted_resource_view(self):
|
||||||
from nose import SkipTest
|
from nose import SkipTest
|
||||||
|
|
||||||
raise SkipTest("Test won't work until CKAN 1.5.2")
|
raise SkipTest("Test won't work until CKAN 1.5.2")
|
||||||
|
|
||||||
from ckan.logic.action import get
|
from ckan.logic.action import get
|
||||||
from ckan import model
|
from ckan import model
|
||||||
context = {'model': model, 'ignore_auth': True}
|
|
||||||
data = {'id': 'annakarenina'}
|
context = {"model": model, "ignore_auth": True}
|
||||||
|
data = {"id": "annakarenina"}
|
||||||
pkg = get.package_show(context, data)
|
pkg = get.package_show(context, data)
|
||||||
resource_id = pkg['resources'][0]['id']
|
resource_id = pkg["resources"][0]["id"]
|
||||||
|
|
||||||
command = LoadAnalytics("loadanalytics")
|
command = LoadAnalytics("loadanalytics")
|
||||||
command.TEST_HOST = MockClient('localhost', 6969)
|
command.TEST_HOST = MockClient("localhost", 6969)
|
||||||
command.CONFIG = self.config
|
command.CONFIG = self.config
|
||||||
command.run([])
|
command.run([])
|
||||||
response = self.app.get(url_for(
|
response = self.app.get(
|
||||||
controller='package', action='resource_read', id='annakarenina',
|
url_for(
|
||||||
resource_id=resource_id
|
controller="package",
|
||||||
))
|
action="resource_read",
|
||||||
|
id="annakarenina",
|
||||||
|
resource_id=resource_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
assert 'onclick="javascript: _gaq.push(' in response.body
|
assert 'onclick="javascript: _gaq.push(' in response.body
|
||||||
|
|
Loading…
Reference in New Issue