From 861833bf82c3e03fe325a75a51c881c1196418aa Mon Sep 17 00:00:00 2001 From: Stijn Debrouwere Date: Fri, 7 Jun 2013 17:40:23 +0100 Subject: [PATCH] Initial commit. --- .gitignore | 3 + README.md | 96 +++++++++++++++ omniture/__init__.py | 279 +++++++++++++++++++++++++++++++++++++++++++ omniture/utils.py | 53 ++++++++ setup.py | 24 ++++ test.py | 41 +++++++ 6 files changed, 496 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 omniture/__init__.py create mode 100644 omniture/utils.py create mode 100644 setup.py create mode 100644 test.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c5ddc3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +*.pyc +*.egg-info diff --git a/README.md b/README.md new file mode 100644 index 0000000..f6ce200 --- /dev/null +++ b/README.md @@ -0,0 +1,96 @@ +# python-omniture + +`python-omniture` is a wrapper around the Adobe Omniture web analytics API. + +It is not meant to be comprehensive. Instead, it provides a high-level interface +to certain common kinds of queries, and allows you to construct other queries +closer to the metal. + +## Installation + + pip install git+git://github.com/stdbrouw/python-omniture.git + +`python-omniture` is currently not available through `PyPI` but will be +once the module matures. + +## Authentication + +The most straightforward way to authenticate is with: + + import omniture + analytics = omniture.Account() + analytics.authenticate('my_username', 'my_secret') + +However, to avoid hardcoding passwords, you can instead put your username +and password in unix environment variables (e.g. 
in your `.bashrc`): + + export OMNITURE_USERNAME=my_username + export OMNITURE_SECRET=my_secret + +With your credentials in the environment, you can then log in as follows: + + import os + import omniture + analytics = omniture.Account() + analytics.authenticate(os.environ) + +## Account and suites + +You can very easily access some basic information about your account and your +reporting suites: + + print analytics.suites + suite = analytics.suites['guardiangu-network'] + print suite + print len(suite.evars) + print suite.segments + print suite.elements + +You can refer to suites, segments, elements and so on using either their +human-readable name or their id. So for example `suite.segments['Guardian Network - Global Rollup']` and `suite.segments['guardiangu-network']` will work exactly the same. This is especially useful in cases when segment or metric identifiers are long strings of gibberish. + +## Running a report + +`python-omniture` can run ranked, trended and "over time" reports: + +* over_time + * supports multiple metrics but only one element: time + * useful if you need information on a per-page basis +* ranked + * ranks pages in relation to the metric + * one number (per metric) for the entire reporting period +* trended + * movement of a single element and metric over time (e.g. visits to world news over time) + +It's still early days -- expect some things not to work. + + report = network.report \ + .over_time(metrics=['pageviews', 'visitors']) \ + .range('2013-05-01', '2013-05-31', granularity='month') \ + .sync() + +Accessing the data in a report works as follows: + + report.data['pageviews'] + +### Running multiple reports + +If you're interested in automating a large number of reports, you can speed up the +execution by first queueing all the reports and only then waiting on the results. 
class Account(object):
    """An authenticated connection to the Omniture REST API.

    Typical use is ``Account().authenticate(username, secret)``, after which
    the account's report suites are available as ``account.suites``.
    """

    def __init__(self, endpoint='https://api.omniture.com/admin/1.3/rest/'):
        """Remember the REST endpoint; no network traffic happens here."""
        self.endpoint = endpoint

    def initialize(self):
        """Fetch the report suites and expose them as ``self.suites``.

        Calls ``Company.GetReportSuites`` and wraps every entry in a
        ``Suite`` bound to this account.
        """
        data = self.request('Company', 'GetReportSuites')['report_suites']
        suites = [Suite(suite['site_title'], suite['rsid'], self) for suite in data]
        self.suites = utils.AddressableList(suites)

    def request(self, api, method, query=None):
        """POST ``query`` as JSON to ``<api>.<method>`` and return the decoded reply.

        ``query`` defaults to an empty JSON object. ``None`` is used as the
        default instead of a shared mutable ``{}``.
        """
        payload = {} if query is None else query
        response = requests.post(
            self.endpoint,
            params={'method': api + '.' + method},
            data=json.dumps(payload),
            headers=self._build_token()
        )
        return response.json()

    def _serialize_header(self, properties):
        """Render a mapping as ``key="value"`` pairs joined by ``', '``."""
        return ', '.join(
            '{key}="{value}"'.format(key=key, value=value)
            for key, value in properties.items())

    def _build_token(self):
        """Build the ``X-WSSE`` ``UsernameToken`` header for one request.

        The password digest is base64(SHA-1(nonce + created + secret)); the
        nonce itself is sent base64-encoded next to the creation timestamp.

        Returns a dict holding the single ``X-WSSE`` header.
        """
        # Local imports: `hashlib` replaces the deprecated `sha` module and
        # `uuid` provides a nonce that cannot collide between rapid requests
        # (str(time.time()) only has ~10ms resolution on Python 2).
        import hashlib
        import uuid

        nonce = uuid.uuid4().hex
        # The trailing 'Z' declares UTC, so the timestamp must really be UTC
        # rather than the machine's local time.
        created_date = datetime.utcnow().isoformat() + 'Z'

        raw = nonce + created_date + self.secret
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        nonce_bytes = nonce if isinstance(nonce, bytes) else nonce.encode('utf-8')

        # b2a_base64 returns bytes with a trailing newline; decode so the
        # header never contains a bytes repr such as b'...'.
        password_64 = binascii.b2a_base64(hashlib.sha1(raw).digest()).decode('ascii').strip()
        base64nonce = binascii.b2a_base64(nonce_bytes).decode('ascii').strip()

        properties = {
            "Username": self.username,
            "PasswordDigest": password_64,
            "Nonce": base64nonce,
            "Created": created_date,
        }
        header = 'UsernameToken ' + self._serialize_header(properties)

        return {'X-WSSE': header}

    def authenticate(self, username, secret=None, prefix='', suffix=''):
        """Store credentials, then load the account's report suites.

        Either pass ``username`` and ``secret`` directly, or pass a mapping
        (e.g. ``os.environ``) as ``username`` and omit ``secret``. In that
        case the credentials are read from the ``OMNITURE_USERNAME`` and
        ``OMNITURE_SECRET`` keys, optionally wrapped in ``prefix``/``suffix``
        via ``utils.affix``.
        """
        if secret is not None:
            self.username = username
            self.secret = secret
        else:
            source = username
            username_key = utils.affix(prefix, 'OMNITURE_USERNAME', suffix)
            secret_key = utils.affix(prefix, 'OMNITURE_SECRET', suffix)
            self.username = source[username_key]
            self.secret = source[secret_key]

        self.initialize()
class Query(object):
    """Chainable builder for one Omniture report request.

    Every builder method returns ``self`` so calls can be chained. The
    accumulated report description lives in the ``raw`` dict, the queued
    report's id in ``id`` and the API method chosen by ``ranked`` /
    ``trended`` / ``over_time`` in ``method``.
    """

    # Sentinel so ``set(mapping)`` can be told apart from ``set(key, None)``.
    _UNSET = object()

    def __init__(self, suite):
        self.suite = suite
        self.raw = {}
        self.id = None
        # No report type chosen yet; queue() refuses to run until one is.
        self.method = None

    def _get_key(self, value, category, expand=False):
        """Resolve ``value`` to its id within ``suite.<category>``.

        ``value`` may be a ``Value`` or anything the suite's addressable
        list accepts (title or id). With ``expand`` set, the id is returned
        wrapped as ``{expand: id}``.
        """
        if not isinstance(value, Value):
            value = getattr(self.suite, category)[value]

        if expand:
            return {expand: value.id}
        return value.id

    def range(self, start, stop=None, granularity='day'):
        """Set the reporting period: a single date or a from/to pair."""
        stop = stop or start

        # Drop keys left by an earlier call so a single date and a date
        # range never end up in the same description.
        for stale in ('date', 'dateFrom', 'dateTo'):
            self.raw.pop(stale, None)

        if start == stop:
            self.raw['date'] = start
        else:
            self.raw['dateFrom'] = start
            self.raw['dateTo'] = stop

        self.raw['dateGranularity'] = granularity
        return self

    def raw(self, properties):
        """Merge ``properties`` into the report description.

        NOTE: on instances this method is shadowed by the ``raw`` dict
        assigned in ``__init__``, so it is only reachable as
        ``Query.raw(query, properties)``. Use ``set(properties)`` instead.
        """
        return self.set(properties)

    def set(self, key, value=_UNSET):
        """Set one raw property, or merge a mapping of raw properties.

        ``set(key, value)`` stores a single entry; ``set(mapping)`` merges
        every entry of the mapping into the description.

        Raises:
            TypeError: when ``value`` is omitted and ``key`` is not a mapping.
        """
        if value is Query._UNSET:
            if not hasattr(key, 'keys'):
                raise TypeError(
                    "set() takes either a key and a value, or a mapping of properties")
            self.raw.update(key)
        else:
            self.raw[key] = value
        return self

    def sort(self, facet):
        """Sorting is not supported yet."""
        raise NotImplementedError()

    def filter(self, segment=None, element=None):
        """Restrict the report to ``segment``; element filters are unsupported."""
        # Fail before touching the description so an unsupported call does
        # not leave it half-updated.
        if element:
            raise NotImplementedError()

        if segment:
            self.raw['segment_id'] = self._get_key(segment, 'segments')

        return self

    def ranked(self, metric):
        """Request a ranked report for a single metric."""
        self.raw['metrics'] = [self._get_key(metric, 'metrics', expand='id')]
        self.method = 'QueueRanked'
        return self

    def trended(self, metric, element):
        """Request a trended report: one metric for one element over time."""
        # Both arguments have to reach the description; without them the
        # queued request would carry neither a metric nor an element.
        self.raw['metrics'] = [self._get_key(metric, 'metrics', expand='id')]
        self.raw['elements'] = [self._get_key(element, 'elements', expand='id')]
        self.method = 'QueueTrended'
        return self

    def over_time(self, metrics):
        """Request an over-time report for one or more metrics."""
        self.method = 'QueueOvertime'
        self.raw['metrics'] = [
            self._get_key(metric, 'metrics', expand='id') for metric in metrics]
        return self

    def build(self):
        """Return the request body: the description under ``reportDescription``."""
        return {'reportDescription': self.raw}

    def queue(self):
        """Submit the report and remember its ``reportID`` in ``self.id``.

        Raises:
            ValueError: when no report type has been chosen yet.
        """
        if self.method is None:
            raise ValueError(
                "Choose a report type with ranked(), trended() or over_time() "
                "before queueing")
        response = self.suite.request('Report', self.method, self.build())
        self.id = response['reportID']
        return self

    def probe(self, fn, heartbeat=None, interval=1):
        """Call ``fn`` every ``interval`` seconds until it reports completion.

        ``heartbeat``, when given, is invoked before every wait. Polling
        stops once the response's ``status`` is ``'done'`` or ``'ready'``;
        that response is returned.
        """
        status = ''
        while status not in ['done', 'ready']:
            if heartbeat:
                heartbeat()
            time.sleep(interval)
            response = fn()
            status = response['status']

        return response

    def sync(self, heartbeat=None, interval=1):
        """Queue the report if needed, wait for it and return a ``Report``."""
        if self.id is None:
            self.queue()

        # this looks clunky, but Omniture sometimes reports a report
        # as ready when it's really not
        status = lambda: self.suite.request('Report', 'GetStatus', {'reportID': self.id})
        report = lambda: self.suite.request('Report', 'GetReport', {'reportID': self.id})
        self.probe(status, heartbeat, interval)
        response = self.probe(report, heartbeat, interval)
        return Report(response, self)

    def asynchronous(self, callback=None, heartbeat=None, interval=1):
        """Asynchronous execution is not supported yet.

        Also reachable under its historical name ``async`` through
        ``getattr(query, 'async')``.
        """
        # Raise before queueing so no orphaned report is left on the server.
        raise NotImplementedError()

    def cancel(self):
        """Ask the API to cancel the queued report."""
        return self.suite.request('Report', 'CancelReport', {'reportID': self.id})


# ``async`` became a reserved word in Python 3.7, so the method cannot be
# defined under that name; keep the original attribute for existing callers.
setattr(Query, 'async', Query.__dict__['asynchronous'])
class memoize(object):
    """Cache a function's results, keyed on its positional arguments.

    Arguments must be hashable. Cached results (and the arguments they are
    keyed on) are kept for the lifetime of the decorated function.
    """

    def __init__(self, function):
        self.function = function
        self.memoized = {}

    def __call__(self, *args):
        try:
            return self.memoized[args]
        except KeyError:
            result = self.memoized[args] = self.function(*args)
            return result


class AddressableList(list):
    """A list whose items can also be looked up by ``title`` or ``id``.

    Integer and slice keys behave exactly as on a plain list. Any other key
    is compared with each item's ``title`` and ``id``.
    """

    def __init__(self, items, name='items'):
        super(AddressableList, self).__init__(items)
        # Only used to make the "cannot find" error message readable.
        self.name = name

    def __getitem__(self, key):
        """Return the item at an index/slice, or the one matching ``key``.

        Raises:
            KeyError: when no item, or more than one item, matches ``key``.
        """
        # Slices must be handed to ``list`` too: extended slices on
        # Python 2 and every slice on Python 3 arrive here as a ``slice``.
        if isinstance(key, (int, slice)):
            return super(AddressableList, self).__getitem__(key)

        matches = [item for item in self if item.title == key or item.id == key]
        if len(matches) == 1:
            return matches[0]

        if matches:
            error = "Found multiple matches for {key}: {matches}. ".format(
                key=key, matches=", ".join(map(repr, matches)))
            advice = "Use the identifier instead."
            raise KeyError(error + advice)

        raise KeyError("Cannot find {key} among the available {name}".format(
            key=key, name=self.name))


class AddressableDict(AddressableList):
    """An ``AddressableList`` whose lookups return each item's ``value``."""

    def __getitem__(self, key):
        found = super(AddressableDict, self).__getitem__(key)
        if isinstance(key, slice):
            # A slice yields several items; unwrap each of them.
            return [item.value for item in found]
        return found.value


def affix(prefix, base, suffix, connector='_'):
    """Wrap ``base`` in an optional prefix and suffix.

    A falsy ``prefix`` or ``suffix`` is skipped together with its connector,
    e.g. ``affix('MY', 'NAME', '')`` gives ``'MY_NAME'``.
    """
    head = prefix + connector if prefix else ''
    tail = connector + suffix if suffix else ''
    return head + base + tail
+print len(network.evars) +#pprint(network.segments) +print network.segments['First Time Visitors'] + +segments = [ + 'UK (Locked)', + 'US (Locked)', + ] + +queue = [] + +for segment in segments: + report = network.report \ + .range('2013-05-01', '2013-05-31', granularity='day') \ + .over_time(metrics=['pageviews']) \ + .filter(segment=segment) + + queue.append(report) + +def heartbeat(): + sys.stdout.write('.') + sys.stdout.flush() + +reports = omniture.sync(queue, heartbeat) + +for report in reports: + print report.segment + print report.data['pageviews'] +