forked from debrouwere/python-omniture
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 861833b
Showing
6 changed files
with
496 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
.DS_Store | ||
*.pyc | ||
*.egg-info |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
# python-omniture | ||
|
||
`python-omniture` is a wrapper around the Adobe Omniture web analytics API. | ||
|
||
It is not meant to be comprehensive. Instead, it provides a high-level interface | ||
to certain common kinds of queries, and allows you to construct other queries | |
closer to the metal. | ||
|
||
## Installation | ||
|
||
pip install git+git://github.com/stdbrouw/python-omniture.git | ||
|
||
`python-omniture` is currently not available through `PyPI` but will be | ||
once the module matures. | ||
|
||
## Authentication | ||
|
||
The most straightforward way to authenticate is with: | |
|
||
import omniture | ||
analytics = omniture.Account() | ||
analytics.authenticate('my_username', 'my_secret') | ||
|
||
However, to avoid hardcoding passwords, you can instead put your username | |
and password in unix environment variables (e.g. in your `.bashrc`): | ||
|
||
export OMNITURE_USERNAME=my_username | ||
export OMNITURE_SECRET=my_secret | ||
|
||
With your credentials in the environment, you can then log in as follows: | ||
|
||
import os | ||
import omniture | ||
analytics = omniture.Account() | ||
analytics.authenticate(os.environ) | ||
|
||
## Account and suites | ||
|
||
You can very easily access some basic information about your account and your | ||
reporting suites: | ||
|
||
print analytics.suites | ||
suite = analytics.suites['guardiangu-network'] | ||
print suite | ||
print len(suite.evars) | ||
print suite.segments | ||
print suite.elements | ||
|
||
You can refer to suites, segments, elements and so on using either their | |
human-readable name or their id. So for example `suite.segments['Guardian Network - Global Rollup']` and `suite.segments['guardiangu-network']` will work exactly the same. This is especially useful in cases when segment or metric identifiers are long strings of gibberish. | ||
|
||
## Running a report | ||
|
||
`python-omniture` can run ranked, trended and "over time" reports | ||
|
||
* over_time | ||
* supports multiple metrics but only one element: time | ||
* useful if you need information on a per-page basis | ||
* ranked | ||
* ranks pages in relation to the metric | ||
* one number (per metric) for the entire reporting period | ||
* trended | ||
* movement of a single element and metric over time (e.g. visits to world news over time) | ||
|
||
It's still early days -- expect some things not to work. | ||
|
||
report = network.report \ | ||
.over_time(metrics=['pageviews', 'visitors']) \ | ||
.range('2013-05-01', '2013-05-31', granularity='month') \ | |
.sync() | ||
|
||
Accessing the data in a report works as follows: | ||
|
||
report.data['pageviews'] | ||
|
||
### Running multiple reports | ||
|
||
If you're interested in automating a large number of reports, you can speed up the | ||
execution by first queueing all the reports and only then waiting on the results. | ||
|
||
Here's an example: | ||
|
||
queue = [] | ||
for segment in segments: | ||
report = network.report \ | ||
.range('2013-05-01', '2013-05-31', granularity='day') \ | ||
.over_time(metrics=['pageviews']) \ | ||
.filter(segment=segment) | ||
queue.append(report) | ||
|
||
heartbeat = lambda: sys.stdout.write('.') | ||
reports = omniture.sync(queue, heartbeat) | ||
|
||
for report in reports: | ||
print report.segment | ||
print report.data['pageviews'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,279 @@ | ||
import requests | ||
import binascii | ||
from datetime import datetime | ||
import time | ||
import sha | ||
import json | ||
import utils | ||
|
||
|
||
class Value(object):
    """A titled, identified entity (metric, element, segment, suite, ...).

    ``title`` is the human-readable label and ``id`` the API identifier.
    Every entry of ``properties`` is additionally copied onto the instance as
    an attribute, so the raw fields of the source dict stay reachable.
    """

    def __init__(self, title, id, properties=None):
        self.title = title
        self.id = id

        # ``None`` stands in for "no extra properties" so that no dict object
        # is shared between calls through the default argument.
        if properties is None:
            properties = {}

        # Properties are applied last: a key named ``title`` or ``id``
        # overrides the positional value set above.
        for key, value in properties.items():
            setattr(self, key, value)

    @classmethod
    def list(cls, name, items, title='title', id='id'):
        """Wrap raw dicts in ``Value`` objects inside a ``utils.AddressableList``.

        ``title`` and ``id`` name the keys of each item that hold its label
        and identifier; the whole item is passed along as ``properties``.
        Plain ``Value`` objects are built regardless of the class this is
        called on, because subclasses may have a different constructor.
        """
        values = [Value(item[title], item[id], item) for item in items]
        return utils.AddressableList(values, name)

    def __repr__(self):
        return "<{title}: {id}>".format(**self.__dict__)
|
||
|
||
class Account(object):
    """Entry point to the Omniture REST API for one set of credentials.

    Call ``authenticate`` before anything else: it stores the credentials
    used to sign every request and loads the account's report suites into
    ``self.suites``.
    """

    def __init__(self, endpoint='https://api.omniture.com/admin/1.3/rest/'):
        self.endpoint = endpoint

    def initialize(self):
        """Fetch the report suites of the account and store them as ``suites``."""
        data = self.request('Company', 'GetReportSuites')['report_suites']
        suites = [Suite(suite['site_title'], suite['rsid'], self) for suite in data]
        self.suites = utils.AddressableList(suites)

    def request(self, api, method, query=None):
        """POST ``query`` as JSON to ``<api>.<method>`` and return the decoded reply.

        ``query`` defaults to an empty JSON object. Each call is signed with
        a freshly built ``X-WSSE`` header.
        """
        if query is None:
            query = {}

        response = requests.post(
            self.endpoint,
            params={'method': api + '.' + method},
            data=json.dumps(query),
            headers=self._build_token(),
        )
        return response.json()

    def _serialize_header(self, properties):
        """Render ``properties`` as a comma-separated list of ``key="value"`` pairs."""
        header = []
        for key, value in properties.items():
            header.append('{key}="{value}"'.format(key=key, value=value))
        return ', '.join(header)

    def _build_token(self):
        """Build the ``X-WSSE`` header dict that signs a single request.

        The password digest is the base64-encoded SHA-1 of
        ``nonce + created + secret``.
        """
        # hashlib replaces the ``sha`` module, which no longer exists on
        # Python 3; imported here so the block does not rely on a file-level
        # import being added.
        import hashlib

        nonce = str(time.time())
        base64nonce = binascii.b2a_base64(binascii.a2b_qp(nonce))
        # The trailing 'Z' declares UTC, so the timestamp has to be taken
        # from gmtime rather than from local time.
        created_date = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

        seed = nonce + created_date + self.secret
        if not isinstance(seed, bytes):
            # Text (unicode) input has to be encoded before hashing.
            seed = seed.encode('utf-8')
        password_64 = binascii.b2a_base64(hashlib.sha1(seed).digest())

        properties = {
            "Username": self.username,
            # b2a_base64 appends a newline and returns bytes on Python 3.
            "PasswordDigest": password_64.strip().decode('ascii'),
            "Nonce": base64nonce.strip().decode('ascii'),
            "Created": created_date,
        }
        header = 'UsernameToken ' + self._serialize_header(properties)

        return {'X-WSSE': header}

    def authenticate(self, username, secret=None, prefix='', suffix=''):
        """Store credentials, then load the account's report suites.

        With a ``secret``, ``username`` and ``secret`` are used as given.
        Without one, ``username`` is treated as a mapping (for example
        ``os.environ``) from which the credentials are read under the keys
        ``utils.affix`` builds from ``prefix``, ``OMNITURE_USERNAME`` /
        ``OMNITURE_SECRET`` and ``suffix``.
        """
        if secret:
            self.username = username
            self.secret = secret
        else:
            source = username
            username_key = utils.affix(prefix, 'OMNITURE_USERNAME', suffix)
            secret_key = utils.affix(prefix, 'OMNITURE_SECRET', suffix)
            self.username = source[username_key]
            self.secret = source[secret_key]

        self.initialize()
|
||
|
||
class Suite(Value):
    """A report suite: a ``Value`` that can issue requests scoped to itself."""

    def __init__(self, title, id, account):
        super(Suite, self).__init__(title, id)

        self.account = account

    def request(self, api, method, query=None):
        """Send ``query`` through the owning account, scoped to this suite.

        A query carrying a ``reportDescription`` gets this suite's id as its
        ``reportSuiteID``; otherwise calls to the ``ReportSuite`` API get an
        ``rsid_list`` holding this suite's id. The caller's ``query`` and its
        nested ``reportDescription`` are copied, never modified.
        """
        raw_query = dict(query or {})
        if 'reportDescription' in raw_query:
            # Copy the nested dict as well: a shallow copy of ``query`` alone
            # would still write the suite id into the caller's description.
            description = dict(raw_query['reportDescription'])
            description['reportSuiteID'] = self.id
            raw_query['reportDescription'] = description
        elif api == 'ReportSuite':
            raw_query['rsid_list'] = [self.id]

        return self.account.request(api, method, raw_query)

    # NOTE(review): ``utils.memoize`` is not visible here; presumably it
    # caches each lookup below so the API is queried only once -- confirm.

    @property
    @utils.memoize
    def metrics(self):
        """Metrics available in this suite, keyed by display name and metric name."""
        data = self.request('ReportSuite', 'GetAvailableMetrics')[0]['available_metrics']
        return Value.list('metrics', data, 'display_name', 'metric_name')

    @property
    @utils.memoize
    def elements(self):
        """Elements available in this suite, keyed by display name and element name."""
        data = self.request('ReportSuite', 'GetAvailableElements')[0]['available_elements']
        return Value.list('elements', data, 'display_name', 'element_name')

    @property
    @utils.memoize
    def evars(self):
        """eVars of this suite, keyed by name and eVar number."""
        data = self.request('ReportSuite', 'GetEVars')[0]['evars']
        return Value.list('evars', data, 'name', 'evar_num')

    @property
    @utils.memoize
    def segments(self):
        """Segments of this suite, keyed by name and id."""
        data = self.request('ReportSuite', 'GetSegments')[0]['sc_segments']
        return Value.list('segments', data, 'name', 'id')

    @property
    def report(self):
        """Start a new, empty ``Query`` against this suite."""
        return Query(self)
|
||
|
||
class Query(object):
    """Chainable builder for one report request against a suite.

    The builder methods fill ``self.raw`` (the ``reportDescription`` sent to
    the API) and return ``self`` so calls can be chained; ``queue`` / ``sync``
    submit the report and fetch its result.
    """

    def __init__(self, suite):
        self.suite = suite
        # NOTE: this instance attribute shadows the ``raw`` method defined
        # below, so ``query.raw`` is always this dict.
        self.raw = {}
        self.id = None

    def _get_key(self, value, category, expand=False):
        """Resolve ``value`` to its id, looking it up on the suite if needed.

        ``value`` is either a ``Value`` or a key into the suite collection
        named by ``category`` (e.g. ``'metrics'``). With ``expand`` set to a
        key name, ``{expand: id}`` is returned instead of the bare id.
        """
        if not isinstance(value, Value):
            value = getattr(self.suite, category)[value]

        if expand:
            return {expand: value.id}
        return value.id

    def range(self, start, stop=None, granularity='day'):
        """Set the reporting period: one date, or a from/to pair."""
        stop = stop or start

        if start == stop:
            self.raw['date'] = start
        else:
            self.raw.update({
                'dateFrom': start,
                'dateTo': stop,
            })

        self.raw['dateGranularity'] = granularity

        return self

    def raw(self, properties):
        """Merge ``properties`` into the raw report description.

        Not reachable through an instance, because ``__init__`` stores the
        description dict under the same name; only ``Query.raw(query, ...)``
        resolves to this function. Use ``set`` with a mapping instead.
        """
        self.raw.update(properties)
        return self

    def set(self, key, value=None):
        """Set one raw property, or merge a whole mapping of them.

        ``set('key', value)`` assigns a single entry; ``set({...})`` merges
        every entry of the dict.
        """
        if isinstance(key, dict):
            self.raw.update(key)
        else:
            self.raw[key] = value
        return self

    def sort(self, facet):
        """Not implemented yet."""
        raise NotImplementedError()

    def filter(self, segment=None, element=None):
        """Restrict the report to ``segment``; element filters are not implemented."""
        if segment:
            self.raw['segment_id'] = self._get_key(segment, 'segments')

        if element:
            raise NotImplementedError()

        return self

    def ranked(self, metric):
        """Request a ranked report for a single metric."""
        self.raw['metrics'] = [self._get_key(metric, 'metrics', expand='id')]
        self.method = 'QueueRanked'
        return self

    def trended(self, metric, element):
        """Request a trended report for one metric and one element."""
        # Both arguments used to be ignored, which queued a report with an
        # empty description; they are now recorded the way ``ranked`` and
        # ``over_time`` record their metrics.
        self.raw['metrics'] = [self._get_key(metric, 'metrics', expand='id')]
        self.raw['elements'] = [self._get_key(element, 'elements', expand='id')]
        self.method = 'QueueTrended'
        return self

    def over_time(self, metrics):
        """Request an over-time report for one or more metrics."""
        self.method = 'QueueOvertime'
        self.raw['metrics'] = [
            self._get_key(metric, 'metrics', expand='id') for metric in metrics
        ]
        return self

    def build(self):
        """Return the request payload wrapping the raw description."""
        return {'reportDescription': self.raw}

    def queue(self):
        """Submit the report and remember the report id the API returns."""
        q = self.build()
        self.id = self.suite.request('Report', self.method, q)['reportID']
        return self

    def probe(self, fn, heartbeat=None, interval=1):
        """Call ``fn`` every ``interval`` seconds until its status is done/ready.

        ``heartbeat``, when given, is invoked before each wait. Returns the
        last response of ``fn``.
        """
        status = ''
        while status not in ['done', 'ready']:
            if heartbeat:
                heartbeat()
            time.sleep(interval)
            response = fn()
            status = response['status']

        return response

    def sync(self, heartbeat=None, interval=1):
        """Queue the report if necessary, block until it is ready, return a ``Report``."""
        if not self.id:
            self.queue()

        # this looks clunky, but Omniture sometimes reports a report
        # as ready when it's really not
        status = lambda: self.suite.request('Report', 'GetStatus', {'reportID': self.id})
        report = lambda: self.suite.request('Report', 'GetReport', {'reportID': self.id})
        self.probe(status, heartbeat, interval)
        response = self.probe(report, heartbeat, interval)
        return Report(response, self)

    def _async(self, callback=None, heartbeat=None, interval=1):
        """Queue the report if necessary; asynchronous retrieval is not implemented."""
        if not self.id:
            self.queue()

        raise NotImplementedError()

    def cancel(self):
        """Ask the API to cancel the queued report."""
        return self.suite.request('Report', 'CancelReport', {'reportID': self.id})


# ``async`` is a reserved word from Python 3.7 on, so a ``def async`` inside
# the class would stop the module from being parsed there. Registering the
# method under its historical name keeps ``query.async(...)`` working on
# Python 2.
setattr(Query, 'async', Query.__dict__['_async'])
|
||
|
||
# TODO: also make this iterable (go through rows) | ||
class Report(object):
    """Parsed result of a finished report request.

    ``raw`` is the decoded API response and ``query`` the ``Query`` that
    produced it; parsing happens immediately on construction.
    """

    def __init__(self, raw, query):
        self.raw = raw
        self.query = query
        self.process()

    def process(self):
        """Unpack ``self.raw`` into status, timing, metadata and data columns."""
        payload = self.raw
        self.status = payload['status']
        self.timing = dict(
            queue=float(payload['waitSeconds']),
            execution=float(payload['runSeconds']),
        )

        body = payload['report']
        self.report = body
        self.metrics = Value.list('metrics', body['metrics'], 'name', 'id')
        self.elements = Value.list('elements', body['elements'], 'name', 'id')
        self.period = body['period']

        # An empty segment id means the report was not segmented.
        segment_id = body['segment_id']
        self.segment = (
            self.query.suite.segments[segment_id] if len(segment_id) else None
        )

        # NOTE(review): relies on ``utils.AddressableDict`` (not visible
        # here) yielding per-metric columns on iteration and supporting
        # positional lookup -- confirm against utils.
        self.data = utils.AddressableDict(self.metrics)
        for column in self.data:
            column.value = []

        for row in body['data']:
            for index, count in enumerate(row['counts']):
                is_number = self.metrics[index].type == 'number'
                self.data[index].append(float(count) if is_number else count)

    def to_dataframe(self):
        """Not implemented yet; pandas is imported before the error is raised."""
        import pandas as pd
        raise NotImplementedError()
        # return pd.DataFrame()
|
||
|
||
def sync(queries, heartbeat=None, interval=1):
    """Queue every query first, then wait for each result in turn.

    Submitting all reports before waiting on the first one lets them be
    prepared concurrently on the server side. ``heartbeat`` and ``interval``
    are passed on to each query's ``sync``. Returns the results in the order
    of ``queries``.
    """
    # Materialise once: ``queries`` is walked twice, and a generator or other
    # one-shot iterable would be exhausted by the queueing pass, leaving the
    # result list empty.
    queries = list(queries)

    for query in queries:
        query.queue()

    return [query.sync(heartbeat, interval) for query in queries]
Oops, something went wrong.