A quick script to download data on Google App Engine datastore downtime.
# this file is from the google-chartwrapper project (http://code.google.com/p/google-chartwrapper/)

coding = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
ecoding = coding + '-.'

codeset = {
    'simple': {
        'coding': coding,
        'max_value': 61,
        'char': ',',
        'dchar': '',
        'none': '_',
        'value': lambda x: coding[x]
    },
    'text': {
        'coding': '',
        'max_value': 100,
        'none': '-1',
        'char': '|',
        'dchar': ',',
        'value': lambda x: '%.1f' % float(x)
    },
    'extended': {
        'coding': ecoding,
        'max_value': 4095,
        'none': '__',
        'dchar': '',
        'char': ',',
        'value': lambda x: '%s%s' % \
            (ecoding[int(float(x) / 64)], ecoding[int(x % 64)])
    }
}
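
# A quick illustration (added here, not part of the upstream module) of what the
# per-encoding 'value' callables produce:
#   codeset['simple']['value'](0)      -> 'A'
#   codeset['text']['value'](42)       -> '42.0'
#   codeset['extended']['value'](4095) -> '..'   (two characters per value)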

class Encoder:
    """Data encoder that handles simple, text, and extended encodings
    Based on javascript encoding algorithm and pygooglecharts"""

    def __init__(self, encoding=None, scale=None, series=''):
        self.series = series or ''
        if encoding is None:
            encoding = 'text'
        assert encoding in ('simple', 'text', 'extended'), \
            'Unknown encoding: %s' % encoding
        self.encoding = encoding
        self.scale = scale
        self.codeset = codeset[encoding]

    def scalevalue(self, value):
        return value  # scaling is disabled for now ("one day..."); the code below is never reached
        if self.encoding != 'text' and self.scale and \
                (isinstance(value, int) or isinstance(value, float)):
            if type(self.scale) == type(()):
                lower, upper = self.scale
            else:
                lower, upper = 0, float(self.scale)
            value = int(round(float(value - lower) * \
                self.codeset['max_value'] / upper))
        return min(value, self.codeset['max_value'])
    def encode(self, *args, **kwargs):
        """Encode wrapper for a dataset with maximum value
        Datasets can be one or two dimensional
        Strings are ignored as ordinal encoding"""
        if isinstance(args[0], str):
            return self.encode([args[0]], **kwargs)
        elif isinstance(args[0], int) or isinstance(args[0], float):
            return self.encode([[args[0]]], **kwargs)
        if len(args) > 1:
            dataset = args
        else:
            dataset = args[0]
        typemap = list(map(type, dataset))
        code = self.encoding[0]
        if type('') in typemap:
            data = ','.join(map(str, dataset))
        elif type([]) in typemap or type(()) in typemap:
            data = self.codeset['char'].join(map(self.encodedata, dataset))
        elif len(dataset) == 1 and hasattr(dataset[0], '__iter__'):
            data = self.encodedata(dataset[0])
        else:
            try:
                data = self.encodedata(dataset)
            except ValueError:
                data = self.encodedata(','.join(map(unicode, dataset)))
        if not '.' in data and code == 't':
            code = 'e'
        return '%s%s:%s' % (code, self.series, data)
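
    # Examples of the dataset shapes accepted (an added note, not in the
    # upstream source); with Encoder('extended') and an empty series label:
    #   encode([1, 2])           -> 'e:ABAC'        (one-dimensional dataset)
    #   encode([[1, 2], [3, 4]]) -> 'e:ABAC,ADAE'   (two-dimensional dataset)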
    def encodedata(self, data):
        sub_data = []
        enc_size = len(self.codeset['coding'])
        for value in data:
            if value in (None, 'None'):
                sub_data.append(self.codeset['none'])
            elif isinstance(value, str):
                sub_data.append(value)
            elif value >= -1:
                try:
                    sub_data.append(self.codeset['value'](self.scalevalue(value)))
                except:
                    raise ValueError('cannot encode value: %s' % value)
        return self.codeset['dchar'].join(sub_data)
    def decode(self, astr):
        e = astr[0]
        dec_data = []
        for data in astr[2:].split(self.codeset['char']):
            sub_data = []
            if e == 't':
                sub_data.extend(map(float, data.split(',')))
            elif e == 'e':
                flag = 0
                index = self.codeset['coding'].index
                for i in range(len(data)):
                    if not flag:
                        this, next = index(data[i]), index(data[i + 1])
                        flag = 1
                        sub_data.append((64 * this) + next)
                    else:
                        flag = 0
            elif e == 's':
                sub_data.extend(map(self.codeset['coding'].index, data))
            dec_data.append(sub_data)
        return dec_data
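
# A minimal, hedged usage sketch (added for illustration; not part of the
# upstream module). It round-trips a small extended-encoded series.
if __name__ == '__main__':
    enc = Encoder('extended')
    encoded = enc.encode([0, 64, 4095])
    assert encoded == 'e:AABA..'
    assert enc.decode(encoded) == [[0, 64, 4095]]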

# ---- second gist file: the downtime script (imports the Encoder module above, saved as encoding.py) ----

import datetime
import gzip
import re
import StringIO
import urllib2

from encoding import Encoder  # from google-chartwrapper
from numpy import numarray
import pylab

DATASTORE_STATUS_URL = 'http://code.google.com/status/appengine/detail/datastore/%s'
DATE_FORMAT = '%Y/%m/%d'
ENC = Encoder('extended')
#ERROR_RATES = ('get', 'put', 'update', 'delete', 'query')
ERROR_RATES = ('put', 'update', 'delete')
NUM_BINS = 24 * 4  # bins are 15 minutes wide
RAW_MAX = 4095
X_MAX = 24.0
Y_MAX = 100.0
Y_THRESH = 0.0

RE_CHD = re.compile(r'&chd=([^&]*)')
def extract_data_string_from_url(url):
    m = RE_CHD.search(url)
    if m:
        return m.group(1)
    else:
        raise Exception("url does not contain any data - missing query parameter 'chd'")
RESTR_SET = r'[^,]*'
RE_SECOND_SET = re.compile(r'e:%s,%s' % (RESTR_SET, RESTR_SET))

def data_string_to_scaled_data(data_str):
    m = RE_SECOND_SET.search(data_str)
    if not m:
        raise Exception("data string does not contain data in the expected format")
    edata = m.group(0)
    if not edata:
        return []  # no downtime if there is only one data set
    xraw, yraw = ENC.decode(edata)
    xscl = [x * X_MAX / RAW_MAX for x in xraw]
    yscl = [y * Y_MAX / RAW_MAX for y in yraw]
    return zip(xscl, yscl)
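
# A worked example (added; the data string is made up): the extended-encoded
# pair 'e:AA..,AA..' decodes to xraw=[0, 4095] and yraw=[0, 4095], which scale to
#   data_string_to_scaled_data('e:AA..,AA..') -> [(0.0, 0.0), (24.0, 100.0)]
# i.e. hour of the day on x and an error-rate percentage on y.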
def bin_data(data_scaled, y_thresh=Y_THRESH):
    """Returns a list. Each element represents a period of time throughout the
    day (length of time determined by NUM_BINS - e.g., 12 bins => each element
    covers 24/12=2 hours). The element is True iff there was downtime greater
    than y_thresh.
    """
    prev_down_i = None
    ret = [False] * NUM_BINS
    for x, y in data_scaled:
        if y > y_thresh:
            i = min(int(x / X_MAX * NUM_BINS), NUM_BINS - 1)
            ret[i] = True
            # if the previous reading was downtime too, then mark all bins
            # between the previous reading and this reading as downtime
            if prev_down_i is not None:
                for j in xrange(prev_down_i, i):
                    ret[j] = True
            prev_down_i = i
        else:
            prev_down_i = None
    return ret
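
# For instance (added illustration), with NUM_BINS = 96 (15-minute bins) and
# y_thresh = 0.0, two consecutive downtime readings at x = 12.5 h and x = 13.3 h
# land in bins 50 and 53, and the gap-filling loop also marks bins 51 and 52:
#   bin_data([(12.5, 3.0), (13.3, 1.0)])  -> bins 50-53 are True, all others False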
def extract_data_for_specific_error_rate(html, name):
    m = re.search(r'"ae-trust-detail-datastore-%s-error_rate-link"[^(]*[(]([^)]*)[)]' % name, html, re.DOTALL)
    if not m:
        raise Exception("Missing data for %s" % name)
    chart_url = m.group(1)
    data_str = extract_data_string_from_url(chart_url)
    data_scaled = data_string_to_scaled_data(data_str)
    data_binned = bin_data(data_scaled)
    return data_binned
def extract_error_rate_data(url, error_rates=ERROR_RATES):
    """Extracts error rate data from `url` for each error rate specified in
    error_rates. The output is like that of bin_data(), except the results of
    each error_rate are OR'd together. Thus each element/bin in the returned
    list indicates whether ANY error rate was greater than Y_THRESH for the
    chunk of time it represents.
    """
    try:
        resp = urllib2.urlopen(url)
    except urllib2.HTTPError, e:
        raise Exception('HTTP failure: %s (%s)' % (e, url))
    except urllib2.URLError, e:
        raise Exception('Fetch failure: %s (%s)' % (e, url))
    html = resp.read()
    if resp.info().get('content-encoding') == 'gzip':
        html = gzip.GzipFile(fileobj=StringIO.StringIO(html)).read()
    datas = []
    for name in error_rates:
        datas.append(extract_data_for_specific_error_rate(html, name))
    # combine the channels - if any is True, mark the bin as True
    return [any(t) for t in zip(*datas)]
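
# The OR-combination in the return statement works like this (added example):
#   datas = [[True, False, False],     # e.g. 'put' error rate per bin
#            [False, False, True]]     # e.g. 'update' error rate per bin
#   [any(t) for t in zip(*datas)]  -> [True, False, True]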
def plot_bins(data, num_days, num_bins, yticksz=5, width=0.5):
    labels = []
    if num_bins >= 24:
        for i in xrange(24):
            labels += ['%02d:00' % i]
            labels += [''] * (num_bins / 24 - 1)
    else:
        for i in xrange(num_bins):
            labels += ['%d:00-%d:00' % (i * (24.0 / num_bins), (i + 1) * (24.0 / num_bins))]
    xlocs = numarray.array(range(len(data))) + width
    pylab.bar(xlocs, data, width=width)
    pylab.xticks(xlocs + width / 2.0, labels)
    pylab.xlabel("Time of Day")
    pylab.yticks(filter(lambda x: x % yticksz == 0, range(0, max(data) + yticksz)))
    pylab.ylabel("# of Days with Downtime")
    pylab.xlim(0, xlocs[-1] + width * 2)
    pylab.ylim(0, int((max(data) + yticksz) / yticksz) * yticksz + 1)
    pylab.title("# Days with Downtime in the Past %d Days" % num_days)
    pylab.gca().get_xaxis().tick_bottom()
    pylab.gca().get_yaxis().tick_left()
    pylab.show()
def main(start=datetime.datetime(2009, 7, 20), end=datetime.datetime.now()):
    """Time of day is divided up into NUM_BINS equi-sized chunks. Then downtime
    from each day from `start` to `end` is retrieved. Each chunk counts the
    number of days which experienced downtime greater than Y_THRESH during the
    time block allocated to the chunk.
    Data is printed to stdout and shown in a plot.
    """
    dt = start
    one_day = datetime.timedelta(days=1)
    counts = [0] * NUM_BINS
    num_days = 0
    while dt < end:
        dt_str = dt.strftime(DATE_FORMAT)
        print 'working on', dt_str
        datas_for_day = extract_error_rate_data(DATASTORE_STATUS_URL % dt_str)
        print '%s => %s' % (dt_str, datas_for_day)
        for i, b in enumerate(datas_for_day):
            if b:
                counts[i] += 1
        dt = dt + one_day
        num_days += 1
    print 'num days = %d' % num_days
    print 'chunk downtime counts =', counts
    plot_bins(counts, num_days, NUM_BINS)
if __name__ == '__main__':
    main()