Skip to content

Instantly share code, notes, and snippets.

@ernestom
Created October 2, 2012 02:39
Show Gist options
  • Save ernestom/3815865 to your computer and use it in GitHub Desktop.
Save ernestom/3815865 to your computer and use it in GitHub Desktop.
# Column headers of the expected CSV export (index 0 = URL, 13 = Offloaded Hits):
# URL,Ok Volume (MB),Error Volume (MB),Ok Hits,0xx,200,206,2xx,302,304,3xx,404,4xx,Offloaded Hits,Origin Hits,Origin OK Volume (MB) ,Origin Error Volume (MB)
def parse_file(file):
    """Read a comma-separated report and return its rows as lists of fields.

    Lines starting with '#' and blank lines are skipped. Each remaining
    line has its line terminator removed and is split on plain commas;
    no CSV quoting is interpreted.

    file -- path of the file to read.
    Returns a list of rows, each row being a list of strings.
    """
    rows = []
    # `with` guarantees the handle is closed even if reading fails.
    with open(file) as handle:
        for raw in handle:
            record = raw.rstrip('\r\n')
            # A blank line would produce a one-field row and break the
            # positional lookups done by the callers.
            if not record.strip() or record.startswith('#'):
                continue
            rows.append(record.split(','))
    return rows
def is_cached(line):
    """Tell whether a row reports at least one offloaded hit.

    The count is taken from the field at index 13 and parsed as an integer.
    """
    offloaded_hits = int(line[13])
    return offloaded_hits > 0
def is_web_page_with_traffic(line):
    """Tell whether a row looks like a web page that served some volume.

    A row counts as a page when its URL (field 0) ends with '.jsp',
    '.htm', 'html' or '/'. It has traffic when field 1, parsed as a
    float, is greater than zero.
    """
    address = line[0]
    looks_like_page = address.endswith(('.jsp', '.htm', 'html', '/'))
    # Parsed unconditionally so a malformed volume always raises.
    volume = float(line[1])
    return looks_like_page and volume > 0
def get_cached_objects_percentage(lines):
    """Return the share of rows that are cached, formatted like '12.34 %'.

    lines -- list of parsed rows; a row is counted when is_cached(row)
             is true.
    Returns '0.00 %' for an empty list instead of dividing by zero.
    """
    if not lines:
        return '%.2f %%' % 0.0
    cached = sum(1 for row in lines if is_cached(row))
    return '%.2f %%' % (100.0 / len(lines) * cached)
def get_avg_size_if(lines, callback):
    """Return the mean per-hit size of the selected rows, like '12.34 KB'.

    For every row accepted by `callback`, the size is field 1 divided by
    field 3 (both parsed as floats); a row whose field 3 is zero
    contributes a size of 0. The mean is multiplied by 1024 before
    formatting.

    lines    -- list of parsed rows.
    callback -- predicate called with a row; rows for which it returns a
                false value are ignored.
    Returns '0.00 KB' when no row is selected instead of dividing by zero.
    """
    sizes = []
    for row in lines:
        if not callback(row):
            continue
        volume = float(row[1])
        hits = float(row[3])
        # Zero hits would raise ZeroDivisionError; count the row as size 0.
        sizes.append(volume / hits if hits else 0.0)
    if not sizes:
        return '%.2f KB' % 0.0
    return '%.2f KB' % (sum(sizes) / len(sizes) * 1024)
def get_avg_cached_object_size(lines):
    """Return the formatted average size over the rows that are cached."""
    # The predicate is passed directly; no wrapper is needed.
    return get_avg_size_if(lines, is_cached)
def get_avg_web_page_size(lines):
    """Return the formatted average size over web-page rows with traffic."""
    # The predicate is passed directly; no wrapper is needed.
    return get_avg_size_if(lines, is_web_page_with_traffic)
def main(path=None):
    """Print the three report statistics for a CSV export.

    path -- file to analyse; when omitted, the original hard-coded
            export location is used.

    Prints, in order: the cached-objects percentage, the average cached
    object size and the average web page size.
    """
    if path is None:
        path = '/Users/ernesto/Downloads/dsa_urls_volume_164077_10-01-2012-07-55-50.csv'
    lines = parse_file(path)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(get_cached_objects_percentage(lines))
    print(get_avg_cached_object_size(lines))
    print(get_avg_web_page_size(lines))


if __name__ == '__main__':
    import sys
    # An optional first command-line argument overrides the default path.
    main(sys.argv[1] if len(sys.argv) > 1 else None)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment