Created
April 9, 2015 18:34
-
-
Save cgoldberg/8fa5fd1ed4e620efd633 to your computer and use it in GitHub Desktop.
Analyze the most-viewed and the slowest pages (95th-percentile onload time) using onload event beacon data.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import collections
import re
import sys
from operator import itemgetter

import numpy
# Log file that is scanned line by line for page and onload-time fields.
DATA_FILE = 'perflog-everything-onload.csv'
# Maximum number of rows printed in each report table.
NUM_RESULTS = 50
# Field extractors.  The quotes are doubled ("") in both patterns --
# presumably because the payload is embedded inside a quoted CSV field;
# TODO confirm against the data file.
PAGE_REGEX = re.compile(r'""page"": ""(.+?)""')
# Captures the raw onload value; the script converts it with int() and
# divides by 1000.0 (presumably milliseconds -> seconds).
ONLOAD_REGEX = re.compile(r'""value"": ""(.+?)""')
def count_pages(filename):
    """Count the event rows and the distinct pages found in a log file.

    Args:
        filename: Path of the log file to scan, one event per line.

    Returns:
        A ``(total_events, unique_pages)`` tuple: the number of rows that
        carry a page field, and the number of distinct page values among
        them.  Rows without a page field (blank lines, a header row,
        truncated records) are ignored rather than raising
        ``AttributeError``.
    """
    total_events = 0
    unique_pages = set()
    with open(filename) as f:
        # Iterate the file object directly so the whole log never has to
        # be held in memory at once.
        for row in f:
            match = PAGE_REGEX.search(row)
            if match is None:
                # search() returns None when the row has no page field.
                continue
            total_events += 1
            unique_pages.add(match.group(1))
    return total_events, len(unique_pages)
def _load_page_times(filename):
    """Group onload times by page URL.

    Each row is searched with PAGE_REGEX and ONLOAD_REGEX; the captured
    onload value is converted with int() and divided by 1000.0
    (presumably milliseconds -> seconds; confirm against the data).

    Returns:
        A ``(page_times, skipped)`` tuple where ``page_times`` maps each
        page URL to the list of its onload times and ``skipped`` is the
        number of non-blank rows that could not be parsed.
    """
    page_times = collections.defaultdict(list)
    skipped = 0
    with open(filename) as f:
        for row in f:
            if not row.strip():
                # Blank lines carry no data and are not worth a warning.
                continue
            page_match = PAGE_REGEX.search(row)
            onload_match = ONLOAD_REGEX.search(row)
            if page_match is None or onload_match is None:
                skipped += 1
                continue
            try:
                onload_raw = int(onload_match.group(1))
            except ValueError:
                # Captured value is not an integer literal.
                skipped += 1
                continue
            page_times[page_match.group(1)].append(onload_raw / 1000.0)
    return page_times, skipped


def _summarize(page_times):
    """Return ``(num_requests, percentile_95_time, url)`` for every page."""
    return [
        (len(onload_times), numpy.percentile(onload_times, 95), url)
        for url, onload_times in page_times.items()
    ]


def _print_report(results, sort_index):
    """Print the top NUM_RESULTS results ranked by one tuple field.

    Args:
        results: List of ``(num_requests, percentile_95_time, url)``.
        sort_index: Tuple position to rank by, highest first
            (0 = number of requests, 1 = 95th-percentile time).
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('\n')
    print('views\ttime (95th)\turl')
    print('-' * 50)
    ranked = sorted(results, key=itemgetter(sort_index), reverse=True)
    for num_requests, percentile_95_time, url in ranked[:NUM_RESULTS]:
        # Round-trip through a 2-decimal string to trim the displayed value.
        timer = float('{0:.2f}'.format(percentile_95_time))
        print('{}\t{}\t\t{}'.format(num_requests, timer, url))


if __name__ == '__main__':
    page_times, skipped_rows = _load_page_times(DATA_FILE)
    results = _summarize(page_times)
    total_requests, unique_pages = count_pages(DATA_FILE)
    if skipped_rows:
        # Report unparseable rows on stderr so the report on stdout keeps
        # its original shape.
        sys.stderr.write(
            'warning: skipped {} malformed rows.\n'.format(skipped_rows))
    print('analyzing {} requests.'.format(total_requests))
    print('found {} unique pages.'.format(unique_pages))
    # First table: most-viewed pages; second table: slowest pages.
    _print_report(results, 0)
    _print_report(results, 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment