Created
February 18, 2013 22:16
-
-
Save dangmai/4981268 to your computer and use it in GitHub Desktop.
Simulation for routing performances in Python using different routing methods. Inspired by RapGenius R [gist](https://gist.github.com/toddwschneider/4947326).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Simulation of Heroku performances under different routing mechanisms (random/ | |
naive vs intelligent). Inspired by RapGenius' simulation, which can be found | |
at http://rapgenius.com/James-somers-herokus-ugly-secret-lyrics. | |
This simulation uses numpy, scipy and pandas for data management/processing, | |
and it has slightly different algorithm than the simulation by RapGenius, | |
though the result is very similar. | |
""" | |
import csv | |
import numpy as np | |
from random import randint | |
from pandas import DataFrame, Series | |
from scipy.stats import poisson | |
import math | |
NAIVE_ROUTE = "Naive" | |
INTELLIGENT_ROUTE = "Intelligent" | |
def simulation(route_mode=NAIVE_ROUTE, reqs_per_min=9000, sim_time_mins=5, | |
dyno_nums=100): | |
""" | |
Run the simulation, returning an array of requests, including data that can | |
be aggregated on. | |
""" | |
reqs_per_ms = float(reqs_per_min) / 60000 | |
sim_time = sim_time_mins * 60000 | |
# How many starting requests at time t (in ms) | |
requests_at_time = poisson.rvs(reqs_per_ms, size=sim_time) | |
# "Unlist" the above poisson distribution to create a list of request | |
# starting times | |
start_times = [] | |
for index, reqs_num in enumerate(requests_at_time): | |
# index is the start time of the request | |
for i in range(reqs_num): | |
start_times.append(index) | |
starting_time_series = Series(start_times) | |
# Total number of requests | |
total_reqs = len(starting_time_series) | |
# Using the Weibull distribution to sample how long the requests take | |
# As per RapGenius recommendation, the shape param is 0.46 | |
wshape = 0.46 | |
# Not really sure why these params are used to determine the scale factors | |
wlambda = 50 / (math.log(2) ** (1 / wshape)) | |
# Right now, don't limit the min and max for request durations | |
theoretical_duration = np.random.weibull(0.46, total_reqs) | |
# Scale the request durations according to the scale factor wlambda | |
theoretical_duration = map(lambda x: x * wlambda, theoretical_duration) | |
theoretical_duration_series = Series(theoretical_duration) | |
requests = DataFrame({ | |
'starting_time': starting_time_series, | |
'theoretical_duration': theoretical_duration_series | |
}) | |
# Calculate the ending times for the requests, depending on the route | |
# method | |
# This contains the next available information for dynos | |
dynos = [0] * dyno_nums | |
ending_times = [] | |
# Theoretically, we can figure out whether a request is queued or not, by | |
# checking whether (ending_time - starting_time) == theretical_duration, | |
# however, that involves floating point arithmetic and somehow gives wrong | |
# answer. So here we introduce a Series that keep track whether a request | |
# is queued or not. | |
queued = [] | |
for index, request in requests.iterrows(): | |
row_queued = False | |
if route_mode is NAIVE_ROUTE: | |
# In naive mode, choose a random dyno index uniformly. | |
dyno_index = randint(0, len(dynos) - 1) | |
else: | |
# In intelligent mode, choose a dyno that is either currently | |
# available, or will be available the soonest. | |
dyno_index = dynos.index(min(dynos)) | |
start_at = request["starting_time"] | |
if dynos[dyno_index] > request["starting_time"]: | |
row_queued = True | |
start_at = dynos[dyno_index] | |
ending_time = start_at + request["theoretical_duration"] | |
dynos[dyno_index] = ending_time | |
ending_times.append(ending_time) | |
queued.append(row_queued) | |
requests['ending_time'] = Series(ending_times) | |
requests['queued'] = Series(queued) | |
requests['real_duration'] = requests['ending_time'] - \ | |
requests['starting_time'] | |
requests['time_in_queue'] = requests['real_duration'] - \ | |
requests['theoretical_duration'] | |
# Metadata | |
requests.dynos = dyno_nums | |
requests.mode = route_mode | |
return requests | |
"""Below are the functions to aggregate the data""" | |
def aggfunc_request_queued_nums(result): | |
s = result["queued"] | |
return round((float(len(s[s == True])) / len(result) * 100)) | |
def aggfunc_mean_time_in_queue(result): | |
return round(result["time_in_queue"].mean()) | |
def aggfunc_median_time_in_queue(result): | |
return round(result["time_in_queue"].median()) | |
def aggfunc_mean_time_in_queue_if_queued(result): | |
queued = result[result["queued"] == True] | |
s = queued["time_in_queue"] | |
return round(s.mean()) | |
def aggfunc_median_time_in_queue_if_queued(result): | |
queued = result[result["queued"] == True] | |
s = queued["time_in_queue"] | |
return round(s.median()) | |
def to_csv(*results): | |
""" | |
Export a summary of the results to a CSV file. | |
""" | |
fields = { | |
"# of dynos": lambda result: result.dynos, | |
"Routing mode": lambda result: result.mode, | |
"% of requests queued": aggfunc_request_queued_nums, | |
"Mean time in queue": aggfunc_mean_time_in_queue, | |
"Mean time in queue if queued": aggfunc_mean_time_in_queue_if_queued, | |
"Median time in queue": aggfunc_median_time_in_queue, | |
"Median time in queue if queued": aggfunc_median_time_in_queue_if_queued | |
} | |
with open('result.csv', 'wb') as csv_file: | |
writer = csv.writer(csv_file) | |
writer.writerow(fields.keys()) | |
for result in results: | |
row = [] | |
for aggfunc in fields.itervalues(): | |
row.append(aggfunc(result)) | |
writer.writerow(row) | |
def main(): | |
to_csv( | |
simulation(route_mode=INTELLIGENT_ROUTE, dyno_nums=50), | |
simulation(route_mode=INTELLIGENT_ROUTE, dyno_nums=60), | |
simulation(route_mode=INTELLIGENT_ROUTE, dyno_nums=75), | |
simulation(route_mode=NAIVE_ROUTE, dyno_nums=75), | |
simulation(route_mode=NAIVE_ROUTE, dyno_nums=100), | |
simulation(route_mode=NAIVE_ROUTE, dyno_nums=150), | |
simulation(route_mode=NAIVE_ROUTE, dyno_nums=200), | |
simulation(route_mode=NAIVE_ROUTE, dyno_nums=500), | |
simulation(route_mode=NAIVE_ROUTE, dyno_nums=1000), | |
simulation(route_mode=NAIVE_ROUTE, dyno_nums=2000), | |
simulation(route_mode=NAIVE_ROUTE, dyno_nums=4000) | |
) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment