Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save jakelevi1996/5c54dc9afc320bb507a3d39c6992f8a8 to your computer and use it in GitHub Desktop.

Select an option

Save jakelevi1996/5c54dc9afc320bb507a3d39c6992f8a8 to your computer and use it in GitHub Desktop.
timeplot: a Python module for plotting time complexity

timeplot: a Python module for plotting time complexity

Here is the source code (and some usage examples and their resulting images, shown below) for timeplot: a Python function/module which accepts a dictionary of string:function pairs, and plots the time complexity of each function over a specified range of input sizes (the key-strings are used as legend entries). Each function must accept an integer size, and return a floating-point value for the time-taken for the function's main operation (not including set-up code). Usage examples and images shown below include comparing different matrix operations, comparing strategies for modifying an iterable while iterating over it, and comparing the efficiency of operations on python lists vs dicts. The images are shown first, followed by the modules that generated them using timeplot.

import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from time import perf_counter
def timeplot(
func_dict, size_lo=10, size_hi=1e5, n_sizes=10, n_repeats=5,
plot_name="Time complexity", dir_name="Images", verbose=True, xlabel="Size"
):
t0 = perf_counter()
# Create list of sizes to test
size_list = np.logspace(
np.log10(size_lo), np.log10(size_hi), n_sizes, dtype=int)
size_list = np.unique(size_list)
# Create name list and empty results array
name_list = list(func_dict.keys())
results = np.empty([size_list.size, len(name_list), n_repeats])
# Create figure and colours
plt.figure(figsize=[8, 6])
colours = plt.get_cmap("hsv")(
np.linspace(0, 1, len(name_list), endpoint=False)
)
# Iterate through sizes, functions, and repeats
for i, size in enumerate(size_list):
for j, func_name in enumerate(name_list):
for n in range(n_repeats):
# Display progress
if verbose: print(f"size={size}, func={func_name}, n={n}")
# Get function time
t = func_dict[func_name](size)
# Store and plot result
results[i, j, n] = t
plt.loglog([size], [t], "o", c=colours[j], alpha=0.5)
# Plot means
for i in range(len(name_list)):
mean_results = results[:, i, :].mean(axis=-1)
plt.loglog(size_list, mean_results, "--", c=colours[i], alpha=0.5)
# Add legend
line = lambda i, n: Line2D(
[], [], ls="--", marker="o", c=colours[i], label=n)
handles=[line(i, n) for i, n in enumerate(name_list)]
plt.legend(handles=handles)
# Add formatting
plt.title(plot_name)
plt.grid(which="major")
plt.grid(which="minor", alpha=0.25)
plt.xlabel(xlabel)
plt.ylabel("Time (s)")
plt.tight_layout()
# Save and close the figure
if not os.path.isdir(dir_name): os.makedirs(dir_name)
plt.savefig("{}/{}.png".format(dir_name, plot_name))
plt.close()
if verbose:
print(f"Time taken for timeplot = {perf_counter() - t0:.3f} s")
import numpy as np
from scipy.linalg import lu
from timeit import timeit
from timeplot import timeplot
def time_gaussian_matrix(size):
t = timeit(lambda: np.random.normal(size=(size, size)), number=1)
return t
def time_matrix_vector_multiply(size):
A = np.random.normal(size=(size, size))
b = np.random.normal(size=size)
t = timeit(lambda: A @ b, number=1)
return t
def time_matrix_matrix_multiply(size):
A = np.random.normal(size=(size, size))
t = timeit(lambda: A @ A, number=1)
return t
def time_matrix_solve(size):
A = np.random.normal(size=(size, size))
b = np.random.normal(size=size)
t = timeit(lambda: np.linalg.solve(A, b), number=1)
return t
def time_lu(size):
A = np.random.normal(size=(size, size))
t = timeit(lambda: lu(A), number=1)
return t
def time_cholesky(size):
A = np.random.normal(size=(size, size))
A_h = A @ A.T
t = timeit(lambda: np.linalg.cholesky(A_h), number=1)
return t
def time_qr(size):
A = np.random.normal(size=(size, size))
b = np.random.normal(size=size)
t = timeit(lambda: np.linalg.qr(A), number=1)
return t
def time_ls(size):
A = np.random.normal(size=(size, size))
b = np.random.normal(size=size)
t = timeit(lambda: np.linalg.lstsq(A, b, rcond=None), number=1)
return t
def time_eigh(size):
A = np.random.normal(size=(size, size))
A_h = A @ A.T
t = timeit(lambda: np.linalg.eigh(A_h), number=1)
return t
def warmup(): time_eigh(100)
if __name__ == "__main__":
warmup()
func_dict = {
"Generate Gaussian matrix": time_gaussian_matrix,
"Matrix-vector multiplication": time_matrix_vector_multiply,
"Matrix-matrix multiplication": time_matrix_matrix_multiply,
"Matrix solve": time_matrix_solve,
"LU decomposition": time_lu,
"Cholesky decomposition": time_cholesky,
"QR decomposition": time_qr,
"Eigenvalue decomposition": time_eigh,
"Least-squares solving": time_ls,
}
timeplot(
func_dict, size_lo=10, size_hi=1e3, n_sizes=10, n_repeats=3,
plot_name="Time complexity of square, full-rank matrix operations",
dir_name="Images", verbose=True, xlabel="Matrix rank"
)
from timeplot import timeplot
from timeit import timeit
class C:
def __init__(self, data): self.init_data, self.data = data, data
def do_processing(self): self.data += 1
def ready_to_delete(self): return ((self.data % 20) == 0)
def __hash__(self): return hash(self.init_data)
def __repr__(self): return "C({})".format(self.init_data)
def filter_list_at_end(size):
not_ready_to_delete = lambda c: not c.ready_to_delete()
def f():
c_list = [C(i) for i in range(size)]
while len(c_list) > 0:
for c in c_list:
c.do_processing()
c_list = list(filter(not_ready_to_delete, c_list))
t = timeit(f, number=1)
return t
def list_comp_at_end(size):
def f():
c_list = [C(i) for i in range(size)]
while len(c_list) > 0:
for c in c_list:
c.do_processing()
c_list = [c for c in c_list if not c.ready_to_delete()]
t = timeit(f, number=1)
return t
def set_comp_at_end(size):
def f():
c_set = {C(i) for i in range(size)}
while len(c_set) > 0:
for c in c_set:
c.do_processing()
c_set = {c for c in c_set if not c.ready_to_delete()}
t = timeit(f, number=1)
return t
def delete_from_set_copy(size):
def f():
c_set = {C(i) for i in range(size)}
while len(c_set) > 0:
for c in c_set.copy():
c.do_processing()
if c.ready_to_delete(): c_set.remove(c)
t = timeit(f, number=1)
return t
def delete_from_set_list(size):
def f():
c_set = {C(i) for i in range(size)}
while len(c_set) > 0:
for c in list(c_set):
c.do_processing()
if c.ready_to_delete(): c_set.remove(c)
t = timeit(f, number=1)
return t
def delete_from_list(size):
def f():
c_list = [C(i) for i in range(size)]
i = 0
while len(c_list) > 0:
c_list[i].do_processing()
if c_list[i].ready_to_delete(): del c_list[i]
else: i += 1
if i >= len(c_list): i = 0
t = timeit(f, number=1)
return t
def warmup(): filter_list_at_end(1000)
if __name__ == "__main__":
warmup()
func_dict = {
"Filter list at end of iteration": filter_list_at_end,
"List comp at end of iteration": list_comp_at_end,
"Set comp at end of iteration": set_comp_at_end,
"Delete from set copy": delete_from_set_copy,
"Delete from list(set)": delete_from_set_list,
"Delete from list": delete_from_list,
}
plot_name="Comparison of methods for modifying an iterable while iterating"
timeplot(
func_dict, size_lo=10, size_hi=1e4, n_sizes=10, n_repeats=3,
plot_name=plot_name, dir_name="Images", verbose=True,
xlabel="Number of items"
)
import numpy as np
from timeit import repeat
from timeplot import timeplot
def time_dict_insert(size):
initial_keys = np.random.normal(size=[size, 2])
initial_vals = np.random.normal(size=[size, 1])
d = {tuple(k): float(v) for k, v in zip(initial_keys, initial_vals)}
new_key = tuple(np.random.normal(size=2))
new_val = np.random.normal()
def f(): d[new_key] = new_val
t = min(repeat(f, number=1, repeat=3))
return t
def time_dict_retrieve(size):
initial_keys = np.random.normal(size=[size, 2])
initial_vals = np.random.normal(size=[size, 1])
d = {tuple(k): float(v) for k, v in zip(initial_keys, initial_vals)}
key = tuple(initial_keys[np.random.choice(size)])
def f(): return d[key]
t = min(repeat(f, number=1, repeat=3))
return t
def time_dict_pop(size):
initial_keys = np.random.normal(size=[size, 2])
initial_vals = np.random.normal(size=[size, 1])
d = {tuple(k): float(v) for k, v in zip(initial_keys, initial_vals)}
key = tuple(initial_keys[np.random.choice(size)])
def f(): d.pop(key, None)
t = min(repeat(f, number=1, repeat=3))
return t
def time_list_insert_start(size):
data_list = [list(x) for x in np.random.normal(size=[size, 3])]
new_val = list(np.random.normal(size=[3]))
def f(): data_list.insert(0, new_val)
t = min(repeat(f, number=1, repeat=3))
return t
def time_list_delete_start(size):
data_list = [list(x) for x in np.random.normal(size=[size, 3])]
new_val = list(np.random.normal(size=[3]))
def f(): del data_list[0]
t = min(repeat(f, number=1, repeat=3))
return t
def time_list_retrieve_start(size):
data_list = [list(x) for x in np.random.normal(size=[size, 3])]
new_val = list(np.random.normal(size=[3]))
def f(): return data_list[0]
t = min(repeat(f, number=1, repeat=3))
return t
def warmup(): time_list_delete_start(1000)
if __name__ == "__main__":
warmup()
func_dict = {
"dict insert": time_dict_insert,
"dict retrieve": time_dict_retrieve,
"dict pop (*)": time_dict_pop,
"list insert": time_list_insert_start,
"list retrieve": time_list_retrieve_start,
"list delete": time_list_delete_start,
}
timeplot(
func_dict, size_lo=10, size_hi=1e5, n_sizes=10, n_repeats=3,
plot_name="dict vs list", dir_name="Images", verbose=True,
xlabel="Size"
)
import numpy as np
from timeit import timeit
from timeplot import timeplot
def time_sort(size):
gaussian_list = list(np.random.normal(size=size))
t = timeit(lambda: gaussian_list.sort(), number=1)
return t
def warmup(): time_sort(100)
if __name__ == "__main__":
warmup()
func_dict = {
"list.sort()": time_sort,
}
timeplot(
func_dict, size_lo=100, size_hi=1e6, n_sizes=10, n_repeats=3,
plot_name="Complexity of sorting", dir_name="Images", verbose=True,
xlabel="Size of list"
)
from timeit import timeit
import numpy as np
from scipy.optimize import lsq_linear
from timeplot import timeplot
def numpy_lsq(size):
A = np.random.normal(size=(size, size))
b = np.random.normal(size=size)
t = timeit(lambda: np.linalg.lstsq(A, b, rcond=None), number=1)
return t
def scipy_lsq(size):
A = np.random.normal(size=(size, size))
b = np.random.normal(size=size)
t = timeit(lambda: lsq_linear(A, b), number=1)
return t
def scipy_lsq_constrained(size):
A = np.random.normal(size=(size, size))
b = np.random.normal(size=size)
t = timeit(lambda: lsq_linear(A, b, bounds=(0, np.inf)), number=1)
return t
def warmup(): numpy_lsq(100)
if __name__ == "__main__":
warmup()
name = "Time complexity of least-squares matrix methods (square matrices)"
func_dict = {
"Numpy least-squares": numpy_lsq,
"Scipy least-squares": scipy_lsq,
"Scipy least-squares (constrained)": scipy_lsq_constrained,
}
timeplot(
func_dict, size_lo=10, size_hi=1e3, n_sizes=10, n_repeats=3,
plot_name=name,
dir_name="Images", verbose=True, xlabel="Matrix rank"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment