Last active
September 30, 2024 12:05
-
-
Save awwalm/1f10aa15a569760c8c1dabd0a1ce29d2 to your computer and use it in GitHub Desktop.
String Reversal Benchmarking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Benchmarking of memory-optimal string reversal.""" | |
import inspect | |
import random | |
import sys | |
import time | |
import matplotlib.pyplot as plt | |
def reverse_list_str(s: str): | |
ca = list(s) | |
n, m = 0, len(s) - 1 | |
while n <= m: | |
ca[n], ca[m] = ca[m], ca[n] | |
n += 1 | |
m -= 1 | |
return str().join(ca) | |
def reverse_byte_str(s: str): | |
ba = bytearray( | |
s, 'utf-8', 'surrogatepass') | |
n, m = 0, len(s) - 1 | |
l = m + 1 | |
while n <= m: | |
ba[n], ba[m] = ba[m], ba[n] | |
n += 1 | |
m -= 1 | |
return str(ba)[12:12 + l] | |
# noinspection PyTypeChecker | |
def benchmark(): | |
samples = int(500e3) | |
# str_range = # [i for i in range(100, 10001, 500)] | |
str_range = [i for i in range(int(1e3), samples + 1, int(1e3))] | |
str_data = [] # String of length str_range[i] | |
time_list, time_byte = [], [] | |
memory_list, memory_byte = [], [] | |
# Compute randomly generated strings of increasing length | |
for r in str_range: | |
c = "" | |
for _ in range(r): | |
c += chr(random.randint(1, 1114111)) | |
str_data.append(c) | |
# Record time and memory consumption | |
for s in str_data: | |
t = time.perf_counter() | |
rs = reverse_list_str(s) | |
t2 = time.perf_counter() - t | |
time_list.append(t2) | |
memory_list.append(sys.getsizeof(list(rs))) | |
t = time.perf_counter() | |
rs = reverse_byte_str(s) | |
t2 = time.perf_counter() - t | |
time_byte.append(t2) | |
memory_byte.append(sys.getsizeof(bytearray(rs, encoding='utf-8', errors='surrogatepass'))) | |
fig = plt.figure() | |
gs = fig.add_gridspec(2, 7) | |
# grid = gs.subplots(sharex=False, sharey=False) | |
# _time, _memory = grid[0][0], grid[1][0] | |
# l_imp, ba_imp = grid[1][0], grid[1][1] | |
_time = plt.subplot(gs[0, :5]) | |
_memory = plt.subplot(gs[1, :5]) | |
l_imp = plt.subplot(gs[0, 5:]) | |
ba_imp = plt.subplot(gs[1, 5:]) | |
# Code blocks | |
ba_imp.set_xticks([]) | |
ba_imp.set_yticks([]) | |
ba_imp.set_xticklabels([]) | |
ba_imp.set_yticklabels([]) | |
ba_imp.set_title('\n(d) Bytearray Algorithm') | |
ba_imp.text(0.02, 0.4, | |
s=inspect.getsource(reverse_byte_str), | |
family='courier new', | |
fontsize=13) | |
l_imp.set_xticks([]) | |
l_imp.set_yticks([]) | |
l_imp.set_xticklabels([]) | |
l_imp.set_yticklabels([]) | |
l_imp.set_title('\n(c) List-based Algorithm') | |
l_imp.text(0.02, 0.5, | |
s=inspect.getsource(reverse_list_str), | |
family='courier new', | |
fontsize=13) | |
# Runtime in seconds | |
_time.plot(str_range, time_list, label='List', color='red') | |
_time.plot(str_range, time_byte, label='Bytearray', color='blue') | |
_time.set_xticklabels([]) | |
_time.set(ylabel='Time taken (Seconds)') | |
_time.set_title('\n(a) Runtime') | |
_time.legend() | |
# Memory consumption in bytes | |
_memory.plot(str_range, memory_list, label='List', color='red') | |
_memory.plot(str_range, memory_byte, label='Bytearray', color='blue') | |
_memory.set(xlabel='String lengths', ylabel='Memory consumption (Bytes)') | |
_memory.set_yticks(_memory.get_yticks()[1:]) | |
_memory.set_yticklabels([f'{int(x)}' for x in _memory.get_yticks().tolist()]) | |
_memory.set_title('(b) Space Usage') | |
_memory.legend() | |
plt.suptitle(f'String Reversal Benchmarking: Using Bytearray vs List' | |
f'\n[ {len(str_range):,} Incremental Length Samples of Randomly Generated ' | |
f'Character Strings at Intervals of {1000:,} ]', | |
weight='bold') | |
# Time Statistics | |
t_stats = (f"max(List) = {max(time_list):.3e} s\n" | |
f"max(Bytes) = {max(time_byte):.3e} s") | |
t_box = dict(boxstyle='round', fc='blanchedalmond', ec='orange', alpha=0.5) | |
_time.text(str_range[int(0.2 * len(str_range))], | |
0.875 * max(max(time_list), max(time_byte)), | |
t_stats, | |
bbox=t_box, | |
horizontalalignment='left') | |
# Memory Statistics | |
m_stats = (f"max(List) = {max(memory_list) / 1000} KB\n" | |
f"max(Bytes) = {max(memory_byte) / 1000} KB") | |
m_box = dict(boxstyle='round', fc='blanchedalmond', ec='orange', alpha=0.5) | |
_memory.text(str_range[int(0.2 * len(str_range))], | |
0.875 * max(max(memory_list), max(memory_byte)), | |
m_stats, | |
bbox=m_box, | |
horizontalalignment='left') | |
plt.show() | |
def test_basic(): | |
str1 = "abcdef" | |
str2 = "12345" | |
print(f"Original strings:\n{str1}\n{str2}\n") | |
print("Reversed strings:") | |
print(reverse_byte_str(str1)) | |
print(reverse_byte_str(str2)) | |
print(reverse_list_str(str1)) | |
print(reverse_list_str(str2)) | |
if __name__ == "__main__": | |
test_basic() | |
benchmark() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment