Last active
August 6, 2018 15:38
-
-
Save farrokhi/74d2fe2d4705a2556289c3cc9f76d20f to your computer and use it in GitHub Desktop.
Comparing the performance of various string concatenation methods in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import wraps | |
import pickle | |
from random import randint, choice | |
import string | |
import pathlib | |
import time | |
import sys | |
# Path of the pickle file used to cache the generated records between runs;
# it is opened as given, so it resolves against the current working directory.
cache_file: str = "test-data-cache.pickle"

# Our huge list of tuples: filled by build_list()/load_list() and iterated by
# every test_* benchmark function.
big_list: list = []
def chrono(msg='', format_time=False):
    """Build a decorator that times every call of the function it wraps.

    A ``Chrono`` is created immediately before the wrapped function is called
    and stopped right after it returns.  If the wrapped function raises, the
    stopwatch is never stopped and nothing is reported.

    Args:
        msg: label forwarded to ``Chrono.stop``.
        format_time: flag forwarded to ``Chrono.stop``.

    Returns:
        A decorator; the function it produces returns the wrapped function's
        result unchanged.
    """
    def decorator(func):
        @wraps(func)
        def timed(*args, **kwargs):
            stopwatch = Chrono()
            outcome = func(*args, **kwargs)
            stopwatch.stop(msg, format_time)
            return outcome

        return timed

    return decorator
class Chrono:
    """Stopwatch that starts running as soon as it is created.

    The start instant is read from ``time.perf_counter()`` in the constructor.
    ``stop`` measures against that instant without resetting it, so it may be
    called more than once on the same object.
    """

    def __init__(self):
        self._start_time = time.perf_counter()

    @staticmethod
    def _format_time(t) -> str:
        """Render a duration given in seconds as ``HHhMMmSSs``.

        Each field is zero-padded to at least two digits and formatted with
        ``%d``, so a fractional part of the seconds is dropped.  Hours are not
        wrapped at 24.
        """
        hours, remainder = divmod(t, 3600)
        minutes = remainder // 60
        seconds = t - (hours * 3600) - (minutes * 60)
        return "%02dh%02dm%02ds" % (hours, minutes, seconds)

    def stop(self, msg='', format_time=False) -> float:
        """Return the seconds elapsed since construction, optionally reporting them.

        Args:
            msg: when non-empty, a line ``[ elapsed <time> ] - <msg>`` is
                written to standard error; when empty nothing is printed.
            format_time: if true the time is shown as ``HHhMMmSSs``, otherwise
                as seconds with three decimals.

        Returns:
            The elapsed time in seconds.
        """
        elapsed = time.perf_counter() - self._start_time
        if not msg:
            # Silent mode: the caller only wants the measurement.
            return elapsed

        if format_time:
            shown = self._format_time(elapsed)
        else:
            shown = "%.3f seconds" % elapsed
        print("[ elapsed %s ] - %s" % (shown, msg), file=sys.stderr)
        return elapsed
def rand_str():
    """Return a random string of 6 to 18 ASCII letters and digits.

    The length is drawn first with ``randint`` and each character is then
    picked independently with ``choice``.
    """
    alphabet = string.ascii_letters + string.digits
    length = randint(6, 18)
    picked = [choice(alphabet) for _ in range(length)]
    return "".join(picked)
def build_list(n):
    """Generate ``n`` random records, append them to ``big_list`` and cache it.

    Each record is a tuple of 20 strings: a five-field group (random text,
    three random integers rendered as decimal strings, random text) repeated
    four times.  Records are appended to the module-level ``big_list``, which
    is not cleared first; the whole list is then pickled to ``cache_file``.
    The generation and dump phases each report their elapsed time through
    ``Chrono``.

    Args:
        n: number of records to generate.

    Raises:
        OSError: if ``cache_file`` cannot be opened for writing.
    """
    global big_list

    c = Chrono()
    for _ in range(n):
        # NOTE(review): the middle integer's upper bound has one digit fewer
        # than its neighbours - confirm whether that is intended.
        tpl = (
            rand_str(),
            str(randint(0, 655359999)),
            str(randint(0, 65535999)),
            str(randint(0, 655359999)),
            rand_str(),
        )
        big_list.append(tpl * 4)  # each record has 20 fields
    # Report the argument rather than a script-level global so the function
    # also works when this module is imported instead of run.
    c.stop(f"built list of {n} items")

    # save for later; the context manager closes the file even if the dump fails
    with open(cache_file, 'wb') as dumpfile:
        c = Chrono()
        pickle.dump(big_list, dumpfile)
        c.stop("dump cache file")
def load_list():
    """Replace ``big_list`` with the records unpickled from ``cache_file``.

    The time spent unpickling and the number of records loaded are reported
    through ``Chrono``.

    Raises:
        OSError: if ``cache_file`` cannot be opened for reading.
    """
    global big_list

    # The context manager closes the file even if unpickling fails.
    with open(cache_file, 'rb') as dumpfile:
        c = Chrono()
        # NOTE: unpickling can execute arbitrary code; only point cache_file
        # at a file this script wrote itself.
        big_list = pickle.load(dumpfile)
        c.stop(f"load cache file : {len(big_list)} records")
@chrono('str.join()')
def test_join():
    """Benchmark ``str.join``: build a pipe-delimited line for every record.

    Every line is kept in a list although only the last one is returned; the
    explicit loop-and-append shape mirrors the other ``test_*`` benchmarks.

    Returns:
        The line built from the last record of ``big_list``.

    Raises:
        IndexError: if ``big_list`` is empty.
    """
    lines = []
    for record in big_list:
        line = "|".join(record)
        lines.append(line)
    return lines[-1]
@chrono("percent formatting")
def test_percent_format():
    """Benchmark ``%`` formatting: build a pipe-delimited line for every record.

    The 20 fields are passed one by one, each read by index from the record.
    Every line is kept in a list although only the last one is returned.

    Returns:
        The line built from the last record of ``big_list``.

    Raises:
        IndexError: if ``big_list`` is empty or a record has fewer than 20 fields.
    """
    lines = []
    for record in big_list:
        line = "%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s" % (
            record[0], record[1], record[2], record[3], record[4],
            record[5], record[6], record[7], record[8], record[9],
            record[10], record[11], record[12], record[13], record[14],
            record[15], record[16], record[17], record[18], record[19],
        )
        lines.append(line)
    return lines[-1]
@chrono("f-string formatting")
def test_fstring_format():
    """Benchmark f-strings: build a pipe-delimited line for every record.

    The adjacent f-string literals below are merged into a single f-string
    at compile time; they are split only to keep the source lines short.
    Every line is kept in a list although only the last one is returned.

    Returns:
        The line built from the last record of ``big_list``.

    Raises:
        IndexError: if ``big_list`` is empty or a record has fewer than 20 fields.
    """
    lines = []
    for record in big_list:
        line = (
            f"{record[0]}|{record[1]}|{record[2]}|{record[3]}|{record[4]}|"
            f"{record[5]}|{record[6]}|{record[7]}|{record[8]}|{record[9]}|"
            f"{record[10]}|{record[11]}|{record[12]}|{record[13]}|{record[14]}|"
            f"{record[15]}|{record[16]}|{record[17]}|{record[18]}|{record[19]}"
        )
        lines.append(line)
    return lines[-1]
@chrono("multiple string concat (+)")
def test_string_concat():
    """Benchmark chained ``+``: build a pipe-delimited line for every record.

    The fields and separators are added strictly left to right in a single
    expression.  Every line is kept in a list although only the last one is
    returned.

    Returns:
        The line built from the last record of ``big_list``.

    Raises:
        IndexError: if ``big_list`` is empty or a record has fewer than 20 fields.
    """
    lines = []
    for record in big_list:
        line = (
            record[0] + "|" + record[1] + "|" + record[2] + "|" + record[3] + "|" + record[4] + "|"
            + record[5] + "|" + record[6] + "|" + record[7] + "|" + record[8] + "|" + record[9] + "|"
            + record[10] + "|" + record[11] + "|" + record[12] + "|" + record[13] + "|" + record[14] + "|"
            + record[15] + "|" + record[16] + "|" + record[17] + "|" + record[18] + "|" + record[19]
        )
        lines.append(line)
    return lines[-1]
@chrono("str.format()")
def test_format_function():
    """Benchmark ``str.format``: build a pipe-delimited line for every record.

    The template uses explicit positional indexes and the 20 fields are
    passed one by one, each read by index from the record.  Every line is
    kept in a list although only the last one is returned.

    Returns:
        The line built from the last record of ``big_list``.

    Raises:
        IndexError: if ``big_list`` is empty or a record has fewer than 20 fields.
    """
    lines = []
    for record in big_list:
        line = "{0}|{1}|{2}|{3}|{4}|{5}|{6}|{7}|{8}|{9}|{10}|{11}|{12}|{13}|{14}|{15}|{16}|{17}|{18}|{19}".format(
            record[0], record[1], record[2], record[3], record[4],
            record[5], record[6], record[7], record[8], record[9],
            record[10], record[11], record[12], record[13], record[14],
            record[15], record[16], record[17], record[18], record[19],
        )
        lines.append(line)
    return lines[-1]
if __name__ == '__main__':
    # Script entry point: obtain the test data (from the cache file when it
    # holds the expected number of records, otherwise by regenerating it), run
    # every benchmark once, then check that all of them built the same line.
    p = pathlib.Path(cache_file)
    # Number of records the benchmark expects; defined at module level so it
    # is visible to the functions above.
    list_size = 500000

    if p.exists():
        load_list()
        if len(big_list) != list_size:
            # Cached data has the wrong size: discard it and start over.
            print("... rebuliding the list")
            big_list = []
            build_list(list_size)
    else:
        build_list(list_size)

    # Each call prints its own timing to stderr and returns the last line built.
    results = (
        test_join(),
        test_percent_format(),
        test_fstring_format(),
        test_string_concat(),
        test_format_function(),
    )

    if all(line == results[0] for line in results[1:]):
        print("Success: All functions generated the same results")
        # sys.exit instead of the bare exit(): the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(0)

    print("Error: Output results are not the same:")
    for line in results:
        print(line)
    sys.exit(1)
Results on FreeBSD 11.2 x64 on a Xeon CPU E5-4620 0 @ 2.20GHz:
# python3.6 str-perf-test.py
[ elapsed 11.126 seconds ] - load cache file : 5000000 records
[ elapsed 7.999 seconds ] - str.join()
[ elapsed 18.283 seconds ] - percent formatting
[ elapsed 13.043 seconds ] - f-string formatting
[ elapsed 29.705 seconds ] - multiple string concat (+)
[ elapsed 25.400 seconds ] - str.format()
Success: All functions generated the same results
# python3.7 str-perf-test.py
[ elapsed 10.912 seconds ] - load cache file : 5000000 records
[ elapsed 7.898 seconds ] - str.join()
[ elapsed 18.089 seconds ] - percent formatting
[ elapsed 13.005 seconds ] - f-string formatting
[ elapsed 29.910 seconds ] - multiple string concat (+)
[ elapsed 25.110 seconds ] - str.format()
Success: All functions generated the same results
# pypy3 str-perf-test.py
[ elapsed 121.232 seconds ] - load cache file : 5000000 records
[ elapsed 15.889 seconds ] - str.join()
[ elapsed 24.872 seconds ] - percent formatting
[ elapsed 16.148 seconds ] - f-string formatting
[ elapsed 14.568 seconds ] - multiple string concat (+)
[ elapsed 17.107 seconds ] - str.format()
Success: All functions generated the same results
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
result on FreeBSD 11.2 x64 on a Xeon E5-2683 v3 @ 2.00GHz