Skip to content

Instantly share code, notes, and snippets.

@mawillcockson
Created August 28, 2021 20:34
Show Gist options
  • Select an option

  • Save mawillcockson/ea887ff974e22e7012766aa640d488ad to your computer and use it in GitHub Desktop.

Select an option

Save mawillcockson/ea887ff974e22e7012766aa640d488ad to your computer and use it in GitHub Desktop.
Python String Concatenation Timing
import re
from pathlib import Path
from itertools import islice
from csv import DictWriter
from collections import defaultdict
# Seconds represented by each unit label that ``python -m timeit`` can print.
_UNIT_SECONDS = {
    "nsec": 1 / 1_000_000_000,
    "usec": 1 / 1_000_000,
    "msec": 1 / 1_000,
    "sec": 1,
}

# Header line echoed by the benchmark scripts: "#<attempt> - <count>".
_HEADER_RE = re.compile(r"^#(?P<attempt>\d+) - (?P<count>\d+)$")

# Timing inside timeit's summary line, e.g. "best of 5: 12.3 usec per loop".
# timeit formats the number with "%.3g", so a value that rounds up to 1000 is
# printed as "1e+03"; the exponent sign therefore has to be accepted.
_TIMING_RE = re.compile(
    r": (?P<value>\d+(\.\d+)?(e[+-]?\d+)?) (?P<unit>[num]?sec)"
)


def _parse_runs(lines):
    """Turn alternating header/timing lines into ``{count: {attempt: seconds}}``.

    Blank lines are ignored. The remaining lines are consumed in pairs: a
    ``#<attempt> - <count>`` header followed by the timeit summary line.

    Raises:
        ValueError: if a pair is incomplete, or its header or timing cannot
            be parsed.
    """
    meaningful = [line for line in lines if line.strip()]
    results = defaultdict(dict)
    for start in range(0, len(meaningful), 2):
        run = meaningful[start:start + 2]
        if len(run) < 2:
            raise ValueError(f"header without a timing line: {run[0]!r}")
        header = _HEADER_RE.match(run[0])
        if header is None:
            raise ValueError(f"unrecognised run header: {run[0]!r}")
        timing = _TIMING_RE.search(run[1])
        if timing is None:
            raise ValueError(f"unrecognised timing line: {run[1]!r}")
        seconds = float(timing["value"]) * _UNIT_SECONDS[timing["unit"]]
        results[int(header["count"])][int(header["attempt"])] = seconds
    return results


def main():
    """Convert raw benchmark output into a CSV table of timings in seconds.

    Reads ``~/projects/time_python_string_concatenation_results.txt`` (which
    must exist) and writes
    ``~/projects/time_python_string_concatenation_parsed_results.txt`` with
    one row per count and one column per attempt number.

    Raises:
        FileNotFoundError: if the results file does not exist.
        ValueError: if the results file contains a malformed run.
    """
    results_path = Path("~/projects/time_python_string_concatenation_results.txt").expanduser().resolve(strict=True)
    parse_path = Path("~/projects/time_python_string_concatenation_parsed_results.txt").expanduser().resolve()
    # Make sure the output file exists before any parsing starts.
    parse_path.touch()

    results = _parse_runs(results_path.read_text().splitlines())

    # Collect every attempt number seen for any count, in first-seen order, so
    # the header does not depend on the first count alone (and an empty input
    # still yields a valid header).
    attempts = list(
        dict.fromkeys(
            attempt for timings in results.values() for attempt in timings
        )
    )

    # newline="" lets the csv module control line endings itself.
    with parse_path.open(mode="wt", newline="") as file:
        writer = DictWriter(file, fieldnames=["count", *attempts])
        writer.writeheader()
        for count, timings in results.items():
            # Attempts missing for this count are written as empty cells.
            writer.writerow({"count": count, **timings})


if __name__ == "__main__":
    main()
# Benchmark eight ways of building a Python str piece by piece, using
# `python -m timeit`.
# For every (count, attempt) pair this prints a "#<attempt> - <count>" header
# line followed by whatever timeit prints for that snippet.
set -eu
# Each ATTEMPT_n below is a printf template holding a Python snippet; its %s is
# replaced with the loop iteration count before the snippet is given to timeit.
# The names s, l, d, a, au and data used by the snippets are created by the
# timeit -s setup string near the bottom of this script.

# Attempt 1: str += data.
ATTEMPT_1="
for _ in range(%s):
s += data
"
# Attempt 2: list.append(data) together with ''.join(l).
ATTEMPT_2="
for _ in range(%s):
l.append(data)
''.join(l)
"
# Attempt 3: rebuild s with an f-string.
ATTEMPT_3="
for _ in range(%s):
s = f'{s}{data}'
"
# Attempt 4: store data in a dict keyed by the loop index, join d.values().
ATTEMPT_4="
for _ in range(%s):
d[_] = data
s = ''.join(d.values())
"
# Attempt 5: ''.join over the pair (s, data).
ATTEMPT_5="
for _ in range(%s):
s = ''.join((s,data))
"
# Attempt 6: feed UTF-8 encoded data into the 'B' array, decode its bytes.
ATTEMPT_6="
for _ in range(%s):
a.frombytes(data.encode('utf-8'))
a.tobytes().decode()
"
# Attempt 7: feed data into the 'u' array, read it back with tounicode().
ATTEMPT_7="
for _ in range(%s):
au.fromunicode(data)
au.tounicode()
"
# Attempt 8: use data itself as the dict key and join the keys.
# NOTE(review): data is never reassigned, so every iteration writes the same
# key; the dict presumably never holds more than one entry - confirm this is
# the comparison intended.
ATTEMPT_8="
for _ in range(%s):
d[data] = 0
s = ''.join(d)
"
# Iteration counts: 1000, 11000, ..., 101000.
for count in $(seq 1000 10000 101000); do
for attempt_number in $(seq 1 1 8); do
# Indirect lookup: eval expands this to `echo "${ATTEMPT_<n>}"`.
ATTEMPT="$(eval 'echo "${ATTEMPT_'"$attempt_number"'}"')"
# The template is deliberately used as the printf format so %s becomes $count.
FORMATTED_ATTEMPT="$(printf "$ATTEMPT" "$count")"
echo "#$attempt_number - $count"
# NOTE(review): per the timeit docs the -s setup is not re-run for every timed
# loop, so s, l, d, a and au presumably keep growing between loops - confirm
# this is acceptable for the measurement.
python -m timeit -s "import array;a=array.array('B');au=array.array('u');s='';l=[];d={};data='a'" "$FORMATTED_ATTEMPT"
done
done
# Benchmark ways of building a Python bytes object piece by piece, using
# `python -m timeit`.
# For every (count, attempt) pair this prints a "#<attempt> - <count>" header
# line followed by whatever timeit prints for that snippet.
set -eu
# Each ATTEMPT_n below is a printf template holding a Python snippet; its %s is
# replaced with the loop iteration count before the snippet is given to timeit.
# The names s, l, d, a, au and data used by the snippets are created by the
# timeit -s setup string near the bottom of this script.

# Attempt 1: bytes += data.
ATTEMPT_1="
for _ in range(%s):
s += data
"
# Attempt 2: list.append(data) together with b''.join(l).
ATTEMPT_2="
for _ in range(%s):
l.append(data)
b''.join(l)
"
# Attempt 3: placeholder statement `0` with no %s to fill in.
# NOTE(review): presumably kept so attempt numbers 1-8 stay aligned with a
# str variant of this benchmark - confirm.
ATTEMPT_3="0"
# Attempt 4: store data in a dict keyed by the loop index, join d.values().
ATTEMPT_4="
for _ in range(%s):
d[_] = data
s = b''.join(d.values())
"
# Attempt 5: b''.join over the pair (s, data).
ATTEMPT_5="
for _ in range(%s):
s = b''.join((s,data))
"
# Attempt 6: feed data into the 'B' array, read it back with tobytes().
ATTEMPT_6="
for _ in range(%s):
a.frombytes(data)
s = a.tobytes()
"
# Attempt 7: decode data into the 'u' array, read it back with tobytes().
ATTEMPT_7="
for _ in range(%s):
au.fromunicode(data.decode())
au.tobytes()
"
# Attempt 8: use data itself as the dict key and join the keys.
# NOTE(review): data is never reassigned, so every iteration writes the same
# key; the dict presumably never holds more than one entry - confirm this is
# the comparison intended.
ATTEMPT_8="
for _ in range(%s):
d[data] = 0
s = b''.join(d)
"
# Iteration counts: 1000, 11000, ..., 101000.
for count in $(seq 1000 10000 101000); do
for attempt_number in $(seq 1 1 8); do
# Indirect lookup: eval expands this to `echo "${ATTEMPT_<n>}"`.
ATTEMPT="$(eval 'echo "${ATTEMPT_'"$attempt_number"'}"')"
# The template is deliberately used as the printf format so %s becomes $count.
FORMATTED_ATTEMPT="$(printf "$ATTEMPT" "$count")"
echo "#$attempt_number - $count"
# NOTE(review): per the timeit docs the -s setup is not re-run for every timed
# loop, so s, l, d, a and au presumably keep growing between loops - confirm
# this is acceptable for the measurement.
python -m timeit -s "import array;a=array.array('B');au=array.array('u');s=b'';l=[];d={};data=b'a'" "$FORMATTED_ATTEMPT"
done
done
# Benchmark ways of building a Python bytes object piece by piece, using
# `python -m timeit`, while varying the size of the piece being appended.
# Every snippet loops 10000 times; $count sets the length of `data` instead.
# For every (count, attempt) pair this prints a "#<attempt> - <count>" header
# line followed by whatever timeit prints for that snippet.
set -eu
# Each ATTEMPT_n below is a printf template holding a Python snippet; its %s is
# replaced with the fixed iteration count before the snippet is given to timeit.
# The names s, l, d, a, au and data used by the snippets are created by the
# timeit -s setup string near the bottom of this script.

# Attempt 1: bytes += data.
ATTEMPT_1="
for _ in range(%s):
s += data
"
# Attempt 2: list.append(data) together with b''.join(l).
ATTEMPT_2="
for _ in range(%s):
l.append(data)
b''.join(l)
"
# Attempt 3: placeholder statement `0` with no %s to fill in.
# NOTE(review): presumably kept so attempt numbers 1-8 stay aligned with a
# str variant of this benchmark - confirm.
ATTEMPT_3="0"
# Attempt 4: store data in a dict keyed by the loop index, join d.values().
ATTEMPT_4="
for _ in range(%s):
d[_] = data
s = b''.join(d.values())
"
# Attempt 5: b''.join over the pair (s, data).
ATTEMPT_5="
for _ in range(%s):
s = b''.join((s,data))
"
# Attempt 6: feed data into the 'B' array, read it back with tobytes().
ATTEMPT_6="
for _ in range(%s):
a.frombytes(data)
s = a.tobytes()
"
# Attempt 7: decode data into the 'u' array, read it back with tobytes().
ATTEMPT_7="
for _ in range(%s):
au.fromunicode(data.decode())
au.tobytes()
"
# Attempt 8: use data itself as the dict key and join the keys.
# NOTE(review): data is never reassigned, so every iteration writes the same
# key; the dict presumably never holds more than one entry - confirm this is
# the comparison intended.
ATTEMPT_8="
for _ in range(%s):
d[data] = 0
s = b''.join(d)
"
# Payload lengths: 10, 20, ..., 100, then 200, 300, ..., 1000.
for count in $(seq 10 10 100) $(seq 200 100 1000); do
for attempt_number in $(seq 1 1 8); do
# Indirect lookup: eval expands this to `echo "${ATTEMPT_<n>}"`.
ATTEMPT="$(eval 'echo "${ATTEMPT_'"$attempt_number"'}"')"
# The loop count is fixed at 10000 here; only the payload size changes.
FORMATTED_ATTEMPT="$(printf "$ATTEMPT" "10000")"
# In this script the number after the dash is the payload length in bytes.
echo "#$attempt_number - $count"
# NOTE(review): per the timeit docs the -s setup is not re-run for every timed
# loop, so s, l, d, a and au presumably keep growing between loops - confirm
# this is acceptable for the measurement.
python -m timeit -s "import array;a=array.array('B');au=array.array('u');s=b'';l=[];d={};data=b'a'*$count" "$FORMATTED_ATTEMPT"
done
done
@mawillcockson
Copy link
Author

What is interesting is that:

list = []
list.append(data)

is slower than

dictionary = {}
dictionary[data] = 0

It makes sense to me that the fastest insertion of all is:

list = [data]
list[0] = data

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment