Skip to content

Instantly share code, notes, and snippets.

@andreasvc
Created April 30, 2020 20:34
Show Gist options
  • Save andreasvc/aba3a5055ec780601a7e2ef83076f08d to your computer and use it in GitHub Desktop.
Save andreasvc/aba3a5055ec780601a7e2ef83076f08d to your computer and use it in GitHub Desktop.
import random
from timeit import timeit
import re
import re2
# Micro-benchmark: stdlib ``re`` versus the ``re2`` bindings on a simple
# dotted-quad pattern.  All timings are single runs (number=1) printed in
# milliseconds.  The timeit setup strings import from ``__main__``, so this
# file has to be executed as a script for the timed statements to resolve.

# The same pattern compiled by both engines; bytes patterns because the test
# data below is bytes.
re_ip = re.compile(br'\d+\.\d+\.\d+\.\d+')
re2_ip = re2.compile(br'\d+\.\d+\.\d+\.\d+')

# 16000 lines with four dot-separated numbers (these match the pattern) ...
lines = [
    '.'.join(str(random.randint(1, 255)) for _ in range(4)).encode('utf8')
    for _ in range(16000)]
# ... plus 16000 lines with only three numbers (these cannot match), shuffled
# together so matching and non-matching lines are interleaved.
lines += [
    '.'.join(str(random.randint(1, 255)) for _ in range(3)).encode('utf8')
    for _ in range(16000)]
random.shuffle(lines)
data = b'\n'.join(lines)

# --- Part 1: filter the lines one at a time --------------------------------
exec_time = timeit(
    'list(filter(re_ip.match, data.splitlines()))',
    number=1, setup='from __main__ import re_ip, data')
exec_time2 = timeit(
    'list(filter(re2_ip.match, data.splitlines()))',
    number=1, setup='from __main__ import re2_ip, data')
# NOTE(review): ``contains`` is specific to the re2 bindings (stdlib ``re``
# has no such method); presumably a boolean search -- confirm in re2's docs.
exec_time2c = timeit(
    'list(filter(re2_ip.contains, data.splitlines()))',
    number=1, setup='from __main__ import re2_ip, data')
print('re match %.1f' % (exec_time * 1000))
print('re2 match %.1f' % (exec_time2 * 1000))
print('re2 contains %.1f' % (exec_time2c * 1000))

# --- Part 2: scan the whole buffer in one call -----------------------------
exec_time = timeit(
    're_ip.findall(data)',
    number=1, setup='from __main__ import re_ip, data')
exec_time2 = timeit(
    're2_ip.findall(data)',
    number=1, setup='from __main__ import re2_ip, data')
# Keep the count timing in its own variable: storing it in ``exec_time2``
# would overwrite the findall timing and make both lines below report the
# same number.
# NOTE(review): ``count`` is likewise an re2-bindings-only method.
exec_time2c = timeit(
    're2_ip.count(data)',
    number=1, setup='from __main__ import re2_ip, data')
print('re findall %.1f' % (exec_time * 1000))
print('re2 findall %.1f' % (exec_time2 * 1000))
print('re2 count %.1f' % (exec_time2c * 1000))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment