Created
December 8, 2016 11:05
-
-
Save neoshrew/0edb8f6fe9cc4b335b8864f4f84030b0 to your computer and use it in GitHub Desktop.
Testing the speed of different methods of parsing datetime strings (in a specific, ISO-like but non-ISO format) in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime | |
from dateutil.tz import tzoffset, tzutc | |
import re | |
# Matches timestamps shaped like "2016-08-18 18:08:05+0000" and captures, in
# order: year, month, day, hour, minute, second, offset hours, offset minutes.
# Only a "+" offset sign is accepted. Each quantifier sits INSIDE its group so
# the group captures every digit of the field, not just the last one.
THE_RE = re.compile(
    r"^"
    r"([0-9]{4})"  # year
    r"-"
    r"([0-9]{2})"  # month
    r"-"
    r"([0-9]{2})"  # day
    r" "
    r"([0-9]{2})"  # hour
    r":"
    r"([0-9]{2})"  # minute
    r":"
    r"([0-9]{2})"  # second
    r"\+"
    r"([0-9]{2})"  # UTC offset, hours
    r"([0-9]{2})"  # UTC offset, minutes
    r"$"
)
def parse(dt_str):
    """Parse a ``YYYY-MM-DD HH:MM:SS+HHMM`` string into an aware datetime.

    Example input: ``2016-08-18 18:08:05+0000``.

    Args:
        dt_str: The timestamp text; it must match ``THE_RE``.

    Returns:
        A ``datetime`` whose ``tzinfo`` is ``tzutc()`` when the offset is
        ``+0000`` and a ``tzoffset`` for any other offset.

    Raises:
        ValueError: If ``dt_str`` does not match ``THE_RE`` or the captured
            fields do not form a valid date/time.
    """
    match = THE_RE.match(dt_str)
    if match is None:
        raise ValueError('unsupported datetime string: %r' % (dt_str,))

    year, month, day, hour, minute, second, tz_hr, tz_mn = (
        int(group) for group in match.groups()
    )

    if tz_hr or tz_mn:
        # tzoffset() takes the offset in SECONDS, so convert both parts.
        tzinfo_ = tzoffset(None, (3600 * tz_hr) + (60 * tz_mn))
    else:
        tzinfo_ = tzutc()

    return datetime(year, month, day, hour, minute, second, tzinfo=tzinfo_)
if __name__ == '__main__':
    # Python 2 has the lazy xrange; Python 3 only provides range.
    try:
        lazy_range = xrange
    except NameError:
        lazy_range = range

    # Exercise both the UTC and the non-zero-offset code paths.
    samples = ('2016-08-18 18:08:05+0000', '2016-08-18 18:08:05+0100')
    for _ in lazy_range(100000):
        for sample in samples:
            parse(sample)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dateutil.parser: 106, 107, | |
myre: 6.84, 8.85, | |
splitter: 5.04, 6.61, | |
strptime: 15.7, 17.7, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime | |
from dateutil.tz import tzoffset, tzutc | |
# 2016-08-18 18:08:05+0000 | |
# Character positions of each field in "2016-08-18 18:08:05+0000".
# Index 19 (the offset sign) is not covered by any slice.
DT_SLICES = tuple(
    slice(start, stop)
    for start, stop in (
        (None, 4),  # year
        (5, 7),     # month
        (8, 10),    # day
        (11, 13),   # hour
        (14, 16),   # minute
        (17, 19),   # second
    )
)
TZ_HR_SLICE, TZ_MN_SLICE = slice(20, 22), slice(22, None)
def parse(dt_str):
    """Parse a ``YYYY-MM-DD HH:MM:SS+HHMM`` string using fixed slices.

    Example input: ``2016-08-18 18:08:05+0000``.

    Args:
        dt_str: The timestamp text, laid out so that the fields sit at the
            positions described by ``DT_SLICES``, ``TZ_HR_SLICE`` and
            ``TZ_MN_SLICE``, with the offset sign at index 19.

    Returns:
        A ``datetime`` whose ``tzinfo`` is ``tzutc()`` for a zero offset and
        a ``tzoffset`` otherwise.

    Raises:
        ValueError: If a sliced field is not an integer or the fields do not
            form a valid date/time.
    """
    tz_hr = int(dt_str[TZ_HR_SLICE])
    tz_mn = int(dt_str[TZ_MN_SLICE])

    if tz_hr == tz_mn == 0:
        tzinfo_ = tzutc()
    else:
        # tzoffset() takes the offset in SECONDS, so convert both parts.
        offset_seconds = (3600 * tz_hr) + (60 * tz_mn)
        # The sign character sits between the seconds and the offset hours;
        # honour "-" instead of treating every offset as positive.
        if dt_str[19] == '-':
            offset_seconds = -offset_seconds
        tzinfo_ = tzoffset(None, offset_seconds)

    fields = [int(dt_str[slice_]) for slice_ in DT_SLICES]
    return datetime(*fields, tzinfo=tzinfo_)
if __name__ == '__main__':
    # Prefer Python 2's lazy xrange when it exists; fall back to range.
    try:
        repeat = xrange
    except NameError:
        repeat = range

    # One UTC sample and one sample with a non-zero offset.
    inputs = ('2016-08-18 18:08:05+0000', '2016-08-18 18:08:05+0100')
    for _ in repeat(100000):
        for text in inputs:
            parse(text)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime, tzinfo | |
from dateutil.tz import tzoffset, tzutc | |
def parse(dt_str):
    """Parse a ``YYYY-MM-DD HH:MM:SS+HHMM`` string via ``strptime``.

    Example input: ``2016-08-18 18:08:05+0000``.

    Args:
        dt_str: The timestamp text. The first 19 characters are the date and
            time, index 19 is the offset sign, and the remainder is the
            offset as ``HHMM``.

    Returns:
        A ``datetime`` whose ``tzinfo`` is ``tzutc()`` for a zero offset and
        a ``tzoffset`` otherwise.

    Raises:
        ValueError: If the offset digits are not integers or the date/time
            part does not match ``%Y-%m-%d %H:%M:%S``.
    """
    stamp, sign = dt_str[:19], dt_str[19:20]
    tz_hr = int(dt_str[20:22])
    tz_mn = int(dt_str[22:])

    if tz_hr == tz_mn == 0:
        tzinfo_ = tzutc()
    else:
        # tzoffset() takes the offset in SECONDS, so convert both parts.
        offset_seconds = (3600 * tz_hr) + (60 * tz_mn)
        # Honour a "-" sign instead of treating every offset as positive.
        if sign == '-':
            offset_seconds = -offset_seconds
        tzinfo_ = tzoffset(None, offset_seconds)

    naive = datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
    return naive.replace(tzinfo=tzinfo_)
if __name__ == '__main__':
    # xrange only exists on Python 2; use range everywhere else.
    try:
        iterations = xrange
    except NameError:
        iterations = range

    # Cover both the zero-offset and the non-zero-offset branch.
    cases = ('2016-08-18 18:08:05+0000', '2016-08-18 18:08:05+0100')
    for _ in iterations(100000):
        for case in cases:
            parse(case)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Time parse() from dateutil.parser and from every *.py module in the current
# directory against two sample timestamps. Prints one line per module:
#   <module>:  <usec>, <usec>,
for module in dateutil.parser *.py
do
    # Drop the .py suffix to get an importable module name; dateutil.parser
    # has no such suffix and is left unchanged.
    module=${module%.py}
    # With no matching files the glob stays literal ("*.py"); skip it.
    [ "$module" = '*' ] && continue
    printf '%s: ' "$module"
    for datetime in "2016-08-18 18:08:05+0000" "2016-08-18 18:08:05+0100"
    do
        # Keep only the per-loop figure; results that timeit reports in a
        # unit other than usec do not match the pattern.
        python -m timeit --setup "from $module import parse" "parse('${datetime}')" | \
            grep -Po '(?<=: )[0-9\.]+(?= usec per loop)' | xargs printf ' %s,'
    done
    echo
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment