Skip to content

Instantly share code, notes, and snippets.

@teepark
Created March 24, 2011 22:05
Show Gist options
  • Save teepark/886005 to your computer and use it in GitHub Desktop.
Save teepark/886005 to your computer and use it in GitHub Desktop.
parse any dates in HTTP headers
'''
parse these: http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3
'''
import datetime
import re
import pytz
#
# These regexes are straight translations of the BNF in section 3.3.1 of the
# spec. They are defined in the reverse of the spec's order so that every
# pattern exists before the larger patterns that are built from it.
#
# Primitive tokens. Note that `\s` accepts any whitespace character where the
# grammar specifies a single SP.
_space = r"\s"
_digit = r"(\d)"
_2digit = r"(\d{2})"
_4digit = r"(\d{4})"
_month = r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
_weekday = r"(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)"
_wkday = r"(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"

# Composite productions: time = HH:MM:SS, date1/date2/date3 are the date parts
# of the rfc1123, rfc850 and asctime formats respectively.
_time = ":".join((_2digit,) * 3)
_date3 = _month + _space + ("(?:%s|%s%s)" % (_2digit, _space, _digit))
_date2 = "-".join((_2digit, _month, _2digit))
_date1 = _space.join((_2digit, _month, _4digit))

# The three complete date formats.
_asctime = _space.join((_wkday, _date3, _time, _4digit))
_rfc850 = _space.join((_weekday + ",", _date2, _time, "GMT"))
_rfc1123 = _space.join((_wkday + ",", _date1, _time, "GMT"))

# The alternation is wrapped in a non-capturing group so that "^" and "$"
# anchor every alternative. Without the group, "|" binds looser than the
# anchors and only the last format is required to end at the end of the
# string. A non-capturing group is used so the numbering of the capture
# groups stays: 1-8 asctime, 9-15 rfc850, 16-22 rfc1123.
_httpdate = "^(?:" + "|".join((_asctime, _rfc850, _rfc1123)) + ")$"
_httpdate_re = re.compile(_httpdate)

# Month abbreviation -> month number, as expected by datetime.
_months = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
    'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}
def parse(datestring, tz=None):
    """Parse an HTTP-date string into a timezone-aware datetime.

    The string is matched against the module-level pattern, which covers the
    three formats of RFC 2616 section 3.3.1: rfc1123
    ("Sun, 06 Nov 1994 08:49:37 GMT"), rfc850
    ("Sunday, 06-Nov-94 08:49:37 GMT") and asctime
    ("Sun Nov  6 08:49:37 1994").

    Args:
        datestring: the date text to parse.
        tz: optional tzinfo; when given (and truthy) the result is converted
            to it with ``astimezone``.

    Returns:
        A datetime in UTC (or in ``tz`` when supplied), or None when
        ``datestring`` matches none of the formats or names an impossible
        date or time (for example day 31 of Feb, or hour 25).
    """
    match = _httpdate_re.match(datestring)
    if match is None:
        return None

    # Each format owns a fixed slice of the capture groups and its leading
    # weekday group is only set when that format matched:
    #   [0:8] asctime, [8:15] rfc850, [15:22] rfc1123.
    # The weekday itself is not needed to build the datetime.
    groups = match.groups()
    two_digit_year = False
    if groups[15] is not None:
        _, day, month, year, hour, minute, second = groups[15:]
    elif groups[8] is not None:
        _, day, month, year, hour, minute, second = groups[8:15]
        two_digit_year = True
    elif groups[0] is not None:
        # asctime captures the day in one of two groups: a two-digit day, or
        # a single digit that was preceded by an extra space.
        _, month, day2, day1, hour, minute, second, year = groups[:8]
        day = day2 or day1
    else:
        return None

    year = int(year)
    if two_digit_year:
        # let's go with unix epoch as century split point. why not.
        year += 2000 if year < 70 else 1900

    try:
        dt = datetime.datetime(
            year, _months[month], int(day), int(hour), int(minute),
            int(second), tzinfo=pytz.UTC)
    except ValueError:
        # The pattern only checks digit counts, so out-of-range fields get
        # this far; treat them like any other unparseable input.
        return None

    if tz:
        return dt.astimezone(tz)
    return dt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment