Created
January 21, 2013 03:13
-
-
Save cormacrelf/4583376 to your computer and use it in GitHub Desktop.
parsedatetime module in gist form.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Parse human-readable date/time text. | |
""" | |
__license__ = """ | |
Copyright (c) 2004-2008 Mike Taylor | |
Copyright (c) 2006-2008 Darshana Chhajed | |
All rights reserved. | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, software | |
distributed under the License is distributed on an "AS IS" BASIS, | |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
See the License for the specific language governing permissions and | |
limitations under the License. | |
""" | |
_debug = False | |
import re | |
import time | |
import datetime | |
import rfc822 | |
import parsedatetime_consts | |
# Copied from feedparser.py | |
# Universal Feedparser | |
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. | |
# Originally a def inside of _parse_date_w3dtf() | |
def _extract_date(m):
    """
    Extract a C{(year, month, day)} tuple from a date regex match.

    The match must define the named groups C{year}, C{julian}, C{month}
    and C{day}; any of the last three may be unmatched.

    @type  m: regex match object
    @param m: match produced by a date pattern with the groups above
    @rtype:  tuple
    @return: C{(year, month, day)} as integers, or C{(0, 0, 0)} when the
             year is below 1000
    """
    year = int(m.group('year'))
    if year < 100:
        # two-digit year: place it in the current century
        year = 100 * (time.gmtime()[0] // 100) + year
    if year < 1000:
        return 0, 0, 0

    julian = m.group('julian')
    if julian:
        # Ordinal (day-of-year) date.  Plain calendar arithmetic keeps the
        # answer independent of the local timezone and always terminates;
        # an out-of-range ordinal simply rolls into the adjacent year.
        first_day = datetime.date(year, 1, 1)
        target = first_day + datetime.timedelta(days=int(julian) - 1)
        return target.year, target.month, target.day

    month = m.group('month')
    if month is None:
        # year only
        return year, 1, 1

    day = m.group('day')
    if day:
        day = int(day)
    else:
        day = 1
    return year, int(month), day
# Copied from feedparser.py | |
# Universal Feedparser | |
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. | |
# Originally a def inside of _parse_date_w3dtf() | |
def _extract_time(m):
    """
    Extract an C{(hours, minutes, seconds)} tuple from a time regex match.

    The match must define the named groups C{hours}, C{minutes} and
    C{seconds}.  The seconds group may carry a fractional part written
    with either C{.} or C{,} (e.g. C{55.5}); the fraction is dropped so
    that whole seconds are always returned.

    @type  m: regex match object or None
    @param m: match produced by a time pattern with the groups above
    @rtype:  tuple
    @return: C{(hours, minutes, seconds)} as integers; C{(0, 0, 0)} when
             there is no match or no hours group was captured
    """
    if not m:
        return 0, 0, 0

    hours = m.group('hours')
    if not hours:
        return 0, 0, 0

    seconds = m.group('seconds')
    if seconds:
        # int('55.5') would raise ValueError, so keep only the digits
        # in front of the decimal separator
        seconds = int(seconds.replace(',', '.').split('.')[0])
    else:
        seconds = 0

    return int(hours), int(m.group('minutes')), seconds
# Copied from feedparser.py | |
# Universal Feedparser | |
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. | |
# Modified to return a tuple instead of mktime | |
# | |
# Original comment: | |
# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by | |
# Drake and licensed under the Python license. Removed all range checking | |
# for month, day, hour, minute, and second, since mktime will normalize | |
# these later | |
def _parse_date_w3dtf(dateString):
    """
    Parse a W3CDTF-style date/time string into a 9-item tuple.

    The whole of C{dateString} must match.  When a time part is present
    (introduced by C{T}) it must end with a time-zone designator (C{Z} or
    C{+hh[:]mm} / C{-hh[:]mm}); the designator is only validated, its
    offset is NOT applied to the returned fields.

    The date and time extraction lives in the module-level helpers
    C{_extract_date} and C{_extract_time} so other code can reuse them.

    @type  dateString: string
    @param dateString: text to parse
    @rtype:  tuple or None
    @return: C{(year, month, day, hours, minutes, seconds, 0, 0, 0)}, or
             C{None} when C{dateString} is not a complete W3CDTF value
    """
    # raw strings: the patterns are full of regex escapes such as \d
    date_re = (r'(?P<year>\d\d\d\d)'
               r'(?:(?P<dsep>-|)'
               r'(?:(?P<julian>\d\d\d)'
               r'|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
    tzd_re = r'(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
    time_re = (r'(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
               r'(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?'
               + tzd_re)
    datetime_rx = re.compile('%s(?:T%s)?' % (date_re, time_re))

    m = datetime_rx.match(dateString)
    if m is None or m.group() != dateString:
        # no match, or trailing text the pattern did not consume
        return None

    return _extract_date(m) + _extract_time(m) + (0, 0, 0)
# Copied from feedparser.py | |
# Universal Feedparser | |
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. | |
# Modified to return a tuple instead of mktime | |
# | |
def _parse_date_rfc822(dateString):
    '''
    Parse an RFC822, RFC1123, RFC2822, or asctime-style date.

    @type  dateString: string
    @param dateString: text to parse
    @rtype:  tuple or None
    @return: whatever C{rfc822.parsedate_tz} returns for the normalised
             string; C{None} when C{dateString} is empty or blank
    '''
    data = dateString.split()
    if not data:
        # nothing to parse; indexing data[0] below would raise IndexError
        return None

    # drop a leading day name ("Mon," / "mon" / "Mon.")
    if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames:
        del data[0]

    if len(data) == 4:
        # four fields left: the last one may be "time+zone" glued together
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('')
        dateString = " ".join(data)

    if len(data) < 5:
        # no time/zone supplied: default to midnight GMT
        dateString += ' 00:00:00 GMT'

    return rfc822.parsedate_tz(dateString)
# rfc822.py defines several time zones, but we define some extra ones.
# 'ET' is equivalent to 'EST', etc.
# Each value is a signed offset written as hours*100 (e.g. -500), which
# appears to follow the encoding of the stdlib table -- TODO confirm.
_additional_timezones = {'AT': -400, 'ET': -500,
                         'CT': -600, 'MT': -700,
                         'PT': -800}
# NOTE: this runs at import time and mutates the rfc822 module's private
# _timezones dict in place, so the extra names are visible to every user
# of rfc822 in the same process.
rfc822._timezones.update(_additional_timezones)
class Calendar: | |
""" | |
A collection of routines to input, parse and manipulate date and times. | |
The text can either be 'normal' date values or it can be human readable. | |
""" | |
def __init__(self, constants=None): | |
""" | |
Default constructor for the L{Calendar} class. | |
@type constants: object | |
@param constants: Instance of the class L{parsedatetime_consts.Constants} | |
@rtype: object | |
@return: L{Calendar} instance | |
""" | |
# if a constants reference is not included, use default | |
if constants is None: | |
self.ptc = parsedatetime_consts.Constants() | |
else: | |
self.ptc = constants | |
self.weekdyFlag = False # monday/tuesday/... | |
self.dateStdFlag = False # 07/21/06 | |
self.dateStrFlag = False # July 21st, 2006 | |
self.timeStdFlag = False # 5:50 | |
self.meridianFlag = False # am/pm | |
self.dayStrFlag = False # tomorrow/yesterday/today/.. | |
self.timeStrFlag = False # lunch/noon/breakfast/... | |
self.modifierFlag = False # after/before/prev/next/.. | |
self.modifier2Flag = False # after/before/prev/next/.. | |
self.unitsFlag = False # hrs/weeks/yrs/min/.. | |
self.qunitsFlag = False # h/m/t/d.. | |
self.timeFlag = 0 | |
self.dateFlag = 0 | |
def _convertUnitAsWords(self, unitText): | |
""" | |
Converts text units into their number value | |
Five = 5 | |
Twenty Five = 25 | |
Two hundred twenty five = 225 | |
Two thousand and twenty five = 2025 | |
Two thousand twenty five = 2025 | |
@type unitText: string | |
@param unitText: number text to convert | |
@rtype: integer | |
@return: numerical value of unitText | |
""" | |
# TODO: implement this | |
pass | |
def _buildTime(self, source, quantity, modifier, units): | |
""" | |
Take C{quantity}, C{modifier} and C{unit} strings and convert them into values. | |
After converting, calcuate the time and return the adjusted sourceTime. | |
@type source: time | |
@param source: time to use as the base (or source) | |
@type quantity: string | |
@param quantity: quantity string | |
@type modifier: string | |
@param modifier: how quantity and units modify the source time | |
@type units: string | |
@param units: unit of the quantity (i.e. hours, days, months, etc) | |
@rtype: struct_time | |
@return: C{struct_time} of the calculated time | |
""" | |
if _debug: | |
print '_buildTime: [%s][%s][%s]' % (quantity, modifier, units) | |
if source is None: | |
source = time.localtime() | |
if quantity is None: | |
quantity = '' | |
else: | |
quantity = quantity.strip() | |
if len(quantity) == 0: | |
qty = 1 | |
else: | |
try: | |
qty = int(quantity) | |
except ValueError: | |
qty = 0 | |
if modifier in self.ptc.Modifiers: | |
qty = qty * self.ptc.Modifiers[modifier] | |
if units is None or units == '': | |
units = 'dy' | |
# plurals are handled by regex's (could be a bug tho) | |
(yr, mth, dy, hr, mn, sec, _, _, _) = source | |
start = datetime.datetime(yr, mth, dy, hr, mn, sec) | |
target = start | |
if units.startswith('y'): | |
target = self.inc(start, year=qty) | |
self.dateFlag = 1 | |
elif units.endswith('th') or units.endswith('ths'): | |
target = self.inc(start, month=qty) | |
self.dateFlag = 1 | |
else: | |
if units.startswith('d'): | |
target = start + datetime.timedelta(days=qty) | |
self.dateFlag = 1 | |
elif units.startswith('h'): | |
target = start + datetime.timedelta(hours=qty) | |
self.timeFlag = 2 | |
elif units.startswith('m'): | |
target = start + datetime.timedelta(minutes=qty) | |
self.timeFlag = 2 | |
elif units.startswith('s'): | |
target = start + datetime.timedelta(seconds=qty) | |
self.timeFlag = 2 | |
elif units.startswith('w'): | |
target = start + datetime.timedelta(weeks=qty) | |
self.dateFlag = 1 | |
return target.timetuple() | |
def parseDate(self, dateString): | |
""" | |
Parse short-form date strings:: | |
'05/28/2006' or '04.21' | |
@type dateString: string | |
@param dateString: text to convert to a C{datetime} | |
@rtype: struct_time | |
@return: calculated C{struct_time} value of dateString | |
""" | |
yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime() | |
# values pulled from regex's will be stored here and later | |
# assigned to mth, dy, yr based on information from the locale | |
# -1 is used as the marker value because we want zero values | |
# to be passed thru so they can be flagged as errors later | |
v1 = -1 | |
v2 = -1 | |
v3 = -1 | |
s = dateString | |
m = self.ptc.CRE_DATE2.search(s) | |
if m is not None: | |
index = m.start() | |
v1 = int(s[:index]) | |
s = s[index + 1:] | |
m = self.ptc.CRE_DATE2.search(s) | |
if m is not None: | |
index = m.start() | |
v2 = int(s[:index]) | |
v3 = int(s[index + 1:]) | |
else: | |
v2 = int(s.strip()) | |
v = [ v1, v2, v3 ] | |
d = { 'm': mth, 'd': dy, 'y': yr } | |
for i in range(0, 3): | |
n = v[i] | |
c = self.ptc.dp_order[i] | |
if n >= 0: | |
d[c] = n | |
# if the year is not specified and the date has already | |
# passed, increment the year | |
if v3 == -1 and ((mth > d['m']) or (mth == d['m'] and dy > d['d'])): | |
yr = d['y'] + 1 | |
else: | |
yr = d['y'] | |
mth = d['m'] | |
dy = d['d'] | |
# birthday epoch constraint | |
if yr < self.ptc.BirthdayEpoch: | |
yr += 2000 | |
elif yr < 100: | |
yr += 1900 | |
if _debug: | |
print 'parseDate: ', yr, mth, dy, self.ptc.daysInMonth(mth, yr) | |
if (mth > 0 and mth <= 12) and \ | |
(dy > 0 and dy <= self.ptc.daysInMonth(mth, yr)): | |
sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst) | |
else: | |
self.dateFlag = 0 | |
self.timeFlag = 0 | |
sourceTime = time.localtime() # return current time if date | |
# string is invalid | |
return sourceTime | |
def parseDateText(self, dateString): | |
""" | |
Parse long-form date strings:: | |
'May 31st, 2006' | |
'Jan 1st' | |
'July 2006' | |
@type dateString: string | |
@param dateString: text to convert to a datetime | |
@rtype: struct_time | |
@return: calculated C{struct_time} value of dateString | |
""" | |
yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime() | |
currentMth = mth | |
currentDy = dy | |
s = dateString.lower() | |
m = self.ptc.CRE_DATE3.search(s) | |
mth = m.group('mthname') | |
mth = self.ptc.MonthOffsets[mth] | |
if m.group('day') != None: | |
dy = int(m.group('day')) | |
else: | |
dy = 1 | |
if m.group('year') != None: | |
yr = int(m.group('year')) | |
# birthday epoch constraint | |
if yr < self.ptc.BirthdayEpoch: | |
yr += 2000 | |
elif yr < 100: | |
yr += 1900 | |
elif (mth < currentMth) or (mth == currentMth and dy < currentDy): | |
# if that day and month have already passed in this year, | |
# then increment the year by 1 | |
yr += 1 | |
if dy > 0 and dy <= self.ptc.daysInMonth(mth, yr): | |
sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst) | |
else: | |
# Return current time if date string is invalid | |
self.dateFlag = 0 | |
self.timeFlag = 0 | |
sourceTime = time.localtime() | |
return sourceTime | |
def evalRanges(self, datetimeString, sourceTime=None): | |
""" | |
Evaluate the C{datetimeString} text and determine if | |
it represents a date or time range. | |
@type datetimeString: string | |
@param datetimeString: datetime text to evaluate | |
@type sourceTime: struct_time | |
@param sourceTime: C{struct_time} value to use as the base | |
@rtype: tuple | |
@return: tuple of: start datetime, end datetime and the invalid flag | |
""" | |
startTime = '' | |
endTime = '' | |
startDate = '' | |
endDate = '' | |
rangeFlag = 0 | |
s = datetimeString.strip().lower() | |
if self.ptc.rangeSep in s: | |
s = s.replace(self.ptc.rangeSep, ' %s ' % self.ptc.rangeSep) | |
s = s.replace(' ', ' ') | |
m = self.ptc.CRE_TIMERNG1.search(s) | |
if m is not None: | |
rangeFlag = 1 | |
else: | |
m = self.ptc.CRE_TIMERNG2.search(s) | |
if m is not None: | |
rangeFlag = 2 | |
else: | |
m = self.ptc.CRE_TIMERNG4.search(s) | |
if m is not None: | |
rangeFlag = 7 | |
else: | |
m = self.ptc.CRE_TIMERNG3.search(s) | |
if m is not None: | |
rangeFlag = 3 | |
else: | |
m = self.ptc.CRE_DATERNG1.search(s) | |
if m is not None: | |
rangeFlag = 4 | |
else: | |
m = self.ptc.CRE_DATERNG2.search(s) | |
if m is not None: | |
rangeFlag = 5 | |
else: | |
m = self.ptc.CRE_DATERNG3.search(s) | |
if m is not None: | |
rangeFlag = 6 | |
if _debug: | |
print 'evalRanges: rangeFlag =', rangeFlag, '[%s]' % s | |
if m is not None: | |
if (m.group() != s): | |
# capture remaining string | |
parseStr = m.group() | |
chunk1 = s[:m.start()] | |
chunk2 = s[m.end():] | |
s = '%s %s' % (chunk1, chunk2) | |
flag = 1 | |
sourceTime, flag = self.parse(s, sourceTime) | |
if flag == 0: | |
sourceTime = None | |
else: | |
parseStr = s | |
if rangeFlag == 1: | |
m = re.search(self.ptc.rangeSep, parseStr) | |
startTime, sflag = self.parse((parseStr[:m.start()]), sourceTime) | |
endTime, eflag = self.parse((parseStr[(m.start() + 1):]), sourceTime) | |
if (eflag != 0) and (sflag != 0): | |
return (startTime, endTime, 2) | |
elif rangeFlag == 2: | |
m = re.search(self.ptc.rangeSep, parseStr) | |
startTime, sflag = self.parse((parseStr[:m.start()]), sourceTime) | |
endTime, eflag = self.parse((parseStr[(m.start() + 1):]), sourceTime) | |
if (eflag != 0) and (sflag != 0): | |
return (startTime, endTime, 2) | |
elif rangeFlag == 3 or rangeFlag == 7: | |
m = re.search(self.ptc.rangeSep, parseStr) | |
# capturing the meridian from the end time | |
if self.ptc.usesMeridian: | |
ampm = re.search(self.ptc.am[0], parseStr) | |
# appending the meridian to the start time | |
if ampm is not None: | |
startTime, sflag = self.parse((parseStr[:m.start()] + self.ptc.meridian[0]), sourceTime) | |
else: | |
startTime, sflag = self.parse((parseStr[:m.start()] + self.ptc.meridian[1]), sourceTime) | |
else: | |
startTime, sflag = self.parse((parseStr[:m.start()]), sourceTime) | |
endTime, eflag = self.parse(parseStr[(m.start() + 1):], sourceTime) | |
if (eflag != 0) and (sflag != 0): | |
return (startTime, endTime, 2) | |
elif rangeFlag == 4: | |
m = re.search(self.ptc.rangeSep, parseStr) | |
startDate, sflag = self.parse((parseStr[:m.start()]), sourceTime) | |
endDate, eflag = self.parse((parseStr[(m.start() + 1):]), sourceTime) | |
if (eflag != 0) and (sflag != 0): | |
return (startDate, endDate, 1) | |
elif rangeFlag == 5: | |
m = re.search(self.ptc.rangeSep, parseStr) | |
endDate = parseStr[(m.start() + 1):] | |
# capturing the year from the end date | |
date = self.ptc.CRE_DATE3.search(endDate) | |
endYear = date.group('year') | |
# appending the year to the start date if the start date | |
# does not have year information and the end date does. | |
# eg : "Aug 21 - Sep 4, 2007" | |
if endYear is not None: | |
startDate = (parseStr[:m.start()]).strip() | |
date = self.ptc.CRE_DATE3.search(startDate) | |
startYear = date.group('year') | |
if startYear is None: | |
startDate = startDate + ', ' + endYear | |
else: | |
startDate = parseStr[:m.start()] | |
startDate, sflag = self.parse(startDate, sourceTime) | |
endDate, eflag = self.parse(endDate, sourceTime) | |
if (eflag != 0) and (sflag != 0): | |
return (startDate, endDate, 1) | |
elif rangeFlag == 6: | |
m = re.search(self.ptc.rangeSep, parseStr) | |
startDate = parseStr[:m.start()] | |
# capturing the month from the start date | |
mth = self.ptc.CRE_DATE3.search(startDate) | |
mth = mth.group('mthname') | |
# appending the month name to the end date | |
endDate = mth + parseStr[(m.start() + 1):] | |
startDate, sflag = self.parse(startDate, sourceTime) | |
endDate, eflag = self.parse(endDate, sourceTime) | |
if (eflag != 0) and (sflag != 0): | |
return (startDate, endDate, 1) | |
else: | |
# if range is not found | |
sourceTime = time.localtime() | |
return (sourceTime, sourceTime, 0) | |
def _CalculateDOWDelta(self, wd, wkdy, offset, style, currentDayStyle): | |
""" | |
Based on the C{style} and C{currentDayStyle} determine what | |
day-of-week value is to be returned. | |
@type wd: integer | |
@param wd: day-of-week value for the current day | |
@type wkdy: integer | |
@param wkdy: day-of-week value for the parsed day | |
@type offset: integer | |
@param offset: offset direction for any modifiers (-1, 0, 1) | |
@type style: integer | |
@param style: normally the value set in C{Constants.DOWParseStyle} | |
@type currentDayStyle: integer | |
@param currentDayStyle: normally the value set in C{Constants.CurrentDOWParseStyle} | |
@rtype: integer | |
@return: calculated day-of-week | |
""" | |
if offset == 1: | |
# modifier is indicating future week eg: "next". | |
# DOW is calculated as DOW of next week | |
diff = 7 - wd + wkdy | |
elif offset == -1: | |
# modifier is indicating past week eg: "last","previous" | |
# DOW is calculated as DOW of previous week | |
diff = wkdy - wd - 7 | |
elif offset == 0: | |
# modifier is indiacting current week eg: "this" | |
# DOW is calculated as DOW of this week | |
diff = wkdy - wd | |
elif offset == 2: | |
# no modifier is present. | |
# i.e. string to be parsed is just DOW | |
if style == 1: | |
# next occurance of the DOW is calculated | |
if currentDayStyle == True: | |
if wkdy >= wd: | |
diff = wkdy - wd | |
else: | |
diff = 7 - wd + wkdy | |
else: | |
if wkdy > wd: | |
diff = wkdy - wd | |
else: | |
diff = 7 - wd + wkdy | |
elif style == -1: | |
# last occurance of the DOW is calculated | |
if currentDayStyle == True: | |
if wkdy <= wd: | |
diff = wkdy - wd | |
else: | |
diff = wkdy - wd - 7 | |
else: | |
if wkdy < wd: | |
diff = wkdy - wd | |
else: | |
diff = wkdy - wd - 7 | |
else: | |
# occurance of the DOW in the current week is calculated | |
diff = wkdy - wd | |
if _debug: | |
print "wd %s, wkdy %s, offset %d, style %d\n" % (wd, wkdy, offset, style) | |
return diff | |
def _evalModifier(self, modifier, chunk1, chunk2, sourceTime): | |
""" | |
Evaluate the C{modifier} string and following text (passed in | |
as C{chunk1} and C{chunk2}) and if they match any known modifiers | |
calculate the delta and apply it to C{sourceTime}. | |
@type modifier: string | |
@param modifier: modifier text to apply to sourceTime | |
@type chunk1: string | |
@param chunk1: first text chunk that followed modifier (if any) | |
@type chunk2: string | |
@param chunk2: second text chunk that followed modifier (if any) | |
@type sourceTime: struct_time | |
@param sourceTime: C{struct_time} value to use as the base | |
@rtype: tuple | |
@return: tuple of: remaining text and the modified sourceTime | |
""" | |
offset = self.ptc.Modifiers[modifier] | |
if sourceTime is not None: | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime | |
else: | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime() | |
# capture the units after the modifier and the remaining | |
# string after the unit | |
m = self.ptc.CRE_REMAINING.search(chunk2) | |
if m is not None: | |
index = m.start() + 1 | |
unit = chunk2[:m.start()] | |
chunk2 = chunk2[index:] | |
else: | |
unit = chunk2 | |
chunk2 = '' | |
flag = False | |
if unit == 'month' or \ | |
unit == 'mth' or \ | |
unit == 'm': | |
if offset == 0: | |
dy = self.ptc.daysInMonth(mth, yr) | |
sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst) | |
elif offset == 2: | |
# if day is the last day of the month, calculate the last day | |
# of the next month | |
if dy == self.ptc.daysInMonth(mth, yr): | |
dy = self.ptc.daysInMonth(mth + 1, yr) | |
start = datetime.datetime(yr, mth, dy, 9, 0, 0) | |
target = self.inc(start, month=1) | |
sourceTime = target.timetuple() | |
else: | |
start = datetime.datetime(yr, mth, 1, 9, 0, 0) | |
target = self.inc(start, month=offset) | |
sourceTime = target.timetuple() | |
flag = True | |
self.dateFlag = 1 | |
if unit == 'week' or \ | |
unit == 'wk' or \ | |
unit == 'w': | |
if offset == 0: | |
start = datetime.datetime(yr, mth, dy, 17, 0, 0) | |
target = start + datetime.timedelta(days=(4 - wd)) | |
sourceTime = target.timetuple() | |
elif offset == 2: | |
start = datetime.datetime(yr, mth, dy, 9, 0, 0) | |
target = start + datetime.timedelta(days=7) | |
sourceTime = target.timetuple() | |
else: | |
return self._evalModifier(modifier, chunk1, "monday " + chunk2, sourceTime) | |
flag = True | |
self.dateFlag = 1 | |
if unit == 'day' or \ | |
unit == 'dy' or \ | |
unit == 'd': | |
if offset == 0: | |
sourceTime = (yr, mth, dy, 17, 0, 0, wd, yd, isdst) | |
self.timeFlag = 2 | |
elif offset == 2: | |
start = datetime.datetime(yr, mth, dy, hr, mn, sec) | |
target = start + datetime.timedelta(days=1) | |
sourceTime = target.timetuple() | |
else: | |
start = datetime.datetime(yr, mth, dy, 9, 0, 0) | |
target = start + datetime.timedelta(days=offset) | |
sourceTime = target.timetuple() | |
flag = True | |
self.dateFlag = 1 | |
if unit == 'hour' or \ | |
unit == 'hr': | |
if offset == 0: | |
sourceTime = (yr, mth, dy, hr, 0, 0, wd, yd, isdst) | |
else: | |
start = datetime.datetime(yr, mth, dy, hr, 0, 0) | |
target = start + datetime.timedelta(hours=offset) | |
sourceTime = target.timetuple() | |
flag = True | |
self.timeFlag = 2 | |
if unit == 'year' or \ | |
unit == 'yr' or \ | |
unit == 'y': | |
if offset == 0: | |
sourceTime = (yr, 12, 31, hr, mn, sec, wd, yd, isdst) | |
elif offset == 2: | |
sourceTime = (yr + 1, mth, dy, hr, mn, sec, wd, yd, isdst) | |
else: | |
sourceTime = (yr + offset, 1, 1, 9, 0, 0, wd, yd, isdst) | |
flag = True | |
self.dateFlag = 1 | |
if flag == False: | |
m = self.ptc.CRE_WEEKDAY.match(unit) | |
if m is not None: | |
wkdy = m.group() | |
self.dateFlag = 1 | |
if modifier == 'eod': | |
# Calculate the upcoming weekday | |
self.modifierFlag = False | |
(sourceTime, _) = self.parse(wkdy, sourceTime) | |
sources = self.ptc.buildSources(sourceTime) | |
self.timeFlag = 2 | |
if modifier in sources: | |
sourceTime = sources[modifier] | |
else: | |
wkdy = self.ptc.WeekdayOffsets[wkdy] | |
diff = self._CalculateDOWDelta(wd, wkdy, offset, | |
self.ptc.DOWParseStyle, | |
self.ptc.CurrentDOWParseStyle) | |
start = datetime.datetime(yr, mth, dy, 9, 0, 0) | |
target = start + datetime.timedelta(days=diff) | |
sourceTime = target.timetuple() | |
flag = True | |
self.dateFlag = 1 | |
if not flag: | |
m = self.ptc.CRE_TIME.match(unit) | |
if m is not None: | |
self.modifierFlag = False | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst), _ = self.parse(unit) | |
start = datetime.datetime(yr, mth, dy, hr, mn, sec) | |
target = start + datetime.timedelta(days=offset) | |
sourceTime = target.timetuple() | |
flag = True | |
else: | |
self.modifierFlag = False | |
# check if the remaining text is parsable and if so, | |
# use it as the base time for the modifier source time | |
t, flag2 = self.parse('%s %s' % (chunk1, unit), sourceTime) | |
if flag2 != 0: | |
sourceTime = t | |
sources = self.ptc.buildSources(sourceTime) | |
if modifier in sources: | |
sourceTime = sources[modifier] | |
flag = True | |
self.timeFlag = 2 | |
# if the word after next is a number, the string is more than likely | |
# to be "next 4 hrs" which we will have to combine the units with the | |
# rest of the string | |
if not flag: | |
if offset < 0: | |
# if offset is negative, the unit has to be made negative | |
unit = '-%s' % unit | |
chunk2 = '%s %s' % (unit, chunk2) | |
self.modifierFlag = False | |
#return '%s %s' % (chunk1, chunk2), sourceTime | |
return '%s' % chunk2, sourceTime | |
def _evalModifier2(self, modifier, chunk1 , chunk2, sourceTime): | |
""" | |
Evaluate the C{modifier} string and following text (passed in | |
as C{chunk1} and C{chunk2}) and if they match any known modifiers | |
calculate the delta and apply it to C{sourceTime}. | |
@type modifier: string | |
@param modifier: modifier text to apply to C{sourceTime} | |
@type chunk1: string | |
@param chunk1: first text chunk that followed modifier (if any) | |
@type chunk2: string | |
@param chunk2: second text chunk that followed modifier (if any) | |
@type sourceTime: struct_time | |
@param sourceTime: C{struct_time} value to use as the base | |
@rtype: tuple | |
@return: tuple of: remaining text and the modified sourceTime | |
""" | |
offset = self.ptc.Modifiers[modifier] | |
digit = r'\d+' | |
self.modifier2Flag = False | |
# If the string after the negative modifier starts with digits, | |
# then it is likely that the string is similar to ' before 3 days' | |
# or 'evening prior to 3 days'. | |
# In this case, the total time is calculated by subtracting '3 days' | |
# from the current date. | |
# So, we have to identify the quantity and negate it before parsing | |
# the string. | |
# This is not required for strings not starting with digits since the | |
# string is enough to calculate the sourceTime | |
if chunk2 != '': | |
if offset < 0: | |
m = re.match(digit, chunk2.strip()) | |
if m is not None: | |
qty = int(m.group()) * -1 | |
chunk2 = chunk2[m.end():] | |
chunk2 = '%d%s' % (qty, chunk2) | |
sourceTime, flag1 = self.parse(chunk2, sourceTime) | |
if flag1 == 0: | |
flag1 = True | |
else: | |
flag1 = False | |
flag2 = False | |
else: | |
flag1 = False | |
if chunk1 != '': | |
if offset < 0: | |
m = re.search(digit, chunk1.strip()) | |
if m is not None: | |
qty = int(m.group()) * -1 | |
chunk1 = chunk1[m.end():] | |
chunk1 = '%d%s' % (qty, chunk1) | |
tempDateFlag = self.dateFlag | |
tempTimeFlag = self.timeFlag | |
sourceTime2, flag2 = self.parse(chunk1, sourceTime) | |
else: | |
return sourceTime, (flag1 and flag2) | |
# if chunk1 is not a datetime and chunk2 is then do not use datetime | |
# value returned by parsing chunk1 | |
if not (flag1 == False and flag2 == 0): | |
sourceTime = sourceTime2 | |
else: | |
self.timeFlag = tempTimeFlag | |
self.dateFlag = tempDateFlag | |
return sourceTime, (flag1 and flag2) | |
def _evalString(self, datetimeString, sourceTime=None): | |
""" | |
Calculate the datetime based on flags set by the L{parse()} routine | |
Examples handled:: | |
RFC822, W3CDTF formatted dates | |
HH:MM[:SS][ am/pm] | |
MM/DD/YYYY | |
DD MMMM YYYY | |
@type datetimeString: string | |
@param datetimeString: text to try and parse as more "traditional" | |
date/time text | |
@type sourceTime: struct_time | |
@param sourceTime: C{struct_time} value to use as the base | |
@rtype: datetime | |
@return: calculated C{struct_time} value or current C{struct_time} | |
if not parsed | |
""" | |
s = datetimeString.strip() | |
now = time.localtime() | |
# Given string date is a RFC822 date | |
if sourceTime is None: | |
sourceTime = _parse_date_rfc822(s) | |
if sourceTime is not None: | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst, _) = sourceTime | |
self.dateFlag = 1 | |
if (hr != 0) and (mn != 0) and (sec != 0): | |
self.timeFlag = 2 | |
sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst) | |
# Given string date is a W3CDTF date | |
if sourceTime is None: | |
sourceTime = _parse_date_w3dtf(s) | |
if sourceTime is not None: | |
self.dateFlag = 1 | |
self.timeFlag = 2 | |
if sourceTime is None: | |
s = s.lower() | |
# Given string is in the format HH:MM(:SS)(am/pm) | |
if self.meridianFlag: | |
if sourceTime is None: | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now | |
else: | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime | |
m = self.ptc.CRE_TIMEHMS2.search(s) | |
if m is not None: | |
dt = s[:m.start('meridian')].strip() | |
if len(dt) <= 2: | |
hr = int(dt) | |
mn = 0 | |
sec = 0 | |
else: | |
hr, mn, sec = _extract_time(m) | |
if hr == 24: | |
hr = 0 | |
sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst) | |
meridian = m.group('meridian').lower() | |
# if 'am' found and hour is 12 - force hour to 0 (midnight) | |
if (meridian in self.ptc.am) and hr == 12: | |
sourceTime = (yr, mth, dy, 0, mn, sec, wd, yd, isdst) | |
# if 'pm' found and hour < 12, add 12 to shift to evening | |
if (meridian in self.ptc.pm) and hr < 12: | |
sourceTime = (yr, mth, dy, hr + 12, mn, sec, wd, yd, isdst) | |
# invalid time | |
if hr > 24 or mn > 59 or sec > 59: | |
sourceTime = now | |
self.dateFlag = 0 | |
self.timeFlag = 0 | |
self.meridianFlag = False | |
# Given string is in the format HH:MM(:SS) | |
if self.timeStdFlag: | |
if sourceTime is None: | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now | |
else: | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime | |
m = self.ptc.CRE_TIMEHMS.search(s) | |
if m is not None: | |
hr, mn, sec = _extract_time(m) | |
if hr == 24: | |
hr = 0 | |
if hr > 24 or mn > 59 or sec > 59: | |
# invalid time | |
sourceTime = now | |
self.dateFlag = 0 | |
self.timeFlag = 0 | |
else: | |
sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst) | |
self.timeStdFlag = False | |
# Given string is in the format 07/21/2006 | |
if self.dateStdFlag: | |
sourceTime = self.parseDate(s) | |
self.dateStdFlag = False | |
# Given string is in the format "May 23rd, 2005" | |
if self.dateStrFlag: | |
sourceTime = self.parseDateText(s) | |
self.dateStrFlag = False | |
# Given string is a weekday | |
if self.weekdyFlag: | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now | |
start = datetime.datetime(yr, mth, dy, hr, mn, sec) | |
wkdy = self.ptc.WeekdayOffsets[s] | |
if wkdy > wd: | |
qty = self._CalculateDOWDelta(wd, wkdy, 2, | |
self.ptc.DOWParseStyle, | |
self.ptc.CurrentDOWParseStyle) | |
else: | |
qty = self._CalculateDOWDelta(wd, wkdy, 2, | |
self.ptc.DOWParseStyle, | |
self.ptc.CurrentDOWParseStyle) | |
target = start + datetime.timedelta(days=qty) | |
wd = wkdy | |
sourceTime = target.timetuple() | |
self.weekdyFlag = False | |
# Given string is a natural language time string like | |
# lunch, midnight, etc | |
if self.timeStrFlag: | |
if s in self.ptc.re_values['now']: | |
sourceTime = now | |
else: | |
sources = self.ptc.buildSources(sourceTime) | |
if s in sources: | |
sourceTime = sources[s] | |
else: | |
sourceTime = now | |
self.dateFlag = 0 | |
self.timeFlag = 0 | |
self.timeStrFlag = False | |
# Given string is a natural language date string like today, tomorrow.. | |
if self.dayStrFlag: | |
if sourceTime is None: | |
sourceTime = now | |
(yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime | |
if s in self.ptc.dayOffsets: | |
offset = self.ptc.dayOffsets[s] | |
else: | |
offset = 0 | |
start = datetime.datetime(yr, mth, dy, 9, 0, 0) | |
target = start + datetime.timedelta(days=offset) | |
sourceTime = target.timetuple() | |
self.dayStrFlag = False | |
# Given string is a time string with units like "5 hrs 30 min" | |
if self.unitsFlag: | |
modifier = '' # TODO | |
if sourceTime is None: | |
sourceTime = now | |
m = self.ptc.CRE_UNITS.search(s) | |
if m is not None: | |
units = m.group('units') | |
quantity = s[:m.start('units')] | |
sourceTime = self._buildTime(sourceTime, quantity, modifier, units) | |
self.unitsFlag = False | |
# Given string is a time string with single char units like "5 h 30 m" | |
if self.qunitsFlag: | |
modifier = '' # TODO | |
if sourceTime is None: | |
sourceTime = now | |
m = self.ptc.CRE_QUNITS.search(s) | |
if m is not None: | |
units = m.group('qunits') | |
quantity = s[:m.start('qunits')] | |
sourceTime = self._buildTime(sourceTime, quantity, modifier, units) | |
self.qunitsFlag = False | |
# Given string does not match anything | |
if sourceTime is None: | |
sourceTime = now | |
self.dateFlag = 0 | |
self.timeFlag = 0 | |
return sourceTime | |
def parse(self, datetimeString, sourceTime=None):
    """
    Splits the given C{datetimeString} into tokens, finds the regex
    patterns that match and then calculates a C{struct_time} value from
    the chunks.

    If C{sourceTime} is given then the C{struct_time} value will be
    calculated from that value, otherwise from the current date/time.
    A C{datetime.datetime} value is converted with C{timetuple()} first.

    If the C{datetimeString} is parsed and date/time value found then
    the second item of the returned tuple will be a flag to let you know
    what kind of C{struct_time} value is being returned::

        0 = not parsed at all
        1 = parsed as a C{date}
        2 = parsed as a C{time}
        3 = parsed as a C{datetime}

    An empty (or whitespace only) C{datetimeString} is answered without
    touching any state: C{sourceTime} is handed back together with the
    sum of the current C{dateFlag} and C{timeFlag}, or, when no
    C{sourceTime} was given, the local time with a flag of 0.

    The method resets C{self.dateFlag} and C{self.timeFlag}, sets the
    various C{self.*Flag} markers for the pattern that matched and may
    call itself recursively for the text left over after a modifier.

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: modified C{sourceTime} and the result flag
    @raise Exception: if a non-empty C{sourceTime} is neither a
                      C{datetime}, a C{struct_time} nor a tuple
    """
    if sourceTime:
        if isinstance(sourceTime, datetime.datetime):
            if _debug:
                print('coercing datetime to timetuple')
            sourceTime = sourceTime.timetuple()
        else:
            if not isinstance(sourceTime, time.struct_time) and \
               not isinstance(sourceTime, tuple):
                raise Exception('sourceTime is not a struct_time')

    s = datetimeString.strip().lower()
    parseStr = ''
    totalTime = sourceTime

    if s == '':
        if sourceTime is not None:
            # flags are intentionally left as they are here
            return (sourceTime, self.dateFlag + self.timeFlag)
        else:
            return (time.localtime(), 0)

    self.timeFlag = 0
    self.dateFlag = 0

    # Each pass tries the patterns in a fixed priority order; the first
    # one that matches fills parseStr, and the remaining patterns are
    # skipped because they are all guarded by "parseStr == ''".
    while len(s) > 0:
        flag = False
        chunk1 = ''
        chunk2 = ''

        if _debug:
            print('parse (top of loop): [%s][%s]' % (s, parseStr))

        if parseStr == '':
            # Modifier like next\prev..
            m = self.ptc.CRE_MODIFIER.search(s)
            if m is not None:
                self.modifierFlag = True
                if (m.group('modifier') != s):
                    # capture remaining string
                    parseStr = m.group('modifier')
                    chunk1 = s[:m.start('modifier')].strip()
                    chunk2 = s[m.end('modifier'):].strip()
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Modifier like from\after\prior..
            m = self.ptc.CRE_MODIFIER2.search(s)
            if m is not None:
                self.modifier2Flag = True
                if (m.group('modifier') != s):
                    # capture remaining string
                    parseStr = m.group('modifier')
                    chunk1 = s[:m.start('modifier')].strip()
                    chunk2 = s[m.end('modifier'):].strip()
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            valid_date = False
            for match in self.ptc.CRE_DATE3.finditer(s):
                # to prevent "HH:MM(:SS) time strings" expressions from
                # triggering this regex, we check if the month field
                # exists in the searched expression, if it doesn't
                # exist, the date field is not valid
                if match.group('mthname'):
                    m = self.ptc.CRE_DATE3.search(s, match.start())
                    valid_date = True
                    break

            # String date format
            if valid_date:
                self.dateStrFlag = True
                self.dateFlag = 1
                if (m.group('date') != s):
                    # capture remaining string
                    parseStr = m.group('date')
                    chunk1 = s[:m.start('date')]
                    chunk2 = s[m.end('date'):]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Standard date format
            m = self.ptc.CRE_DATE.search(s)
            if m is not None:
                self.dateStdFlag = True
                self.dateFlag = 1
                if (m.group('date') != s):
                    # capture remaining string
                    parseStr = m.group('date')
                    chunk1 = s[:m.start('date')]
                    chunk2 = s[m.end('date'):]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Natural language day strings
            m = self.ptc.CRE_DAY.search(s)
            if m is not None:
                self.dayStrFlag = True
                self.dateFlag = 1
                if (m.group('day') != s):
                    # capture remaining string
                    parseStr = m.group('day')
                    chunk1 = s[:m.start('day')]
                    chunk2 = s[m.end('day'):]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Quantity + Units
            m = self.ptc.CRE_UNITS.search(s)
            if m is not None:
                self.unitsFlag = True
                if (m.group('qty') != s):
                    # capture remaining string
                    parseStr = m.group('qty')
                    chunk1 = s[:m.start('qty')].strip()
                    chunk2 = s[m.end('qty'):].strip()

                    # a '-' directly in front of the quantity belongs
                    # to the quantity, not to the preceding text
                    if chunk1[-1:] == '-':
                        parseStr = '-%s' % parseStr
                        chunk1 = chunk1[:-1]

                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Quantity + single character units
            m = self.ptc.CRE_QUNITS.search(s)
            if m is not None:
                self.qunitsFlag = True
                if (m.group('qty') != s):
                    # capture remaining string
                    parseStr = m.group('qty')
                    chunk1 = s[:m.start('qty')].strip()
                    chunk2 = s[m.end('qty'):].strip()

                    if chunk1[-1:] == '-':
                        parseStr = '-%s' % parseStr
                        chunk1 = chunk1[:-1]

                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Weekday
            m = self.ptc.CRE_WEEKDAY.search(s)
            if m is not None:
                gv = m.group('weekday')
                if s not in self.ptc.dayOffsets:
                    self.weekdyFlag = True
                    self.dateFlag = 1
                    if (gv != s):
                        # capture remaining string
                        parseStr = gv
                        chunk1 = s[:m.start('weekday')]
                        chunk2 = s[m.end('weekday'):]
                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

        if parseStr == '':
            # Natural language time strings
            m = self.ptc.CRE_TIME.search(s)
            if m is not None:
                self.timeStrFlag = True
                self.timeFlag = 2
                if (m.group('time') != s):
                    # capture remaining string
                    parseStr = m.group('time')
                    chunk1 = s[:m.start('time')]
                    chunk2 = s[m.end('time'):]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # HH:MM(:SS) am/pm time strings
            m = self.ptc.CRE_TIMEHMS2.search(s)
            if m is not None:
                self.meridianFlag = True
                self.timeFlag = 2
                if m.group('minutes') is not None:
                    if m.group('seconds') is not None:
                        parseStr = '%s:%s:%s %s' % (m.group('hours'),
                                                    m.group('minutes'),
                                                    m.group('seconds'),
                                                    m.group('meridian'))
                    else:
                        parseStr = '%s:%s %s' % (m.group('hours'),
                                                 m.group('minutes'),
                                                 m.group('meridian'))
                else:
                    parseStr = '%s %s' % (m.group('hours'),
                                          m.group('meridian'))

                chunk1 = s[:m.start('hours')]
                chunk2 = s[m.end('meridian'):]

                s = '%s %s' % (chunk1, chunk2)
                flag = True

        if parseStr == '':
            # HH:MM(:SS) time strings
            m = self.ptc.CRE_TIMEHMS.search(s)
            if m is not None:
                self.timeStdFlag = True
                self.timeFlag = 2
                if m.group('seconds') is not None:
                    parseStr = '%s:%s:%s' % (m.group('hours'),
                                             m.group('minutes'),
                                             m.group('seconds'))
                    chunk1 = s[:m.start('hours')]
                    chunk2 = s[m.end('seconds'):]
                else:
                    parseStr = '%s:%s' % (m.group('hours'),
                                          m.group('minutes'))
                    chunk1 = s[:m.start('hours')]
                    chunk2 = s[m.end('minutes'):]

                s = '%s %s' % (chunk1, chunk2)
                flag = True

        # if string does not match any regex, empty string to
        # come out of the while loop
        if not flag:
            s = ''

        if _debug:
            print('parse (bottom) [%s][%s][%s][%s]' % (s, parseStr, chunk1, chunk2))
            print('weekday %s, dateStd %s, dateStr %s, time %s, timeStr %s, meridian %s' %
                  (self.weekdyFlag, self.dateStdFlag, self.dateStrFlag,
                   self.timeStdFlag, self.timeStrFlag, self.meridianFlag))
            print('dayStr %s, modifier %s, modifier2 %s, units %s, qunits %s' %
                  (self.dayStrFlag, self.modifierFlag, self.modifier2Flag,
                   self.unitsFlag, self.qunitsFlag))

        # evaluate the matched string
        if parseStr != '':
            if self.modifierFlag == True:
                t, totalTime = self._evalModifier(parseStr, chunk1, chunk2, totalTime)
                # t is the unparsed part of the chunks.
                # If it is not date/time, return current
                # totalTime as it is; else return the output
                # after parsing t.
                if t != '' and t is not None:
                    # the recursive call resets the flags, so remember
                    # them in case the remainder turns out to be noise
                    tempDateFlag = self.dateFlag
                    tempTimeFlag = self.timeFlag
                    (totalTime2, flag) = self.parse(t, totalTime)

                    if flag == 0 and totalTime is not None:
                        self.timeFlag = tempTimeFlag
                        self.dateFlag = tempDateFlag

                        return (totalTime, self.dateFlag + self.timeFlag)
                    else:
                        return (totalTime2, self.dateFlag + self.timeFlag)

            elif self.modifier2Flag == True:
                totalTime, invalidFlag = self._evalModifier2(parseStr, chunk1, chunk2, totalTime)

                # NOTE(review): compared with "== True" on purpose; the
                # value comes from _evalModifier2 and may not be a plain
                # bool, so a truthiness test could behave differently.
                if invalidFlag == True:
                    self.dateFlag = 0
                    self.timeFlag = 0

            else:
                totalTime = self._evalString(parseStr, totalTime)
                parseStr = ''

    # String is not parsed at all
    if totalTime is None or totalTime == sourceTime:
        totalTime = time.localtime()
        self.dateFlag = 0
        self.timeFlag = 0

    return (totalTime, self.dateFlag + self.timeFlag)
def inc(self, source, month=None, year=None):
    """
    Takes the given C{source} date and increments it according to the
    values passed in by month and/or year.

    This routine is needed because Python's C{timedelta()} function
    does not allow for month or year increments.

    Negative values move backwards in time.  A value that cannot be
    converted with C{int()} (C{ValueError}) is treated as 0.  If the
    day of C{source} does not exist in the resulting month (for
    example 31 January plus one month, or 29 February plus one year)
    the last day of that month is used instead.

    @type  source: datetime
    @param source: C{datetime} value to increment
    @type  month:  integer
    @param month:  optional number of months to increment
    @type  year:   integer
    @param year:   optional number of years to increment

    @rtype:  datetime
    @return: C{source} incremented by the number of months and/or years
    """
    yr = source.year
    mth = source.month
    dy = source.day

    if year:
        try:
            yi = int(year)
        except ValueError:
            yi = 0

        yr += yi

    if month:
        try:
            mi = int(month)
        except ValueError:
            mi = 0

        m = abs(mi)
        y = m // 12     # how many whole years are in month increment
        m = m % 12      # get remaining months

        if mi < 0:
            y = -y                  # whole years also go backwards
            mth = mth - m           # sub months from start month
            if mth < 1:             # cross start-of-year?
                y -= 1              #   yes - decrement year
                mth += 12           #   and fix month
        else:
            mth = mth + m           # add months to start month
            if mth > 12:            # cross end-of-year?
                y += 1              #   yes - increment year
                mth -= 12           #   and fix month

        yr += y

    # if the day ends up past the last day of the new month, set it to
    # the last day; done for year-only changes too so that Feb 29 maps
    # onto Feb 28 in a non-leap year instead of raising in replace()
    lastDay = self.ptc.daysInMonth(mth, yr)
    if dy > lastDay:
        dy = lastDay

    d = source.replace(year=yr, month=mth, day=dy)

    return source + (d - source)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
parsedatetime constants and helper functions to determine | |
regex values from Locale information if present. | |
Also contains the internal Locale classes to give some sane | |
defaults if PyICU is not found. | |
""" | |
__license__ = """ | |
Copyright (c) 2004-2008 Mike Taylor | |
Copyright (c) 2006-2008 Darshana Chhajed | |
Copyright (c) 2007 Bernd Zeimetz <[email protected]> | |
All rights reserved. | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, software | |
distributed under the License is distributed on an "AS IS" BASIS, | |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
See the License for the specific language governing permissions and | |
limitations under the License. | |
""" | |
try: | |
import PyICU as pyicu | |
except: | |
pyicu = None | |
import datetime | |
import calendar | |
import time | |
import re | |
class pdtLocale_en:
    """
    en_US Locale constants

    This class will be used to initialize L{Constants} if PyICU is not located.

    It is a plain container: every value parsedatetime needs to evaluate
    strings for USA is defined below as a class variable.
    """

    localeID = 'en_US'   # don't use a unicode string

    dateSep = [u'/', u'.']
    timeSep = [u':']
    meridian = [u'AM', u'PM']
    usesMeridian = True
    uses24 = False

    # weekday and month names, lowercase, Monday and January first
    Weekdays = [
        u'monday',
        u'tuesday',
        u'wednesday',
        u'thursday',
        u'friday',
        u'saturday',
        u'sunday',
    ]
    shortWeekdays = [
        u'mon',
        u'tues',
        u'wed',
        u'thu',
        u'fri',
        u'sat',
        u'sun',
    ]
    Months = [
        u'january',
        u'february',
        u'march',
        u'april',
        u'may',
        u'june',
        u'july',
        u'august',
        u'september',
        u'october',
        u'november',
        u'december',
    ]
    shortMonths = [
        u'jan',
        u'feb',
        u'mar',
        u'apr',
        u'may',
        u'jun',
        u'jul',
        u'aug',
        u'sep',
        u'oct',
        u'nov',
        u'dec',
    ]

    # date/time format patterns keyed by style name
    dateFormats = {
        'full': 'EEEE, MMMM d, yyyy',
        'long': 'MMMM d, yyyy',
        'medium': 'MMM d, yyyy',
        'short': 'M/d/yy',
    }
    timeFormats = {
        'full': 'h:mm:ss a z',
        'long': 'h:mm:ss a z',
        'medium': 'h:mm:ss a',
        'short': 'h:mm a',
    }

    # order of the date parts in a numeric date: month, day, year
    dp_order = [u'm', u'd', u'y']

    # this will be added to re_consts later
    units = {
        'seconds': ['second', 'sec'],
        'minutes': ['minute', 'min'],
        'hours': ['hour', 'hr'],
        'days': ['day', 'dy'],
        'weeks': ['week', 'wk'],
        'months': ['month', 'mth'],
        'years': ['year', 'yr'],
    }

    # text constants to be used by regex's later
    re_consts = {
        'specials': 'in|on|of|at',
        'timeseperator': ':',
        'rangeseperator': '-',
        'daysuffix': 'rd|st|nd|th',
        'meridian': 'am|pm|a.m.|p.m.|a|p',
        'qunits': 'h|m|s|d|w|m|y',
        'now': ['now'],
    }

    # Used to adjust the returned date before/after the source
    modifiers = {
        'from': 1,
        'before': -1,
        'after': 1,
        'ago': -1,
        'prior': -1,
        'prev': -1,
        'last': -1,
        'next': 1,
        'previous': -1,
        'in a': 2,
        'end of': 0,
        'eod': 0,
        'eo': 0,
    }

    dayoffsets = {
        'tomorrow': 1,
        'today': 0,
        'yesterday': -1,
    }

    # special day and/or times, i.e. lunch, noon, evening
    # each element in the dictionary is a dictionary that is used
    # to fill in any value to be replace - the current date/time will
    # already have been populated by the method buildSources
    #
    # every entry is "on the hour", so only the hour is listed here and
    # a separate dictionary is built for each name
    re_sources = dict(
        (name, {'hr': hour, 'mn': 0, 'sec': 0})
        for name, hour in (
            ('noon', 12),
            ('lunch', 12),
            ('morning', 6),
            ('breakfast', 8),
            ('dinner', 19),
            ('evening', 18),
            ('midnight', 0),
            ('night', 21),
            ('tonight', 21),
            ('eod', 17),
        )
    )
class pdtLocale_au:
    """
    en_AU Locale constants

    This class will be used to initialize L{Constants} if PyICU is not located.

    Defined as class variables are the lists and strings needed by parsedatetime
    to evaluate strings for Australia
    """

    localeID = 'en_AU'   # don't use a unicode string
    dateSep = [u'-', u'/']
    timeSep = [u':']
    meridian = [u'AM', u'PM']
    usesMeridian = True
    uses24 = False

    Weekdays = [u'monday', u'tuesday', u'wednesday',
                u'thursday', u'friday', u'saturday', u'sunday',
                ]
    shortWeekdays = [u'mon', u'tues', u'wed',
                     u'thu', u'fri', u'sat', u'sun',
                     ]
    Months = [u'january', u'february', u'march',
              u'april', u'may', u'june',
              u'july', u'august', u'september',
              u'october', u'november', u'december',
              ]
    shortMonths = [u'jan', u'feb', u'mar',
                   u'apr', u'may', u'jun',
                   u'jul', u'aug', u'sep',
                   u'oct', u'nov', u'dec',
                   ]
    dateFormats = {'full': 'EEEE, d MMMM yyyy',
                   'long': 'd MMMM yyyy',
                   'medium': 'dd/MM/yyyy',
                   'short': 'd/MM/yy',
                   }
    timeFormats = {'full': 'h:mm:ss a z',
                   'long': 'h:mm:ss a',
                   'medium': 'h:mm:ss a',
                   'short': 'h:mm a',
                   }

    # numeric dates are day first, then month, then year
    dp_order = [u'd', u'm', u'y']

    # this will be added to re_consts later
    units = {'seconds': ['second', 'sec'],
             'minutes': ['minute', 'min'],
             'hours': ['hour', 'hr'],
             'days': ['day', 'dy'],
             'weeks': ['week', 'wk'],
             'months': ['month', 'mth'],
             'years': ['year', 'yr'],
             }

    # text constants to be used by regex's later
    re_consts = {'specials': 'in|on|of|at',
                 'timeseperator': ':',
                 'rangeseperator': '-',
                 'daysuffix': 'rd|st|nd|th',
                 'meridian': 'am|pm|a.m.|p.m.|a|p',
                 'qunits': 'h|m|s|d|w|m|y',
                 'now': ['now'],
                 }

    # Used to adjust the returned date before/after the source
    #
    # 'ago' points into the past like 'before' and 'prior', and 'eod'
    # is listed because the modifier regex matches it; both now agree
    # with the en_US table.
    modifiers = {'from': 1,
                 'before': -1,
                 'after': 1,
                 'ago': -1,
                 'prior': -1,
                 'prev': -1,
                 'last': -1,
                 'next': 1,
                 'previous': -1,
                 'in a': 2,
                 'end of': 0,
                 'eod': 0,
                 'eo': 0,
                 }

    dayoffsets = {'tomorrow': 1,
                  'today': 0,
                  'yesterday': -1,
                  }

    # special day and/or times, i.e. lunch, noon, evening
    # each element in the dictionary is a dictionary that is used
    # to fill in any value to be replace - the current date/time will
    # already have been populated by the method buildSources
    re_sources = {'noon': {'hr': 12, 'mn': 0, 'sec': 0},
                  'lunch': {'hr': 12, 'mn': 0, 'sec': 0},
                  'morning': {'hr': 6, 'mn': 0, 'sec': 0},
                  'breakfast': {'hr': 8, 'mn': 0, 'sec': 0},
                  'dinner': {'hr': 19, 'mn': 0, 'sec': 0},
                  'evening': {'hr': 18, 'mn': 0, 'sec': 0},
                  'midnight': {'hr': 0, 'mn': 0, 'sec': 0},
                  'night': {'hr': 21, 'mn': 0, 'sec': 0},
                  'tonight': {'hr': 21, 'mn': 0, 'sec': 0},
                  'eod': {'hr': 17, 'mn': 0, 'sec': 0},
                  }
class pdtLocale_es:
    """
    es Locale constants

    This class will be used to initialize L{Constants} if PyICU is not located.

    Defined as class variables are the lists and strings needed by parsedatetime
    to evaluate strings in Spanish

    Note that I don't speak Spanish so many of the items below are still in English
    """

    localeID = 'es'   # don't use a unicode string
    dateSep = [u'/']
    timeSep = [u':']
    meridian = []
    usesMeridian = False
    uses24 = True

    Weekdays = [u'lunes', u'martes', u'mi\xe9rcoles',
                u'jueves', u'viernes', u's\xe1bado', u'domingo',
                ]
    shortWeekdays = [u'lun', u'mar', u'mi\xe9',
                     u'jue', u'vie', u's\xe1b', u'dom',
                     ]
    Months = [u'enero', u'febrero', u'marzo',
              u'abril', u'mayo', u'junio',
              u'julio', u'agosto', u'septiembre',
              u'octubre', u'noviembre', u'diciembre'
              ]
    shortMonths = [u'ene', u'feb', u'mar',
                   u'abr', u'may', u'jun',
                   u'jul', u'ago', u'sep',
                   u'oct', u'nov', u'dic'
                   ]
    dateFormats = {'full': "EEEE d' de 'MMMM' de 'yyyy",
                   'long': "d' de 'MMMM' de 'yyyy",
                   'medium': "dd-MMM-yy",
                   'short': "d/MM/yy",
                   }
    timeFormats = {'full': "HH'H'mm' 'ss z",
                   'long': "HH:mm:ss z",
                   'medium': "HH:mm:ss",
                   'short': "HH:mm",
                   }

    # numeric dates are day first, then month, then year
    dp_order = [u'd', u'm', u'y']

    # this will be added to re_consts later
    units = {'seconds': ['second', 'sec'],
             'minutes': ['minute', 'min'],
             'hours': ['hour', 'hr'],
             'days': ['day', 'dy'],
             'weeks': ['week', 'wk'],
             'months': ['month', 'mth'],
             'years': ['year', 'yr'],
             }

    # text constants to be used by regex's later
    #
    # The separators must be plain strings: these values are
    # interpolated into regex text with %(...)s, and a list would be
    # rendered as its repr (e.g. "[u':']"), which a regex reads as a
    # character class that also matches quote characters.
    re_consts = {'specials': 'in|on|of|at',
                 'timeseperator': ':',
                 'dateseperator': '/',
                 'rangeseperator': '-',
                 'daysuffix': 'rd|st|nd|th',
                 'qunits': 'h|m|s|d|w|m|y',
                 'now': ['now'],
                 }

    # Used to adjust the returned date before/after the source
    #
    # 'ago' points into the past like 'before' and 'prior', and 'eod'
    # is listed because the modifier regex matches it; both now agree
    # with the en_US table.
    modifiers = {'from': 1,
                 'before': -1,
                 'after': 1,
                 'ago': -1,
                 'prior': -1,
                 'prev': -1,
                 'last': -1,
                 'next': 1,
                 'previous': -1,
                 'in a': 2,
                 'end of': 0,
                 'eod': 0,
                 'eo': 0,
                 }

    dayoffsets = {'tomorrow': 1,
                  'today': 0,
                  'yesterday': -1,
                  }

    # special day and/or times, i.e. lunch, noon, evening
    # each element in the dictionary is a dictionary that is used
    # to fill in any value to be replace - the current date/time will
    # already have been populated by the method buildSources
    re_sources = {'noon': {'hr': 12, 'mn': 0, 'sec': 0},
                  'lunch': {'hr': 12, 'mn': 0, 'sec': 0},
                  'morning': {'hr': 6, 'mn': 0, 'sec': 0},
                  'breakfast': {'hr': 8, 'mn': 0, 'sec': 0},
                  'dinner': {'hr': 19, 'mn': 0, 'sec': 0},
                  'evening': {'hr': 18, 'mn': 0, 'sec': 0},
                  'midnight': {'hr': 0, 'mn': 0, 'sec': 0},
                  'night': {'hr': 21, 'mn': 0, 'sec': 0},
                  'tonight': {'hr': 21, 'mn': 0, 'sec': 0},
                  'eod': {'hr': 17, 'mn': 0, 'sec': 0},
                  }
class pdtLocale_de:
    """
    de_DE Locale constants

    This class will be used to initialize L{Constants} if PyICU is not located.

    Contributed by Debian parsedatetime package maintainer Bernd Zeimetz <[email protected]>

    Defined as class variables are the lists and strings needed by parsedatetime
    to evaluate strings for German
    """

    localeID = 'de_DE'   # don't use a unicode string
    dateSep = [u'.']
    timeSep = [u':']
    meridian = []
    usesMeridian = False
    uses24 = True

    Weekdays = [u'montag', u'dienstag', u'mittwoch',
                u'donnerstag', u'freitag', u'samstag', u'sonntag',
                ]
    shortWeekdays = [u'mo', u'di', u'mi',
                     u'do', u'fr', u'sa', u'so',
                     ]
    Months = [u'januar', u'februar', u'm\xe4rz',
              u'april', u'mai', u'juni',
              u'juli', u'august', u'september',
              u'oktober', u'november', u'dezember',
              ]
    shortMonths = [u'jan', u'feb', u'mrz',
                   u'apr', u'mai', u'jun',
                   u'jul', u'aug', u'sep',
                   u'okt', u'nov', u'dez',
                   ]
    dateFormats = {'full': u'EEEE, d. MMMM yyyy',
                   'long': u'd. MMMM yyyy',
                   'medium': u'dd.MM.yyyy',
                   'short': u'dd.MM.yy'
                   }
    timeFormats = {'full': u'HH:mm:ss v',
                   'long': u'HH:mm:ss z',
                   'medium': u'HH:mm:ss',
                   'short': u'HH:mm'
                   }

    # numeric dates are day first, then month, then year
    dp_order = [u'd', u'm', u'y']

    # this will be added to re_consts later
    units = {'seconds': ['sekunden', 'sek', 's'],
             'minutes': ['minuten', 'min', 'm'],
             'hours': ['stunden', 'std', 'h'],
             'days': ['tage', 't'],
             'weeks': ['wochen', 'w'],
             # the short version would be a capital M,
             # as I understand it we can't distinguish
             # between m for minutes and M for months.
             'months': ['monate'],
             'years': ['jahre', 'j'],
             }

    # text constants to be used by regex's later
    re_consts = {'specials': 'am|dem|der|im|in|den|zum',
                 'timeseperator': ':',
                 'rangeseperator': '-',
                 'daysuffix': '',
                 'qunits': 'h|m|s|t|w|m|j',
                 'now': ['jetzt'],
                 }

    # Used to adjust the returned date before/after the source
    # still looking for insight on how to translate all of them to german.
    modifiers = {u'from': 1,
                 u'before': -1,
                 u'after': 1,
                 u'vergangener': -1,
                 u'vorheriger': -1,
                 u'prev': -1,
                 u'letzter': -1,
                 u'n\xe4chster': 1,
                 u'dieser': 0,
                 u'previous': -1,
                 u'in a': 2,
                 u'end of': 0,
                 u'eod': 0,
                 u'eo': 0,
                 }

    # morgen/abermorgen does not work, see
    # http://code.google.com/p/parsedatetime/issues/detail?id=19
    dayoffsets = {u'morgen': 1,
                  u'heute': 0,
                  u'gestern': -1,
                  u'vorgestern': -2,
                  u'\xfcbermorgen': 2,
                  }

    # special day and/or times, i.e. lunch, noon, evening
    # each element in the dictionary is a dictionary that is used
    # to fill in any value to be replace - the current date/time will
    # already have been populated by the method buildSources
    re_sources = {u'mittag': {'hr': 12, 'mn': 0, 'sec': 0},
                  u'mittags': {'hr': 12, 'mn': 0, 'sec': 0},
                  u'mittagessen': {'hr': 12, 'mn': 0, 'sec': 0},
                  u'morgen': {'hr': 6, 'mn': 0, 'sec': 0},
                  u'morgens': {'hr': 6, 'mn': 0, 'sec': 0},
                  # "fruehstueck" (breakfast) spelled with u-umlaut, \xfc
                  u'fr\xfchst\xfcck': {'hr': 8, 'mn': 0, 'sec': 0},
                  u'abendessen': {'hr': 19, 'mn': 0, 'sec': 0},
                  u'abend': {'hr': 18, 'mn': 0, 'sec': 0},
                  u'abends': {'hr': 18, 'mn': 0, 'sec': 0},
                  u'mitternacht': {'hr': 0, 'mn': 0, 'sec': 0},
                  u'nacht': {'hr': 21, 'mn': 0, 'sec': 0},
                  u'nachts': {'hr': 21, 'mn': 0, 'sec': 0},
                  u'heute abend': {'hr': 21, 'mn': 0, 'sec': 0},
                  u'heute nacht': {'hr': 21, 'mn': 0, 'sec': 0},
                  u'feierabend': {'hr': 17, 'mn': 0, 'sec': 0},
                  }
# Registry of the built-in locale classes, keyed by the locale
# identifier that callers ask for.
pdtLocales = dict(
    en_US=pdtLocale_en,
    en_AU=pdtLocale_au,
    es_ES=pdtLocale_es,
    de_DE=pdtLocale_de,
)
def _initLocale(ptc):
    """
    Helper function to initialize the different lists and strings
    from either PyICU or one of the internal pdt Locales and store
    them into ptc.

    When PyICU is available and C{ptc.usePyICU} is set, weekday and
    month names and the date/time format patterns are taken from ICU.
    Otherwise C{ptc.localeID} (or the first entry of
    C{ptc.fallbackLocales} found in C{pdtLocales}) selects one of the
    internal locale classes and C{ptc.usePyICU} is switched off.

    In both cases the regex building blocks are then stored in
    C{ptc.re_values}.  That dictionary is a private copy, so the
    class level C{re_consts} of the locale classes is never modified.

    @type  ptc: object
    @param ptc: constants object to populate; C{localeID},
                C{fallbackLocales} and C{usePyICU} are read from it

    @raise KeyError: in the non-PyICU case, if neither C{ptc.localeID}
                     nor any fallback locale is known to C{pdtLocales}
    """
    if pyicu and ptc.usePyICU:
        ptc.icuLocale = None

        if ptc.localeID is not None:
            ptc.icuLocale = pyicu.Locale(ptc.localeID)

        if ptc.icuLocale is None:
            for fallback in ptc.fallbackLocales:
                ptc.localeID = fallback
                ptc.icuLocale = pyicu.Locale(ptc.localeID)

                if ptc.icuLocale is not None:
                    break

        ptc.icuSymbols = pyicu.DateFormatSymbols(ptc.icuLocale)

        # grab ICU list of weekdays, skipping first entry which
        # is always blank; real lists are built (not map objects) so
        # that they can be sliced and concatenated below
        wd = [name.lower() for name in ptc.icuSymbols.getWeekdays()[1:]]
        swd = [name.lower() for name in ptc.icuSymbols.getShortWeekdays()[1:]]

        # store them in our list with Monday first (ICU puts Sunday first)
        ptc.Weekdays = wd[1:] + wd[0:1]
        ptc.shortWeekdays = swd[1:] + swd[0:1]
        ptc.Months = [name.lower() for name in ptc.icuSymbols.getMonths()]
        ptc.shortMonths = [name.lower() for name in ptc.icuSymbols.getShortMonths()]

        # not quite sure how to init this so for now
        # set it to none so it will be set to the en_US defaults for now
        ptc.re_consts = None

        styles = (('full', pyicu.DateFormat.kFull),
                  ('long', pyicu.DateFormat.kLong),
                  ('medium', pyicu.DateFormat.kMedium),
                  ('short', pyicu.DateFormat.kShort))

        ptc.icu_df = dict((name, pyicu.DateFormat.createDateInstance(style, ptc.icuLocale))
                          for name, style in styles)
        ptc.icu_tf = dict((name, pyicu.DateFormat.createTimeInstance(style, ptc.icuLocale))
                          for name, style in styles)
        ptc.dateFormats = dict((name, ptc.icu_df[name].toPattern())
                               for name, style in styles)
        ptc.timeFormats = dict((name, ptc.icu_tf[name].toPattern())
                               for name, style in styles)
    else:
        if ptc.localeID not in pdtLocales:
            # the last fallback stays selected if none of them is known
            for fallback in ptc.fallbackLocales:
                ptc.localeID = fallback

                if ptc.localeID in pdtLocales:
                    break

        ptc.locale = pdtLocales[ptc.localeID]
        ptc.usePyICU = False

        ptc.Weekdays = ptc.locale.Weekdays
        ptc.shortWeekdays = ptc.locale.shortWeekdays
        ptc.Months = ptc.locale.Months
        ptc.shortMonths = ptc.locale.shortMonths
        ptc.dateFormats = ptc.locale.dateFormats
        ptc.timeFormats = ptc.locale.timeFormats

    # these values are used to setup the various bits
    # of the regex values used to parse
    #
    # check if a local set of constants has been
    # provided, if not use en_US as the default
    defaults = pdtLocales['en_US']

    if ptc.localeID in pdtLocales:
        consts = pdtLocales[ptc.localeID]
    else:
        consts = defaults

    ptc.re_sources = consts.re_sources
    # copy: entries are added below and must not leak into the class
    ptc.re_values = dict(consts.re_consts)
    ptc.Modifiers = consts.modifiers
    ptc.dayOffsets = consts.dayoffsets
    units = consts.units

    # for now, pull over any missing keys from the US set
    for key, value in defaults.re_consts.items():
        if key not in ptc.re_values:
            ptc.re_values[key] = value

    # escape any regex special characters that may be found
    wd = [re.escape(name) for name in ptc.Weekdays]
    swd = [re.escape(name) for name in ptc.shortWeekdays]
    mth = [re.escape(name) for name in ptc.Months]
    smth = [re.escape(name) for name in ptc.shortMonths]

    ptc.re_values['months'] = '|'.join(mth)
    ptc.re_values['shortmonths'] = '|'.join(smth)
    ptc.re_values['days'] = '|'.join(wd)
    ptc.re_values['shortdays'] = '|'.join(swd)

    # one alternation holding every spelling of every unit
    ptc.re_values['units'] = '|'.join('|'.join(units[unit]) for unit in units)
    ptc.Units = ptc.re_values['units'].split('|')
def _initSymbols(ptc):
    """
    Helper function to initialize the single character constants
    and other symbols needed.

    Sets C{timeSep}, C{dateSep}, C{meridian}, C{usesMeridian},
    C{uses24} and C{dp_order} on C{ptc}, either by formatting a sample
    date with the ICU formatters stored in C{ptc.icu_tf} / C{ptc.icu_df}
    or by copying the values from C{ptc.locale}.  Finally C{ptc.am} and
    C{ptc.pm} are built from C{ptc.meridian}.

    @type  ptc: object
    @param ptc: constants object to populate
    """
    def meridianForms(index):
        # original text, its first char, lowercase text, lowercase
        # first char; two empty strings if the locale has no entry
        if len(ptc.meridian) > index:
            text = ptc.meridian[index]
            forms = [text]

            if len(text) > 0:
                lowered = text.lower()
                forms.append(text[0])
                forms.append(lowered)
                forms.append(lowered[0])

            return forms

        return ['', '']

    ptc.timeSep = [u':']
    ptc.dateSep = [u'/']
    ptc.meridian = [u'AM', u'PM']

    ptc.usesMeridian = True
    ptc.uses24 = False

    if pyicu and ptc.usePyICU:
        am = u''
        pm = u''
        ts = ''

        # ICU doesn't seem to provide directly the
        # date or time separator - so we have to
        # figure it out
        o = ptc.icu_tf['short']
        s = ptc.timeFormats['short']

        ptc.usesMeridian = u'a' in s
        ptc.uses24 = u'H' in s

        # '11:45 AM' or '11:45'
        s = o.format(datetime.datetime(2003, 10, 30, 11, 45))

        # ': AM' or ':'
        s = s.replace('11', '').replace('45', '')

        if len(s) > 0:
            ts = s[0]

        if ptc.usesMeridian:
            # '23:45 AM' or '23:45'
            am = s[1:].strip()
            s = o.format(datetime.datetime(2003, 10, 30, 23, 45))

            if ptc.uses24:
                s = s.replace('23', '')
            else:
                s = s.replace('11', '')

            # 'PM' or ''
            pm = s.replace('45', '').replace(ts, '').strip()

        ptc.timeSep = [ts]
        ptc.meridian = [am, pm]

        o = ptc.icu_df['short']
        s = o.format(datetime.datetime(2003, 10, 30, 11, 45))

        # strip the four digit year before the two digit one; the other
        # way round '2003' is cut down to '20' and a year-first format
        # would yield '2' as the separator
        s = s.replace('2003', '').replace('10', '').replace('30', '').replace('03', '')

        if len(s) > 0:
            ds = s[0]
        else:
            ds = '/'

        ptc.dateSep = [ds]

        # first letter of each part of the short pattern, in order
        pattern = ptc.dateFormats['short']
        ptc.dp_order = [part[:1] for part in pattern.lower().split(ds)
                        if len(part) > 0]
    else:
        ptc.timeSep = ptc.locale.timeSep
        ptc.dateSep = ptc.locale.dateSep
        ptc.meridian = ptc.locale.meridian
        ptc.usesMeridian = ptc.locale.usesMeridian
        ptc.uses24 = ptc.locale.uses24
        ptc.dp_order = ptc.locale.dp_order

    # build am and pm lists to contain
    # original case, lowercase and first-char
    # versions of the meridian text
    ptc.am = meridianForms(0)
    ptc.pm = meridianForms(1)
def _initPatterns(ptc):
    """
    Helper function to take the different localized bits from ptc and
    create the regex strings.

    Reads C{ptc.re_values} (a mapping providing the C{daysuffix},
    C{months}, C{shortmonths}, C{days}, C{shortdays}, C{specials},
    C{units}, C{qunits}, C{timeseperator}, C{rangeseperator} and,
    optionally, C{meridian} entries) and C{ptc.dateSep} (an iterable of
    separator strings), and stores the resulting pattern source strings
    on C{ptc} as the C{RE_*}, C{DATERNG*} and C{TIMERNG*} attributes.

    The multi-line patterns contain layout whitespace and are therefore
    meant to be compiled with C{re.VERBOSE}.

    @type  ptc: object
    @param ptc: constants object that receives the pattern attributes

    @raise KeyError: if a required entry is missing from C{ptc.re_values}
    """
    # TODO add code to parse the date formats and build the regexes up from sub-parts
    # TODO find all hard-coded uses of date/time separators
    values = ptc.re_values

    ptc.RE_DATE4 = r'''(?P<date>(((?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?(,)?(\s)?)
                                 (?P<mthname>(%(months)s|%(shortmonths)s))\s?
                                 (?P<year>\d\d(\d\d)?)?
                               )
                        )''' % values

    # DATE3 was refactored to fix Issue 16
    # http://code.google.com/p/parsedatetime/issues/detail?id=16
    # The former trailing-delimiter line was probably for a trailing time,
    # but testing showed it was not needed.
    ptc.RE_DATE3 = r'''(?P<date>(
                                 (((?P<mthname>(%(months)s|%(shortmonths)s))|
                                 ((?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?))(\s)?){1,2}
                                 ((,)?(\s)?(?P<year>\d\d(\d\d)?))?
                                )
                        )''' % values

    ptc.RE_MONTH = r'''(\s?|^)
                       (?P<month>(
                                  (?P<mthname>(%(months)s|%(shortmonths)s))
                                  (\s?(?P<year>(\d\d\d\d)))?
                                 ))
                       (\s?|$|[^0-9a-zA-Z])''' % values

    ptc.RE_WEEKDAY = r'''(\s?|^)
                         (?P<weekday>(%(days)s|%(shortdays)s))
                         (\s?|$|[^0-9a-zA-Z])''' % values

    ptc.RE_SPECIAL = r'(?P<special>^[%(specials)s]+)\s+' % values

    ptc.RE_UNITS = r'''(?P<qty>(-?\d+\s*
                                (?P<units>((%(units)s)s?))
                               ))''' % values

    ptc.RE_QUNITS = r'''(?P<qty>(-?\d+\s?
                                 (?P<qunits>%(qunits)s)
                                 (\s?|,|$)
                                ))''' % values

    # The modifier, day and time-of-day patterns have no localized parts,
    # so they are used as written (no %-substitution needed).
    ptc.RE_MODIFIER = r'''(\s?|^)
                          (?P<modifier>
                           (previous|prev|last|next|eod|eo|(end\sof)|(in\sa)))'''

    ptc.RE_MODIFIER2 = r'''(\s?|^)
                           (?P<modifier>
                            (from|before|after|ago|prior))
                           (\s?|$|[^0-9a-zA-Z])'''

    ptc.RE_TIMEHMS = r'''(\s?|^)
                         (?P<hours>\d\d?)
                         (?P<tsep>%(timeseperator)s|)
                         (?P<minutes>\d\d)
                         (?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?''' % values

    ptc.RE_TIMEHMS2 = r'''(?P<hours>(\d\d?))
                          ((?P<tsep>%(timeseperator)s|)
                           (?P<minutes>(\d\d?))
                           (?:(?P=tsep)
                              (?P<seconds>\d\d?
                               (?:[.,]\d+)?))?)?''' % values

    if 'meridian' in values:
        ptc.RE_TIMEHMS2 += r'\s?(?P<meridian>(%(meridian)s))' % values

    # The separators end up inside a [...] character class, so they must be
    # escaped: an unescaped '-', '^', ']' or '\' would change the meaning of
    # the class (e.g. ['/', '-'] used to yield the invalid range "[/-.]").
    dateSeps = re.escape(''.join(ptc.dateSep) + '.')

    ptc.RE_DATE = r'''(\s?|^)
                      (?P<date>(\d\d?[%s]\d\d?([%s]\d\d(\d\d)?)?))
                      (\s?|$|[^0-9a-zA-Z])''' % (dateSeps, dateSeps)

    ptc.RE_DATE2 = r'[%s]' % dateSeps

    ptc.RE_DAY = r'''(\s?|^)
                     (?P<day>(today|tomorrow|yesterday))
                     (\s?|$|[^0-9a-zA-Z])'''

    ptc.RE_DAY2 = r'''(?P<day>\d\d?)|(?P<suffix>%(daysuffix)s)
                   ''' % values

    ptc.RE_TIME = r'''(\s?|^)
                      (?P<time>(morning|breakfast|noon|lunch|evening|midnight|tonight|dinner|night|now))
                      (\s?|$|[^0-9a-zA-Z])'''

    ptc.RE_REMAINING = r'\s+'

    # Regex for date/time ranges
    ptc.RE_RTIMEHMS = r'''(\s?|^)
                          (\d\d?)%(timeseperator)s
                          (\d\d)
                          (%(timeseperator)s(\d\d))?
                          (\s?|$)''' % values

    ptc.RE_RTIMEHMS2 = r'''(\s?|^)
                           (\d\d?)
                           (%(timeseperator)s(\d\d?))?
                           (%(timeseperator)s(\d\d?))?''' % values

    if 'meridian' in values:
        ptc.RE_RTIMEHMS2 += r'\s?(%(meridian)s)' % values

    ptc.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps
    ptc.RE_RDATE3 = r'''((((%(months)s))\s?
                          ((\d\d?)
                           (\s?|%(daysuffix)s|$)+)?
                          (,\s?\d\d\d\d)?))''' % values

    # Format the range joiner once and concatenate it with the pieces that
    # were already substituted above; re-applying % to the combined string
    # would misinterpret any literal '%' that a substituted value contained.
    rangeJoin = r'\s?%(rangeseperator)s\s?' % values

    # "06/07/06 - 08/09/06"
    ptc.DATERNG1 = ptc.RE_RDATE + rangeJoin + ptc.RE_RDATE

    # "march 31 - june 1st, 2006"
    ptc.DATERNG2 = ptc.RE_RDATE3 + rangeJoin + ptc.RE_RDATE3

    # "march 1rd -13th"
    ptc.DATERNG3 = ptc.RE_RDATE3 + rangeJoin + r'(\d\d?)\s?(rd|st|nd|th)?'

    # "4:00:55 pm - 5:90:44 am", '4p-5p'
    ptc.TIMERNG1 = ptc.RE_RTIMEHMS2 + rangeJoin + ptc.RE_RTIMEHMS2

    # "4:00 - 5:90 ", "4:55:55-3:44:55"
    ptc.TIMERNG2 = ptc.RE_RTIMEHMS + rangeJoin + ptc.RE_RTIMEHMS

    # "4-5pm "
    ptc.TIMERNG3 = r'\d\d?' + rangeJoin + ptc.RE_RTIMEHMS2

    # "4:30-5pm "
    ptc.TIMERNG4 = ptc.RE_RTIMEHMS + rangeJoin + ptc.RE_RTIMEHMS2
def _initConstants(ptc):
    """
    Build the name -> offset lookup tables on C{ptc}.

    C{ptc.WeekdayOffsets} maps every entry of C{ptc.Weekdays} and
    C{ptc.shortWeekdays} to its 0-based position in its list, and
    C{ptc.MonthOffsets} maps every entry of C{ptc.Months} and
    C{ptc.shortMonths} to its 1-based position in its list.
    """
    # Weekday offsets - the long and short lists are assumed to be in the
    # same order, Mon..Sun (Python style).
    weekdayOffsets = ptc.WeekdayOffsets = {}
    for names in (ptc.Weekdays, ptc.shortWeekdays):
        for position, name in enumerate(names):
            weekdayOffsets[name] = position

    # Month offsets - the long and short lists are assumed to be in the
    # same order, Jan..Dec; months are numbered starting at 1.
    monthOffsets = ptc.MonthOffsets = {}
    for names in (ptc.Months, ptc.shortMonths):
        for position, name in enumerate(names):
            monthOffsets[name] = position + 1
class Constants:
    """
    Default set of constants for parsedatetime.

    If PyICU is present, then the class will first try to get PyICU
    to return a locale specified by C{localeID}.  If either C{localeID} is
    None or if the locale does not exist within PyICU, then each of the
    locales defined in C{fallbackLocales} is tried in order.

    If PyICU is not present or none of the specified locales can be used,
    then the class will initialize itself to the en_US locale.

    If PyICU is not present or not requested, only the locales defined by
    C{pdtLocales} will be searched.

    Compiled regular expressions are exposed lazily as C{CRE_*} attributes:
    the first access compiles the matching entry of C{cre_source} and caches
    the result on the instance.
    """
    def __init__(self, localeID=None, usePyICU=True, fallbackLocales=None):
        """
        Initialize the constants and build the localized pattern strings.

        @type  localeID:        string or None
        @param localeID:        identifier of the preferred locale
        @type  usePyICU:        boolean
        @param usePyICU:        stored as C{self.usePyICU} for the locale
                                initialization
        @type  fallbackLocales: sequence of strings or None
        @param fallbackLocales: locales to try in order; None means
                                C{['en_US']}.  C{'en_US'} is always appended
                                if it is missing.
        """
        self.localeID = localeID

        # Work on a private copy: appending to the caller's list (or to a
        # shared default list) would leak state between instances.
        if fallbackLocales is None:
            self.fallbackLocales = ['en_US']
        else:
            self.fallbackLocales = list(fallbackLocales)

        if 'en_US' not in self.fallbackLocales:
            self.fallbackLocales.append('en_US')

        # define non-locale specific constants
        self.locale = None
        self.usePyICU = usePyICU

        # starting cache of leap years (every 4th year from 1904 to 2096)
        # daysInMonth will add to this if during
        # runtime it gets a request for a year not found
        self._leapYears = list(range(1904, 2097, 4))

        self.Second = 1
        self.Minute = 60 * self.Second
        self.Hour = 60 * self.Minute
        self.Day = 24 * self.Hour
        self.Week = 7 * self.Day
        self.Month = 30 * self.Day
        self.Year = 365 * self.Day

        self.rangeSep = u'-'

        self._DaysInMonthList = (31, 28, 31, 30, 31, 30,
                                 31, 31, 30, 31, 30, 31)

        self.BirthdayEpoch = 50

        # DOWParseStyle controls how we parse "Tuesday"
        # If the current day was Thursday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        # Current day marked as ***
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current          -1,0    ***
        # week +1          +1
        #
        # If the current day was Monday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1          -1
        # current      *** 0,+1
        # week +1
        self.DOWParseStyle = 1

        # CurrentDOWParseStyle controls how we parse "Friday"
        # If the current day was Friday and the text to parse is "Friday"
        # then the following table shows how each style would be returned
        # True/False. This also depends on DOWParseStyle.
        #
        # Current day marked as ***
        #
        # DOWParseStyle = 0
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                      T,F
        # week +1
        #
        # DOWParseStyle = -1
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1                      F
        # current                      T
        # week +1
        #
        # DOWParseStyle = +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                      T
        # week +1                      F
        self.CurrentDOWParseStyle = False

        # initialize attributes to empty values to ensure
        # they are defined
        self.re_sources = None
        self.re_values = None
        self.Modifiers = None
        self.dayOffsets = None
        self.WeekdayOffsets = None
        self.MonthOffsets = None
        self.dateSep = None
        self.timeSep = None
        self.am = None
        self.pm = None
        self.meridian = None
        self.usesMeridian = None
        self.uses24 = None
        self.dp_order = None

        self.RE_DATE4 = r''
        self.RE_DATE3 = r''
        self.RE_MONTH = r''
        self.RE_WEEKDAY = r''
        self.RE_SPECIAL = r''
        self.RE_UNITS = r''
        self.RE_QUNITS = r''
        self.RE_MODIFIER = r''
        self.RE_MODIFIER2 = r''
        self.RE_TIMEHMS = r''
        self.RE_TIMEHMS2 = r''
        self.RE_DATE = r''
        self.RE_DATE2 = r''
        self.RE_DAY = r''
        self.RE_DAY2 = r''
        self.RE_TIME = r''
        self.RE_REMAINING = r''
        self.RE_RTIMEHMS = r''
        self.RE_RTIMEHMS2 = r''
        self.RE_RDATE = r''
        self.RE_RDATE3 = r''
        self.DATERNG1 = r''
        self.DATERNG2 = r''
        self.DATERNG3 = r''
        self.TIMERNG1 = r''
        self.TIMERNG2 = r''
        self.TIMERNG3 = r''
        self.TIMERNG4 = r''

        # The helpers below fill in the attributes declared above; the
        # order matters because the patterns are built from the values the
        # earlier steps store on the instance.
        _initLocale(self)
        _initConstants(self)
        _initSymbols(self)
        _initPatterns(self)

        self.re_option = re.IGNORECASE | re.VERBOSE
        self.cre_source = { 'CRE_SPECIAL':   self.RE_SPECIAL,
                            'CRE_UNITS':     self.RE_UNITS,
                            'CRE_QUNITS':    self.RE_QUNITS,
                            'CRE_MODIFIER':  self.RE_MODIFIER,
                            'CRE_MODIFIER2': self.RE_MODIFIER2,
                            'CRE_TIMEHMS':   self.RE_TIMEHMS,
                            'CRE_TIMEHMS2':  self.RE_TIMEHMS2,
                            'CRE_DATE':      self.RE_DATE,
                            'CRE_DATE2':     self.RE_DATE2,
                            'CRE_DATE3':     self.RE_DATE3,
                            'CRE_DATE4':     self.RE_DATE4,
                            'CRE_MONTH':     self.RE_MONTH,
                            'CRE_WEEKDAY':   self.RE_WEEKDAY,
                            'CRE_DAY':       self.RE_DAY,
                            'CRE_DAY2':      self.RE_DAY2,
                            'CRE_TIME':      self.RE_TIME,
                            'CRE_REMAINING': self.RE_REMAINING,
                            'CRE_RTIMEHMS':  self.RE_RTIMEHMS,
                            'CRE_RTIMEHMS2': self.RE_RTIMEHMS2,
                            'CRE_RDATE':     self.RE_RDATE,
                            'CRE_RDATE3':    self.RE_RDATE3,
                            'CRE_TIMERNG1':  self.TIMERNG1,
                            'CRE_TIMERNG2':  self.TIMERNG2,
                            'CRE_TIMERNG3':  self.TIMERNG3,
                            'CRE_TIMERNG4':  self.TIMERNG4,
                            'CRE_DATERNG1':  self.DATERNG1,
                            'CRE_DATERNG2':  self.DATERNG2,
                            'CRE_DATERNG3':  self.DATERNG3,
                          }
        self.cre_keys = list(self.cre_source)

    def __getattr__(self, name):
        """
        Lazily compile and cache the C{CRE_*} regular expressions.

        Only called when normal attribute lookup fails.  If C{name} is one
        of C{self.cre_keys}, the matching source from C{self.cre_source} is
        compiled with C{self.re_option}, stored on the instance (so later
        lookups bypass this method) and returned.

        @raise AttributeError: for any other name
        """
        # Read cre_keys through __dict__: a plain self.cre_keys would call
        # __getattr__ again, recursing without bound, whenever the
        # attribute has not been set yet (e.g. before __init__ finishes).
        if name in self.__dict__.get('cre_keys', ()):
            value = re.compile(self.cre_source[name], self.re_option)
            setattr(self, name, value)
            return value

        raise AttributeError(name)

    def daysInMonth(self, month, year):
        """
        Take the given month (1-12) and a given year (4 digit) return
        the number of days in the month adjusting for leap year as needed

        @type  month: integer
        @param month: month number, 1-12
        @type  year:  integer
        @param year:  4 digit year

        @rtype:  integer or None
        @return: number of days in the month, or None if C{month} is not
                 in the range 1-12
        """
        # Imported here so the method does not rely on a module-level
        # import of calendar being present.
        import calendar

        if not (0 < month <= 12):
            return None

        result = self._DaysInMonthList[month - 1]

        if month == 2:
            if year in self._leapYears:
                result += 1
            elif calendar.isleap(year):
                # remember the year so the next request hits the cache
                self._leapYears.append(year)
                result += 1

        return result

    def buildSources(self, sourceTime=None):
        """
        Return a dictionary of date/time tuples based on the keys
        found in self.re_sources.

        The current time is used as the default and any specified
        item found in self.re_sources is inserted into the value
        and the generated dictionary is returned.

        @type  sourceTime: 9-item sequence or None
        @param sourceTime: C{(yr, mth, dy, hr, mn, sec, wd, yd, isdst)}
                           used for the defaults; None means
                           C{time.localtime()}

        @rtype:  dictionary
        @return: maps each key of C{self.re_sources} to a 9-item tuple whose
                 year/month/day/hour/minute/second come from that entry when
                 it provides them (keys C{yr}, C{mth}, C{dy}, C{hr}, C{mn},
                 C{sec}) and from the defaults otherwise; the weekday,
                 yearday and isdst items are always the defaults
        """
        if sourceTime is None:
            sourceTime = time.localtime()

        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        defaults = { 'yr': yr, 'mth': mth, 'dy': dy,
                     'hr': hr, 'mn':  mn,  'sec': sec, }
        fields = ('yr', 'mth', 'dy', 'hr', 'mn', 'sec')

        sources = {}

        for item in self.re_sources:
            source = self.re_sources[item]
            stamp = tuple([source.get(key, defaults[key]) for key in fields])
            sources[item] = stamp + (wd, yd, isdst)

        return sources
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment