Last active
August 29, 2015 13:57
-
-
Save rchrd2/9773922 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
A pretty nice time parsing regex.

Finds clock times ("6pm", "7:00 pm", "300pm") and time ranges
("6-7pm", "3pm to 5pm") embedded in free text.

@author Richard Caceres, @rchrd2
"""
import re

# The history for this regex is stored in my gist
# https://gist.github.com/rchrd2/9773922
#
# The pattern contains exactly one capturing group (the whole time or time
# range), so ``time_range_regex.findall(text)`` returns a list of the matched
# substrings.
#
# NOTE: the hour sub-pattern ``[0-1]?[0-9]`` accepts 0-19 (with an optional
# leading zero); it does not validate that the hour is a real clock hour.
time_range_regex = re.compile(r'''
    \b                              # start of string or a word boundary
    (
        # Time ranges are tried first (ie 6-7pm).  The start time may omit
        # am/pm, the end time must carry it.
        (?:
            (?:[0-1]?[0-9])         # hour, ie: (3):00, (12):00
            (?::[0-5][0-9])?        # optional minutes with the colon, ie 12(:30)
            (?:                     # am/pm is optional on the start of a range
                [\s]{0,10}          # limit how many spaces between time and am/pm
                (?:am|pm|a|p)
            )?
            (?:
                [\s]{0,10}
                (?:-|to)            # range separator
                [\s]{0,10}
                (?:[0-1]?[0-9])     # hour, ie: (3):00, (12):00
                # Optional minutes; the colon may be left out (ie 9:30 or 930)
                # but a colon on its own (ie "9:pm") is not a valid time.
                (?::?[0-5][0-9])?
                (?:                 # am/pm is required on the end of a range
                    [\s]{0,10}      # limit how many spaces between time and am/pm
                    (?:am|pm|a|p)
                )
            )
        )
        |
        # A single time that is not a time range (ie 6pm but not 6-7pm)
        (?:
            (?:[0-1]?[0-9])         # hour, ie: (3):00, (12):00
            (?:
                # if the colon is there, am/pm is optional
                (?:
                    (?::[0-5][0-9]) # minutes including the colon, ie 12(:30)
                    (?:             # am/pm part
                        [\s]{0,10}  # limit how many spaces between time and am/pm
                        (?:am|pm|a|p)
                    )?
                )
                |
                # if the colon is not there, am/pm is required (ie 6pm, 300pm)
                (?:[0-5][0-9])?     # optional minutes without a colon, ie 3(00)pm
                (?:                 # am/pm part
                    [\s]{0,10}      # limit how many spaces between time and am/pm
                    (?:am|pm|a|p)
                )
            )
        )
    )
    \b                              # end of string or a word boundary
    ''',
    re.IGNORECASE | re.U | re.VERBOSE | re.DOTALL)


if __name__ == "__main__":
    # Demo inputs: print what the regex extracts from each sample line.
    for sample in (
        "6-7p 6p-7p 6:00pm - 7:00pm foo bar bazz 3pm to 5pm",
        "6p 6 p 7 pm 7:00 pm 7:00 pmBar fun today at 3:00pm 300pm 400pm 3p 10a 23p (4:00pm) 11am whatthe10afoo 10afoo foo4p 5p 7-8",
        """7-9pm 7 - 9 pm 7-9 pm 7pm-9pm 7-9:00pm 7-9p 7-9 p 6:00 PM to 11:00 PM (PDT)""",
    ):
        print(time_range_regex.findall(sample))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment