Skip to content

Instantly share code, notes, and snippets.

@mrdaemon
Created December 11, 2014 13:14
Show Gist options
  • Save mrdaemon/04cc9c521d4ceb1f78a3 to your computer and use it in GitHub Desktop.
Save mrdaemon/04cc9c521d4ceb1f78a3 to your computer and use it in GitHub Desktop.
def tokenize(string):
    """Split a reminder request into recipient, time and message tokens.

    The request is expected to look like
    ``<recipient> in <number> <text> to <message>`` or
    ``<recipient> to <message> in <number> <text>``; ``about`` may be
    used in place of ``to``.  Keywords are matched case-insensitively.

    Returns:
        A dict with the keys ``"recipient"``, ``"time"`` and
        ``"message"``, each mapped to the matching substring.

    Raises:
        ReminderSyntaxError: if the recipient, the time or the message
            cannot be found in ``string``.
    """
    # Recipient: everything from the start of the string up to the first
    # "in" / "to" / "about" keyword.
    recipient_re = re.compile(r'^(.+?)\s(?:in|to|about)\b',
                              re.IGNORECASE)
    # Time: "in <digits> <text>", ending at a following "to" / "about"
    # keyword or at the end of the string.
    time_re = re.compile(r'\bin\s(\d+\s\b.+?\b)(?:\sto\b|\sabout\b|$)',
                         re.IGNORECASE)
    # Message: the text after "to" / "about".  It stops only where a time
    # block begins ("in" followed by a digit) or at the end of the string,
    # so a message that merely contains the word "in" ("check in on the
    # server") is no longer cut short.
    message_re = re.compile(r'\b(?:to|about)\s(.+?)(?=\sin\s\d|$)',
                            re.IGNORECASE)

    recipient_match = recipient_re.search(string)
    time_match = time_re.search(string)
    message_match = message_re.search(string)

    # Report the first missing part, in recipient / time / message order.
    if recipient_match is None:
        raise ReminderSyntaxError("Remind who?")
    if time_match is None:
        raise ReminderSyntaxError("Remind when?")
    if message_match is None:
        raise ReminderSyntaxError("Remind what..?")

    # Group 1 of each pattern holds the token without its keyword.
    return {
        "recipient": recipient_match.group(1),
        "time": time_match.group(1),
        "message": message_match.group(1),
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment